# This document covers data import.
# You must have file1 - file7 in your working directory.
# They can be found at the link to the right of this document.
# Before importing into R, you should get an idea of the data
# format. For small data sets, any text editor should work;
# for large data sets, use the unix program "less".
# ----------------------------------------------------------------- #
# -------------------------- read.table --------------------------- #
# OPEN file1 -- Note: there is a header and entries are separated by ,
# OPEN file2 -- Note: it is just like file1 but there is no header
# The appropriate commands are
x1 <- read.table("file1", header = TRUE, sep = ",")
x2 <- read.table("file2", header = FALSE, sep = ",")
# or x2 <- read.table("file2", sep = ",")
# since "header = FALSE" is the default, as can be seen by typing ?read.table
x1
x2
class(x1)
class(x2)
# Note that x1 has named columns corresponding to the header,
# whereas x2 has default column names V1 and V2.
# Also note that read.table returns a data frame.
# ----------------------------------------------------------------- #
# OPEN file3 -- Note: there is a header and entries are separated by a space
# OPEN file4 -- Note: it is just like file1 but one entry has ***
# The appropriate commands are
# (sep is left at its default for file3, which splits fields on whitespace)
x3 <- read.table("file3", header = TRUE)
# na.strings tells read.table which strings to interpret as missing values
x4 <- read.table("file4", header = TRUE, sep = ",", na.strings = "***")
x4
# Note that the *** has become NA.
# Every system has its conventions for dealing with missing values; for R
# the convention is NA, and there are many statistical functions that will
# work with data containing NAs. For example,
summary(x4)
# na.rm = TRUE drops the NA before averaging the second column
mean(x4[, 2], na.rm = TRUE)
# ----------------------------------------------------------------- #
# ----------------------------- scan ------------------------------ #
# Scan is good for simple data structures, such as a long vector or a matrix.
# It can only handle 1 type (plus missing values), and the default is "double".
# For large data sets, scan is much faster than read.table.
# Open file5-file7, type "?scan", and try the following:
# Default call: reads the file as a vector of doubles
x5 <- scan("file5")
# The type of `what` selects the type of data read; here, character strings
x6 <- scan("file6", what = "character")
# Entries in file7 are separated by commas
x7 <- scan("file7", sep = ",")
x5
x6
x7
summary(x7)