# Path of the data file, and a read-mode connection to it. Keeping the
# connection open lets successive reads continue where the last one stopped.
fileName <- "~/Projects/ComputingCurriculum/Workshop2/IO/Longitudinal/longitudinal"
con <- file(fileName, open = "r")

# First line: the comma-separated header of the first record.
general <- read.table(con, sep = ",", nrows = 1)
general

# Next two lines from the same connection, with "." read as NA.
# Displayed transposed, so each value line becomes a column.
values <- read.table(con, na.strings = ".", nrows = 2)
t(values)

close(con)
# So we can loop over the records and extract this information one
# observational unit at a time.
# Then we can stack the results if we want, but risk issues of memory
# growth by concatenation.


# Read the file one record at a time. Each record is one header line
# followed by two lines of whitespace-separated numeric values, with "."
# marking a missing value.
# The pieces for each record are kept in lists and concatenated once after
# the loop, so the accumulated vectors are not re-copied on every iteration.
v1Parts = list()
v2Parts = list()
idParts = list()
recordNum = 1
con = file(fileName, "r")

while(TRUE) {
    # Consume the header line. Nothing returned means the connection is
    # exhausted; an empty string is a blank line (the file appears to end
    # with one). Either way there are no more records, so stop.
  header = readLines(con, n = 1)
  if(length(header) == 0 || !nzchar(header))
    break

    # what = numeric() gives numeric values; what = "numeric" would be a
    # character template and make scan() return strings.
    # na.strings = "." turns the missing-value marker into NA.
  vals = scan(con, nlines = 1, what = numeric(), na.strings = ".")
  v1Parts[[recordNum]] = vals
  v2Parts[[recordNum]] = scan(con, nlines = 1, what = numeric(), na.strings = ".")
  idParts[[recordNum]] = rep(recordNum, length(vals))
  recordNum <- recordNum + 1
}

close(con)

# Flatten the per-record pieces; the as.*() calls keep the types right even
# when no records were read (unlist() of an empty list is NULL).
V1 = as.numeric(unlist(v1Parts))
V2 = as.numeric(unlist(v2Parts))
id = as.integer(unlist(idParts))

################################################################################


# An alternative approach is to read the entire file contents
# into memory, and then extract the personal details
# and then the values by dealing with the lines for all people
# as entire blocks, i.e. not person by person.
# This almost works!

# No connection is needed, since reading all the content is a single atomic act.

# Read every line of the file in one call.
lines = readLines(fileName)

  # Drop empty lines (e.g. a blank line at the end of the file)
lines = lines[lines != ""]

# We know each record has 3 lines: personal details and 2 value lines.
# Anything else would silently misalign every index computed below.
if(length(lines) %% 3 != 0)
  stop("expected 3 lines per record, but found ", length(lines),
       " non-empty lines")

numPeople = length(lines)/3

# So let's get all the personal details first
# These are in lines 1, 4, 7, 10, ...

i = seq(1, length.out = numPeople, by = 3)

tmp = textConnection(lines[i])
personal.details = read.table(tmp, sep = ",", strip.white = TRUE)
  # Close the text connection that was just read. `con` is the file
  # connection from the earlier section, not the one opened here.
close(tmp)


# Now for the values
# which are in the other lines, the complement of those indexed by i

# Parse all the value lines (everything not indexed by i) as one table.
# fill = TRUE lets read.table accept rows with differing numbers of fields.
tmp <- textConnection(lines[-i])
values <- read.table(tmp, na.strings = ".", fill = TRUE)
close(tmp)

# Rows 1, 3, 5, ... of `values` are the first value line of each record,
# and rows 2, 4, 6, ... the second.
j <- seq(1, by = 2, length = numPeople)

# unlist() on the selected rows runs column by column, so the person index
# cycles 1..numPeople once per column; the rows are regrouped by person below.
vals <- data.frame(
  V1 = unlist(values[j, ]),
  V2 = unlist(values[j + 1, ]),
  id = factor(rep(1:numPeople, ncol(values)),
              labels = as.character(personal.details[, 3]))
)

vals <- vals[order(vals[["id"]]), ]

# gl - generate factor levels
#           gl(numPeople, 5, labels = as.character(personal.details[,3])))
# Same as
#  rep(1:numPeople, rep(ncol(values), numPeople))


# Problem is that we have too many
# observations for the second record.
# The last set of missing values is too many.
# So we can try to fix this or use a different approach.
#

# Each variable separate.
# Each variable separate: for offset 1 and then 2, scan the matching value
# line of every record in one pass and make the result a column of `a`.
a <- as.data.frame(
  lapply(1:2, function(offset) {
    valueCon <- textConnection(lines[i + offset])
    on.exit(close(valueCon))
    list(V1 = scan(valueCon, na.strings = "."))
  })
)

names(a) <- c("V1", "V2")

# But now we need to know how to match observations
# to people. So we need to know the number of observations
# per person.

# Number of values (missing ones included) on each person's first value line.
# scan(text = ) manages its own temporary connection, so nothing is left open,
# and vapply() guarantees an integer vector back.
numObs = vapply(lines[i + 1],
                function(x)
                  length(scan(text = x, na.strings = ".")),
                integer(1), USE.NAMES = FALSE)

# Repeat each person's index once per observation and label it with the
# third column of the personal details.
a$id = factor(rep(1:numPeople, numObs),
              labels = as.character(personal.details[, 3]))


# Alternatively, we could use count.fields, but that doesn't
# have a na.strings parameter.

# Disabled sketch: count the fields on each person's first value line with
# count.fields(). It has no na.strings argument, but none is needed just to
# count: a "." is counted as a field like any other token.
if(FALSE) {
 tmp = textConnection(lines[i + 1])
   # Read from tmp, the text connection opened just above, not the file
   # connection con.
 num = count.fields(tmp)
 close(tmp)
}


###

# Read all the lines, and then just work on the values
# by themselves.  
# Process all the value lines separately
# and keep them separate so that we know how many observations
# we have for each individual.

# Scan each value line on its own, giving a list with one numeric vector per
# line ("." becomes NA).
tmp =
  lapply(lines[-i], function(x) {
                    lineCon = textConnection(x)
                      # on.exit() needs the call that closes the connection;
                      # naming the connection alone would leave it open.
                    on.exit(close(lineCon))
                    scan(lineCon, na.strings = ".")
                  })

# 1, 3, 5, ...  (the first value line of each record within tmp)
j = seq(1, by = 2, length.out = numPeople)

# Keeping the lines separate gives the number of observations per person.
numObsPerPerson = vapply(tmp[j], length, integer(1))

vals = data.frame(V1 = unlist(tmp[j]),
                  V2 = unlist(tmp[j + 1]),
                  id = factor(rep(1:numPeople, numObsPerPerson),
                              labels = as.character(personal.details[,3])))


#############################################