library(XML)

# Walk-through of the XML package: parse an XML file that describes a matrix,
# explore the resulting tree, and pull out the row names, column names,
# dimensions and cell values.

# Read an xml file into an R list which is formatted like a tree
xmlTreeParse("~/Stat133/lectures/XML/matrix.xml")
x <- xmlTreeParse("~/Stat133/lectures/XML/matrix.xml")

# Note that what prints on the screen is an xml document,
# but what is stored internally is a list of class XMLDocument
class(x)
mode(x)
names(x)

# Explore a bit to find that the main document is x$doc
x$dtd
names(x$doc)
x$doc["file"]
x$doc["version"]
x$doc["children"]

# This function is very handy for pulling out
# the root of the xml document
xtree <- xmlRoot(x)
class(xtree)

# There are several functions provided by the XML package
# These operate on XMLNode objects
xmlName(xtree)
xmlAttrs(xtree)
xmlChildren(xtree)
class(xmlChildren(xtree))
xmlSize(xmlChildren(xtree))

# There is a special apply function in the XML package
# that is for XMLNodes
xmlSApply(xtree, xmlSize)

# Look at the first child of the root and at its first grandchild
xtree[[1]]
class(xtree[[1]])
xmlSize(xtree[[1]])
xmlName(xtree[[1]])
xmlChildren(xtree[[1]])
xmlValue(xtree[[1]][[1]])

# Row names: the "id" attribute of every <row> child. Children that are not
# rows yield NULL, which unlist() drops.
xmlApply(xtree, function(node) {
  if (xmlName(node) == "row") xmlAttrs(node)[["id"]]
})
rn <- xmlApply(xtree, function(node) {
  if (xmlName(node) == "row") xmlAttrs(node)[["id"]]
})
rn <- unlist(rn)

# Column names: the text of each child of the <colNames> node
cn <- xmlSApply(xtree[["colNames"]], xmlValue)

# Matrix dimensions are stored as attributes on the root node
dm <- xmlAttrs(xtree)
numrow <- as.integer(dm[["nrow"]])
numcol <- as.integer(dm[["ncol"]])

# Values held by the second child of the root
xtree[[2]]
xmlSApply(xtree[[2]], xmlValue)

# Note we have to use sapply here because xtree[-1] is not an XMLNode
matData <- sapply(xtree[-1], function(node) {
  if (xmlName(node) == "row") xmlSApply(node, xmlValue)
})
matData
as.numeric(matData)