lec_0

Advanced Plotting Routines in R


# Load the Titanic data into R with the data command
# These data are R objects found in the main installation of R 

> data(Titanic)

# Mosaic plots are visual displays of categorical data.
# The entire set of observations is first represented by a box.
# This box is then divided into parallel boxes according to
# the counts of males and females in the data (sex is the categorical
# variable used to divide up the box).
# margin.table(Titanic, 2) collapses the table onto its second
# dimension, giving the counts that mosaicplot draws.

> mosaicplot(margin.table(Titanic, 2))

# A second categorical variable, survived (Y/N), then cuts each
# of these boxes in the y-direction.  If the proportion of survivors
# is independent of sex then the male/female boxes will be cut at
# the same place.  We see that this is far from the case and that
# there are many more females that survived (proportionally) than males.
# Setting shade = TRUE colors each tile according to how far its count
# departs from what the independence model predicts.

> mosaicplot(margin.table(Titanic,c(2, 4)), shade = TRUE)

# A third categorical variable, class (first, second, third, crew),
# can be added. It will cut along the x-axis. Note that the female
# crew is red, indicating far fewer survived than expected based on
# conditional independence assumptions.

# Notice the difference between the next two mosaic plots.
# The two calls use the same three variables and differ only in the
# order of the last two (c(2, 4, 1) versus c(2, 1, 4)).
# Which is easier to make the comparison of who survived?
> mosaicplot(margin.table(Titanic,c(2, 4, 1)), shade = TRUE)
> mosaicplot(margin.table(Titanic,c(2, 1, 4)), shade = TRUE)


# Next load the iris data and plot petal length and width

> data(iris)

> plot(iris$Petal.Length, iris$Petal.Width)

# It may not be obvious from the plot, but a number of observations
# have been plotted on top of each other.
# The sunflower plot indicates this multiplicity via the petals
# in the sunflower.

> sunflowerplot(iris$Petal.Length, iris$Petal.Width)

# An alternative (preferred) approach is to jitter the data a bit by
# adding a small random amount to each of the values so that they do not
# overplot each other.
# iris[[3]] and iris[[4]] select the third and fourth columns of iris
# by position.

> plot(jitter(iris[[3]], amount = 0.1),jitter(iris[[4]], amount = 0.1))

# The stars plot gives one star per row in the data frame.
# Each star has a "point" corresponding to a particular variable
# in the row. The length of it is determined by the value of the
# variable for the particular record.
# Similarly shaped stars indicate subgroups or clusters.
# Only the first four columns of iris are passed to stars.

> stars(iris[1:4])

# The lattice plots (also known as trellis plots) provide a family
# of conditional plots.

> library(lattice)

# The plotting formula in the call of the xyplot function:
#  Sepal.Length + Sepal.Width ~ Petal.Length + Petal.Width | Species
# indicates that both sepal variables are to be plotted on the
# y-axis (not added as in traditional vector addition) and
# both petal variables are to be plotted on the x-axis.
# Here scatterplots are made conditional on species.
# This means there will be three plots, one for each species.
# The layout parameter indicates that the 3 scatter plots are to be
# placed in a 2 by 2 grid, one of which will be empty.

> xyplot(Sepal.Length + Sepal.Width ~ Petal.Length + Petal.Width| Species,
data = iris, layout = c(2,2))

# The scales parameter set to "free" indicates that the plots do
# not need to be on the same scale.

> xyplot(Sepal.Length + Sepal.Width ~ Petal.Length + Petal.Width| Species,
data = iris,  scales="free", layout = c(2,2))

# The allow.multiple parameter controls whether the "+" terms in the
# formula are read as separate variables (TRUE) rather than as an
# arithmetic sum (FALSE). It already defaults to TRUE when no groups
# argument is supplied, so the call below only makes that choice explicit.
# Note the spelling: the argument is allow.multiple, not allowed.multiple.

> xyplot(Sepal.Length + Sepal.Width ~ Petal.Length + Petal.Width| Species,
data = iris, allow.multiple = TRUE, scales="free", layout = c(2,2))

# An alternative to conditioning on Species is to group by species.
# The xyplot below groups by species, so each record belonging to a
# specific species is colored one color.

> xyplot(iris$Petal.Length ~ iris$Petal.Width, groups = iris$Species)

# In addition, the type parameter allows us to smooth the groups of
# observations: "p" draws the points and "smooth" adds a loess curve.
# The span of the smoother is a separate argument of its own; it must
# not be placed inside the c() that builds the type vector, where it
# would be turned into the character string "0.75" and have no effect.

> xyplot(iris$Petal.Length ~ iris$Petal.Width, groups = iris$Species,
type = c("p", "smooth"), span = 0.75)

# Another plot is the scatter plot matrix, or splom.
# The plotting formula
# ~iris[-5]
# drops the fifth column of iris and so
# indicates that all pairs of the four variables (sepal and petal width
# and length) are to be plotted.
# The groups parameter indicates that the records corresponding to
# each group will be plotted with the same color.

> splom(~iris[-5], groups = Species, data = iris)

# Alternatively, we can make separate scatter plot matrices, one for each species,
#  as follows. The "| Species" part of the formula does the conditioning.

> splom(~iris[-5]| Species, data = iris)

# We can accomplish a similar plot with the pairs function.

> pairs( iris[-5] )

# To add colors we use the Species variable to index a color vector.
# Species is a factor, so indexing with it picks one color per level
# for every row.

> cols <- c("red", "green", "blue")
> pairs( iris[-5], col = cols[iris$Species])

# Yet another technique for examining multiple variables conditionally
# is the parallel plot. Here each variable is represented in the same
# dimension with parallel lines. The records consist of joined colored
# line segments joining the 4 values of the variables being plotted.
# The lattice function is called parallelplot; it was formerly named
# parallel, a name that is no longer available in current lattice.

> parallelplot(~iris[,1:4]|Species, data = iris, layout = c(3,1))


# The lattice plotting library provides pre-defined panel functions for
# use in constructing new panel functions. Examples include
# panel.abline, panel.curve, panel.loess, panel.grid.
# Below is an example of how one might use these functions.
# The custom panel function draws, in each Species panel, a reference
# grid, then the points, then a loess smooth through them.
# data = iris is required so that the bare column names in the formula
# can be found.

> xyplot(Petal.Length ~ Petal.Width|Species, data = iris,
+ panel = function(x,y){
+ panel.grid(h = -1, v = 2)
+ panel.xyplot(x, y)
+ panel.loess(x, y, span = 0.75)}
+ )