Advanced Plotting Routines in R
# Load the Titanic data into R with the data() command.
# This data set is an R object that comes with the main installation of R,
# so no extra package has to be installed first.
> data(Titanic)
# Mosaic plots are a visual display of categorical data.
# The entire set of observations is first represented by a box.
# This box is then divided into parallel boxes according to
# the counts of males and females in the data (sex is the categorical
# variable used to divide up the box).
# margin.table(Titanic, 2) collapses the table onto its second dimension
# (sex), giving the counts that set the box widths.
> mosaicplot(margin.table(Titanic, 2))
# A second categorical variable, survived (Y/N), then cuts each
# of these boxes in the y-direction. If the proportion of survivors
# is independent of sex then the male/female boxes will be cut at
# the same place. We see that this is far from the case and that
# there are many more females that survived (proportionally) than males.
# With shade = TRUE the tiles are colored by how far each cell departs
# from the count expected under independence.
> mosaicplot(margin.table(Titanic,c(2, 4)), shade = TRUE)
# A third categorical variable, class (first, second, third, crew),
# can be added. It will cut along the x-axis. Note that the female
# crew is red, indicating far fewer survived than expected based on
# conditional independence assumptions.
# Notice the difference between the next two mosaic plots: they use the
# same three variables but swap the order of the last two
# (c(2, 4, 1) versus c(2, 1, 4)), which changes the order of the cuts.
# Which one makes it easier to compare who survived?
> mosaicplot(margin.table(Titanic,c(2, 4, 1)), shade = TRUE)
> mosaicplot(margin.table(Titanic,c(2, 1, 4)), shade = TRUE)
# Next load the iris data and plot petal width (y-axis) against
# petal length (x-axis).
> data(iris)
> plot(iris$Petal.Length, iris$Petal.Width)
# It may not be obvious from the plot, but a number of observations
# have been plotted on top of each other.
# The sunflower plot indicates this multiplicity via the petals
# in the sunflower: the more petals, the more coincident observations.
> sunflowerplot(iris$Petal.Length, iris$Petal.Width)
# An alternative (preferred) approach is to jitter the data a bit by
# adding a small random amount to each of the values so that they do not
# overplot each other.
# iris[[3]] and iris[[4]] pick the third and fourth columns by position
# (the petal length and width plotted above); amount = 0.1 bounds the
# size of the random shift. The picture changes slightly on every run
# because the noise is random.
> plot(jitter(iris[[3]], amount = 0.1),jitter(iris[[4]], amount = 0.1))
# The stars plot gives one star per row in the data frame.
# Each star has a "point" corresponding to a particular variable
# in the row. The length of it is determined by the value of the
# variable for the particular record.
# Similarly shaped stars indicate subgroups or clusters.
# iris[1:4] keeps only the first four columns of the data frame.
> stars(iris[1:4])
# The lattice plots (also known as trellis plots) provide a family
# of conditional plots. The functions used below (xyplot, splom, ...)
# come from the lattice package, so attach it first.
> library(lattice)
# The plotting formula in the call of the xyplot function:
# Sepal.Length + Sepal.Width ~ Petal.Length + Petal.Width | Species
# indicates that both sepal variables are to be plotted on the
# y-axis (not added as in traditional vector addition) and
# both petal variables are to be plotted on the x-axis.
# Here scatterplots are made conditional on species.
# This means there will be three plots, one for each species.
# The layout parameter indicates that the 3 scatter plots are to be
# placed in a 2 by 2 grid, one of which will be empty.
> xyplot(Sepal.Length + Sepal.Width ~ Petal.Length + Petal.Width | Species,
+   data = iris, layout = c(2, 2))
# The scales parameter set to "free" indicates that the plots do
# not need to be on the same scale.
> xyplot(Sepal.Length + Sepal.Width ~ Petal.Length + Petal.Width | Species,
+   data = iris, scales = "free", layout = c(2, 2))
# allow.multiple = TRUE is the argument that makes lattice read the "+"
# terms as separate variables to overlay rather than as a sum. It is
# already the default when no groups argument is given, so this call
# only spells the behaviour out explicitly.
> xyplot(Sepal.Length + Sepal.Width ~ Petal.Length + Petal.Width | Species,
+   data = iris, allow.multiple = TRUE, scales = "free", layout = c(2, 2))
# An alternative to conditioning on Species is to group by species.
# The xyplot below groups by species, so each record belonging to a
# specific species is colored one color.
> xyplot(iris$Petal.Length ~ iris$Petal.Width, groups = iris$Species)
# In addition, the type parameter allows us to smooth the groups of
# observations: "p" draws the points and "smooth" adds a loess curve
# per group. span is a separate argument handed on to the smoother; it
# must not be placed inside c(), where it would just become another
# (ignored) string element of type.
> xyplot(iris$Petal.Length ~ iris$Petal.Width, groups = iris$Species,
+   type = c("p", "smooth"), span = 0.75)
# Another plot is the scatter plot matrix, or splom.
# The plotting formula
# ~iris[-5]
# (the data frame without its fifth column) indicates that all pairs of
# the four remaining variables (sepal and petal width and length) are to
# be plotted.
# The groups parameter indicates that the records corresponding to
# each group will be plotted with the same color.
> splom(~iris[-5], groups = Species, data = iris)
# Alternatively, we can make separate scatter plot matrices, one for each
# species, by conditioning on Species in the formula as follows.
> splom(~iris[-5]| Species, data = iris)
# We can accomplish a similar plot with the pairs function.
> pairs(iris[-5])
# To add colors we use the Species variable to index a color vector.
# Indexing with a factor uses its integer level codes, so every
# observation of a given species picks up the same color.
> cols <- c("red", "green", "blue")
> pairs(iris[-5], col = cols[iris$Species])
# Yet another technique for examining multiple variables conditionally
# is the parallel (coordinates) plot. Here each variable is represented
# in the same dimension with parallel lines. The records consist of
# joined colored line segments joining the 4 values of the variables
# being plotted.
# lattice names this function parallelplot(); the older name parallel()
# was deprecated and is not available in current lattice versions.
# layout = c(3, 1) places the three species panels side by side.
> parallelplot(~iris[, 1:4] | Species, data = iris, layout = c(3, 1))
# The lattice plotting library provides pre-defined panel functions for
# use in constructing new panel functions. Examples include
# panel.abline, panel.curve, panel.loess, panel.grid.
# Below is an example of how one might use these functions.
# The custom panel function is called once per species with that panel's
# x and y values and draws, in order:
#   - a reference grid (h = -1 aligns the horizontal lines with the axis
#     ticks, v = 2 draws two vertical lines),
#   - the points themselves,
#   - a loess smooth with span 0.75.
# data = iris is required so that the variables named in the formula
# can be found.
> xyplot(Petal.Length ~ Petal.Width | Species, data = iris,
+   panel = function(x, y) {
+     panel.grid(h = -1, v = 2)
+     panel.xyplot(x, y)
+     panel.loess(x, y, span = 0.75)
+   }
+ )