#######################################
# UNIT 1: UNIX
#######################################


#######################################
# 3: Files and directories
#######################################

# login and copying to a remote machine

ssh -X bilbo
ssh -X paciorek@bilbo.berkeley.edu

pwd

ls

cd /accounts/gen/vis/paciorek/teaching/243
cd ~paciorek/teaching/243
cd ~/teaching/243
cd 243 # provided I am in 'teaching'
cd ..
cd -

cp
cp -r
cp -rp # preserves timestamps and other metainfo (VERY handy for tracing your workflows if you move files between machines)
mkdir
rm
rm -r  # recursively remove a whole directory structure
rm -rf # CAREFUL! In particular, make sure you know what directory you are in

# file transfer between machines
scp file.txt paciorek@bilbo.berkeley.edu:~/research/.
scp paciorek@bilbo.berkeley.edu:/data/file.txt ~/research/renamed.txt

# changing permissions on a file
chmod ugo+x myProg # myProg should be compiled code or a shell script

chmod ugo+rw code.q

chmod go-w myThesisCode.q


zip files.zip a.txt b.txt c.txt
gzip a.txt b.txt c.txt # will create a.txt.gz, b.txt.gz,  c.txt.gz
tar -cvf files.tar myDirectory
tar -cvzf files.tgz myDirectory  
tar -xvf files.tar
tar -xvzf files.tgz
gzip -cd file.gz | less
zcat file.zip | less

##########################################
# 4: A variety of UNIX tools/capabilities
##########################################

man cp

which R
which matlab
which maple

emacs -nw file.txt
# let's play around with some of the keystrokes in emacs

/proc/meminfo
/proc/cpuinfo
/etc/issue

grep processor /proc/cpuinfo

##########################################
# UNIT 2: The bash Shell
##########################################

################
# 1 Shell basics
################

echo $SHELL

tcsh
exit

which bash

# create a shell script and try to execute it; the first line tells the operating system what shell to use to run the script
#! /bin/bash  
# ls -al *pdf > myPdfs.txt
# we might need to change the permissions (recall from Unit 1)

#####################
# 3 Command history
#####################
!ls
!-1
!-1:p

#####################
# 4 Utilities
#####################

cd research/fusion/hei/write/revisedReport
grep pdf *q

cd ~/Desktop/243

# in R
# for(i in 1:10000)  write(mean(rpois(100, 1)), file = 'clt.txt', append = TRUE)
tail -f clt.txt

grep com  websites.txt
# any problems? can we be a bit smarter?

#####################
# 5 Redirection
#####################

# ls 
ls | head -5

cd ~/Desktop/243
cut -d',' -f2 mileage2009.csv | sort | uniq | wc
cut -d',' -f2 mileage2009.csv | sort | uniq | nl 

# you won't be able to replicate this as it uses files on my desktop
cut -b1,2,3,4,5,6,7,8,9,10,11,29,37,45,53,61,69,77,85,93,101,109,117,125,133,141,149,157,165,173,181,189,197,205,213,221,229,237,245,253,261,269  AE000041196.dly | grep "M" | more
cut -b1,2,3,4,5,6,7,8,9,10,11,29,37,45,53,61,69,77,85,93,101,109,117,125,133,141,149,157,165,173,181,189,197,205,213,221,229,237,245,253,261,269 USC*.dly | grep "M" | more

# you won't be able to replicate this as it uses files in my home directory
cd ~/research/fusion/hei/write/finalReport
ls -t *.{R,r,q} | head -4 
grep pdf `ls -t *.{R,r,q} | head -4`

files=$(ls)
echo $files


####################
# 6 Wildcards
####################

cd ~/Desktop/243

ls *{pdf,R}

# in my home directory
cd ~/teaching/243/lectures
ls *[0-9]*  
ls *[!~#]  # don't show automatically-generated backup files

echo cp filename{,old} 

#####################
# 7 Job control
#####################

R
# i = 0; while(i < 1) print(i)
C-c
C-\

R --no-save < code.q >& code.Rout &  # let's parse what this will do

ssh arwen
ps -aux | grep R

# in R
# for(i in 1:1000){
# x = matrix(rnorm(5000*5000), nr = 5000, nc = 5000)
# y = crossprod(x)
# }
nice -19 R CMD BATCH file.r Rout

# monitor on top and watch CPU and memory use
# notice the priority is 39 = 20 + 19

########################
# 8 Aliases
########################

alias ls="ls -F"
ls
\ls

# here are some aliases in my .bashrc
alias q="exit"
alias tf="tail -f"
alias m="less"
alias res="cd ~/research"
alias todo="emacs ~/todo &"
alias r="R --no-save"  
alias myjobs="ps -eafl | grep paciorek"
alias scf="ssh -X legolas.berkeley.edu"


#########################
# 9 Shell variables
#########################

name="chris"
echo $name
env
echo $HOSTNAME
echo $HOME

cd lectures
export CDPATH=.:~/research:~/teaching:~/teaching/243
cd lectures

# I put the following in my .bashrc
export PS1="\u@\h:\w> "


#########################
# 10 Functions
#########################


function putscf() {
   scp $1 paciorek@bilbo.berkeley.edu:~/$2 
}

putscf file.txt teaching/243/lectures/garbage.txt

# a few functions from my .bashrc

function mounts(){  # remotely mount filesystems I have access to
    sshfs carver.nersc.gov /accounts/gen/vis/paciorek/nersc
    sshfs bilbo.berkeley.edu: /accounts/gen/vis/paciorek/scf
}

function a(){
    acroread $1&
}

function putweb() {
    scp $1 paciorek@bilbo.berkeley.edu:/mirror/data/pub/users/paciorek/$2
}

function e() {
    emacs $1 &
}

function enw() {
    emacs -nw $1 
}

function l2h(){
    latex2html $1.tex -local_icons -long_titles 5
}


#########################
# 11 If/then/else
#########################

# niceR shortcut for nicing R jobs 
# usage: niceR inputRfile outputRfile 
# Author: Brian Caffo (Johns Hopkins Biostat)
# Date: 10/01/03 

function niceR(){
    # submits nice'd R jobs
# syntax of a function call: niceR file.r Rout
    if [ $# != "2" ]
    then 
        echo "usage: niceR inputRfile outputfile" 
    elif [ -e "$2" ]
    then 
        echo "$2 exists, I won't overwrite" 
    elif [ ! -e "$1" ]
    then 
        echo "inputRfile $1 does not exist" 
    else 
        echo "running R on $1" 
        nice -n 19 R --no-save < $1 &> $2 
    fi 
}

#########################
# 12 For loops
#########################

for file in $(ls *txt)
do
   mv $file ${file/.txt/.q} # this syntax replaces .txt with .q in $file
done


# example of bash for loop and wget for downloading a collection of files on the web

IFS=: # internal field separator
mths=jan:feb:mar:apr  
# alternatively I could do mths="jan feb mar apr" and not set IFS
for ((yr=1910; yr<=1920; yr++))
do
    for mth in $mths
    do
        wget ftp://ftp3.ncdc.noaa.gov/pub/data/3200/${yr}/3200${mth}${yr}
    done
done

# if I want to do some post-processing, do the following instead

IFS=: # internal field separator
mths=jan:feb:mar:apr  
for ((yr=1910; yr<=1920; yr++))
do
    for mth in $mths
    do
        wget ftp://ftp3.ncdc.noaa.gov/pub/data/3200/${yr}/3200${mth}${yr}
        grep PRCP 3200${mth}${yr} >> master${mth} # what does this do?
        rm 3200${mth}${yr} # clean up extraneous files
    done
done


# example of bash for loop for starting jobs

n=100 # if I want to be able to vary n from outside the R program
for(( it=1; it<=100; it++));
do
    echo "n=$n; it=$it; source('base.q')" > tmp-$n-$it.q
    R CMD BATCH --no-save tmp-$n-$it.q sim-n$n-it$it.Rout&
done
# note that base.q should NOT set either 'n' or 'it', but should make use of them, including when creating unique names for output files for each iteration
# example of bash for loop and wget for downloading a collection of files on the web
# usage: ./forloopDownload.sh
# Author: Chris Paciorek
# Date: July 28, 2011


IFS=: # internal field separator
mths=jan:feb:mar:apr  # I don't know how to specify a vector of characters in bash, so I'll get around this using the : as a delimiter and the bash for loop will automatically do the looping over the elements
for ((yr=1910; yr<=1920; yr++))
do
    for mth in $mths
    do
        wget ftp://ftp3.ncdc.noaa.gov/pub/data/3200/${yr}/3200${mth}${yr}
    done
done

# if I want to do some post-processing, do the following instead

IFS=: # internal field separator
mths=jan:feb:mar:apr  # I don't know how to specify a vector of characters in bash, so I'll get around this using the : as a delimiter and the bash for loop will automatically do the looping over the elements
for ((yr=1910; yr<=1920; yr++))
do
    for mth in $mths
    do
        wget ftp://ftp3.ncdc.noaa.gov/pub/data/3200/${yr}/3200${mth}${yr}
        grep PRCP 3200${mth}${yr} >> master${mth} # what does this do?
        rm 3200${mth}${yr} # clean up extraneous files
    done
done
# example of bash for loop for starting jobs
# usage: ./forloopJobs.sh
# Author: Chris Paciorek
# Date: July 28, 2011


n=100 # if I want to be able to vary n from outside the R program
for(( it=1; it<=100; it++));
do
    echo "n=$n; it=$it; source('base.q')" > tmp-$n-$it.q
    R CMD BATCH --no-save tmp-$n-$it.q sim-n$n-it$it.Rout
done
# note that base.q should NOT set either 'n' or 'it'# niceR shortcut for nicing R jobs 
# usage: niceR inputRfile outputRfile 
# Author: Brian Caffo 
# Date: 10/01/03 

function niceR(){
    # submits nice'd R jobs
if [ $# != "2" ]
then 
   echo "usage: niceR inputRfile outputfile" 
elif [ -e "$2" ]
then 
   echo "$2 exists, I won't overwrite" 
elif [ ! -e "$1" ]
then 
   echo "inputRfile $1 does not exist" 
else 
   echo "running R on $1" 
   nice -n 19 R --no-save < $1 &> $2 
fi 
}

##################################################
### Demo code for Unit 3 of Stat243, "Using R"
### Chris Paciorek, September 2012
##################################################

#############################
# 1: Some basic functionality
#############################

system("ls -al") # knitr/Sweave doesn't seem to show the output of system()
files <- system("ls", intern = TRUE)
files[1:5]


file.exists("file.txt")
list.files("~/research")


Sys.info()


# options()  # this would print out a long list of options
options()[1:5]
options()[c('width', 'digits')]
options(width = 120) # often nice to have more characters on screen
options(width = 60)  # for purpose of making the pdf of this document
options(max.print = 5000)
options(digits = 3)
a <- 0.123456; b <- 0.1234561
a; b; a == b

#############################
# 2: Packages
#############################

library(fields)
library(help = fields)
# library()  # I don't want to run this on SCF because so many are installed


install.packages('fields', lib = '~/Rlibs') # ~/Rlibs needs to exist!


search()
# ls(pos = 7) # for the graphics package
ls(pos = 7)[1:5] # just show the first few

#############################
# 3: Objects
#############################

# 3.1 Classes of objects

vec = c("Sam", "0923", "Sam9", "Sam is", "Sam\t9", "Sam's the man.\nNo doubt.\n")
cat(vec[5])
cat(vec[6])

print(vec[5])
print(vec[6])

# 3.2 Assignment and coercion

out = mean(rnorm(7)) # OK
system.time(out = rnorm(10000)) # NOT OK, system.time expects its argument to be a complete R expression
system.time(out <- rnorm(10000))


mean
x <- 0; y <- 0
out <- mean(x = c(3,7)) # the usual way to pass an argument to a function
out <- mean(x <- c(3,7)) # this is allowable, though perhaps not useful; what does it do?
out <- mean(y = c(3,7))
out <- mean(y <- c(3,7))


mat <- matrix(c(1, NA, 2, 3), nrow = 2, ncol = 2)
apply(mat, 1, sum.isna <- function(vec) {return(sum(is.na(vec)))})
apply(mat, 1, sum.isna = function(vec) {return(sum(is.na(vec)))}) # NOPE


vals <- c(1, 2, 3)
class(vals)
vals <- 1:3
class(vals)
vals <- c(1L, 2L, 3L)
vals
class(vals)


as.character(c(1,2,3))
as.numeric(c("1", "2.73"))
as.factor(c("a", "b", "c"))


x <- rnorm(5)
x[3] <- 'hat' # What do you think is going to happen?
indices = c(1, 2.73)
myVec = 1:10
myVec[indices]

# 3.3 Type vs. class

a <- data.frame(x = 1:2)
class(a)
typeof(a)
m <- matrix(1:4, nrow = 2) 
class(m) 
typeof(m)


me <- list(firstname = 'Chris', surname = 'Paciorek')
class(me) <- 'personClass'  # it turns out R already has a 'person' class defined
class(me)
is.list(me)
typeof(me)
typeof(me$firstname)

# 3.4 Information about objects

is(me, 'personClass')
str(me)
attributes(me)
mat <- matrix(1:4, 2)
class(mat)
typeof(mat)
length(mat) # recall that a matrix can be thought of as a vector with dimensions
attributes(mat)
dim(mat)


x <- rnorm(10 * 365)
qs <- quantile(x, c(.025, .975))
qs
qs[1] + 3


names(qs) <- NULL
qs


row.names(mtcars)[1:6]
names(mtcars)
mat <- data.frame(x = 1:2, y = 3:4)
row.names(mat) <- c("first", "second")
mat
vec <- c(first = 7, second = 1, third = 5)
vec['first']

# 3.5 The workspace

objects() # what objects are in my workspace
identical(ls(), objects()) # synonymous
dat <- 7; dat2 <- 9; subdat <- 3; obj <- 5
objects(pattern = "^dat") 
rm(dat2, subdat)
rm(list = c("dat2", "subdat")) # a bit confusing - the 'list' argument should be a character vector
rm(list = ls(pattern = "^dat")) 
exists('dat') # can be helpful when programming
dat <- rnorm(500000)
object.size(dat)
print(object.size(dat), units = "Mb") # this seems pretty clunky!
rm(list = ls()) # what does this do?

# 3.6 Missing values

3 / Inf
3 + Inf
0/0


x <- c(3, NA, 5, NaN)
is.na(x)
x == NA
x[is.na(x)] <- 0
x

# 3.7 Some other details

rnorm(10)
# .Last.value  # this should return the 10 random normals but knitr is messing things up, commented out here


x <- 1e5
log10(x)
y <- 100000
x <- 1e-8

?lm # or help(lm)
help.search('lm')
apropos('lm')
help('[[') # operators are functions too 
args(lm)

ch1 <- "Chris's\n"
ch2 <- 'He said, "hello."\n'
ch3 <- "He said, \"hello.\"\n"


##################################
# 4: Working with data structures
##################################

# 4.1 Lists and dataframes


x <- list(a = 1:2, b = 3:4, sam = rnorm(4))
x[[2]] # extracts the indicated component, which can be anything, in this case just an integer vector
x[c(1, 3)] # extracts subvectors, which since it is a list, will also be a list


lapply(x, length)
sapply(x, length) # returns things in a user-friendly way


apply(CO2, 2, class) # hmmm
sapply(CO2, class)


params <- list( a = list(mn = 7, sd = 3), b = list(mn = 6, sd =1), c = list(mn = 2, sd = 1))
sapply(params, '[[', 1)


unlist(x)


tapply(mtcars$mpg, mtcars$cyl, mean) 
tapply(mtcars$mpg, list(mtcars$cyl, mtcars$gear), mean)


aggregate(mtcars, list(cyl = mtcars$cyl), mean) # this uses the function on each column of mtcars, split by the 'by' argument
by(warpbreaks, warpbreaks$tension, function(x) {lm(breaks ~ wool, data = x)})


split(mtcars, mtcars$cyl)


myList <- list(a = 1:3, b = 11:13, c = 21:23)
rbind(myList$a, myList$b, myList$c)
rbind(myList)
do.call(rbind, myList)

# 4.2 Vectors and matrices

mat <- matrix(rnorm(500), nr = 50) 
identical(mat[1:50], mat[ , 1])
identical(mat[1:10], mat[1, ])
vec <- c(mat) 
mat2 <- matrix(vec, nr = 50) 
identical(mat,mat2) 


matrix(1:4, 2, byrow = TRUE)


mat <- matrix(1:8, 2)
mat[1, ] <- c(1,2)
mat
mat[1, ] <- 1:3


rep(seq(1, 9, by = 2), each = 2)
rep(seq(1, 9, by = 2), times = 2) # repeats the whole vector 'times' times
rep(seq(1, 9, by = 2), times = 1:5) # repeats each element based on 'times'


expand.grid(x = 1:2, y = -1:1, theta = c("lo", "hi"))


x <- c(1, 10, 2, 9, 3, 8)
which(x < 3)


set.seed(0)
vec <- rnorm(8); mat <- matrix(rnorm(9), 3)
vec; mat
vec[vec < 0]
vec[vec < 0] <- 0
vec
mat[mat[ , 1] < 0, ] # similarly for data frames
mat[mat[ , 1] < 0, 2:3]  # similarly for data frames
mat[ , mat[1, ] < 0]
mat[mat[ , 1] < 0, 2:3] <- 0
set.seed(0) # so we get the same vec as we had before
vec <- rnorm(8)
wh <- which(vec < 0)
logicals <- vec < 0
logicals; wh
identical(vec[wh], vec[logicals])
vec <- c(1L, 2L, 1L)
is.integer(vec)
vec[vec == 1L] # in general, not safe with numeric vectors
vec[vec != 3L] # nor this

# subsetting based on a 2-column matrix of {row,column} indices
mat <- matrix(rnorm(25), 5)
rowInd <- c(1, 3, 5); colInd <- c(1, 1, 4)
mat[cbind(rowInd, colInd)]  # extracts the (1,1), (3,1) and (5,4) elements

x <- matrix(1:6, nr = 2)
x
apply(x, 1, min) # by row 
apply(x, 2, min) # by column
x <- array(1:24, c(2, 3, 4))
apply(x, 2, min) # for(j in 1:3) print(min(x[ , j, ]))
apply(x, c(2, 3), min) # for(j in 1:3) {for(k in 1:4) print(min(x[ , j, k]))}


n <- 500000; nr <- 1000; nCalcs <- n/nr
mat <- matrix(rnorm(n), nr = nr)
times <- 1:nCalcs 
system.time(out1 <- apply(mat, 1, function(vec) {mod = lm(vec ~ times); return(mod$coef[2])})) 
system.time({out2 = rep(NA, nCalcs); for(i in 1:nCalcs){out2[i] = lm(mat[i, ] ~ times)$coef[2]}}) 

# 4.3 Linear algebra

X <- matrix(rnorm(9), 3)
X
X[col(X) >= row(X)]


diag(X)
diag(X) <- 1
X
d <- diag(c(rep(1, 2), rep(2, 2)))
d


X %*% Y # matrix multiplication
X * Y # direct product
x %o% y # outer product of vectors x, y: x times t(y)
outer(x, y) # same thing
outer(x, y, function(x, y) cos(y)/(1 + x^2)) # evaluation of f(x,y) for all pairs of x,y values
crossprod(X, Y) # same as but faster than t(X) %*% Y!


solve(X) # inverse of X
solve(X, y) # (inverse of X) %*% y

##################################
# 5: Functions
##################################

# 5.1 Inputs

args(lm)


pplot <- function(x, y, pch = 16, cex = 0.4, ...) {
	plot(x, y, pch = pch, cex = cex, ...)
}


myFun <- function(...){
print(..2) 
args <- list(...)
print(args[[2]])
}
myFun(1,3,5,7)


args(dgamma)

tmpf = function(x, shape, rate = 1, scale = 1/rate, log = FALSE){
  print(shape)
  print(rate)
  print(scale)
}
tmpf(1, 2, rate = 3)
tmpf(1, 2, scale = 5)
dgamma(1, 2, scale = 5)
# does it matter that the rate and scale are inconsistent?
dgamma
# why can't we use "rate = 1/scale, scale = 1/rate"? or can we?


mat <- matrix(1:9, 3)
apply(mat, 2, function(vec) vec - vec[1])
apply(mat, 1, function(vec) vec - vec[1]) # explain why the result is transposed


f <- function(x, y = 2, z = 3 / y) { x + y + z }
args(f)
formals(f)
class(formals(f))


match.call(mean, quote(mean(y, na.rm = TRUE))) # what do you think quote does?


f <- function(){
	oldpar <- par()
	par(cex = 2)
	# body of code
	par() <- oldpar
}

# 5.2 Outputs

f <- function(x){ invisible(x^2) }
f(3)
a <- f(3)
a


mod <- lm(mpg ~ cyl, data = mtcars)
class(mod)
is.list(mod)

# 5.3 Variable scope

x <- 3
f <- function() {x <- x^2; print(x)}
f(x)
x # what do you expect?
f <- function() {assign('x', x^2, env = .GlobalEnv)} # careful, this could be dangerous as a variable is changed as a side effect


x <- 3
f <- function() { 
f2 <- function() { print(x) }
f2()
} 
f() # what will happen?

f <- function() {
f2 <- function() { print(x) }
x <- 7
f2()
}
f() # what will happen?

f2 <- function() print(x)
f <- function() {
x <- 7
f2()
}
f() # what will happen?


y <- 100
f <- function(){
	y <- 10
	g <- function(x) x + y
	return(g)
}
h <- f()
h(3)


z <- 3
x <- 100
f <- function(x, y = x*3) {x+y}
f(z*5)

#############################################
# 6: Environments and frames
#############################################

# 6.1 Frames and the call stack

sys.nframe()
f <- function() cat('Frame number is ', sys.nframe(), '; parent is ', sys.parent(), '.\n', sep = '')
f()
f2 <- function() f() 
f2() 


# exploring functions that give us information the frames in the stack
g <- function(y) {
	gg <- function() {
		# this gives us the information from sys.calls(), sys.parents() and sys.frames() as one object
		print(sys.status()) 
	}
	if(y > 0) g(y-1) else gg()
}
g(3)

# 6.2 Environments and the search path

search()
searchpaths()


x <- .GlobalEnv
parent.env(x)
while (environmentName(x) != environmentName(emptyenv())) {
	print(environmentName(parent.env(x)))
	x <- parent.env(x)
}


ls(pos = 7)[1:5] # what does this do?
ls("package:graphics")[1:5]
environment(lm)


lm <- function() {return(NULL)} # this seems dangerous but isn't
x <- 1:3; y <- rnorm(3); mod <- lm(y ~ x)
mod <- get('lm', pos = 'package:stats')(y ~ x)
mod <- stats::lm(y ~ x) # an alternative

# 6.3 with() and within()

with(mtcars, cyl*mpg)
new.mtcars <- within(mtcars, crazy <- cyl*mpg)
names(new.mtcars)

####################################################
# 7: Flow control and logical operations
####################################################

# 7.1 Logical operators

x <- rnorm(10)
x[x > 1 | x < -1]
x <- 1:10; y <- c(rep(10, 9), NA)
x > 5 | y > 5 # note that TRUE | NA evaluates to TRUE


a <- 7; b <- NULL
a < 8 | b > 3
a < 8 || b > 3
a <- c(0, 3); b <- c(4, 2)
if(a < 7 & b < 7) print('this is buggy code')
if(a < 7 && b < 7) print('this is buggy code too, but runs w/o warnings')
if(a[1] < 7 && b[1] < 7) print('this code is correct and the condition is TRUE')


a <- 7; b <- 5
!(a < 8 && b < 6)

# 7.2 If statements

x <- 5
if(x > 7){
	x <- x + 3
} else{
	x <- x - 3
}


if(x > 7) 
x <- x + 3 else x <- x - 3


x <- -3
if (x > 7){
	x <- x + 3
	print(x)
} else if (x > 4){
	x <- x + 1
	print(x)
} else if (x > 0){
	x <- x - 3
	print(x)
} else{
	x <- x - 7
	print(x)
}

if(x > 7) {
	statement1 } # what happens at this point?
else{ # what happens now?
	statement2
}


x <- rnorm(6)
truncx <- ifelse(x > 0, x, 0)
truncx


vals <- c(1, 2, NA); eps <- 1e-9 
# now pretend vals comes from some other chunk of code that we don't control
if(min(vals) > eps) { print(vals) } # not good practice
minval <- min(vals) 
if(!is.na(minval) && minval > eps) { print(vals) } # better practice

# 7.3 switch()

x <- 2; y <- 10
switch(x, log(y), sqrt(y), y) 
center <- function(x, type){
	switch(type, 
	mean = mean(x), # make sure to use = and not <-
	median = median(x),
	trimmed = mean(x, trim = .1))
}
x <- rgamma(100, 1)
center(x, 'median')
center(x, 'mean')

# 7.4 Loops

nIts <- 500; means <- rep(NA, nIts)
for(it in 1:nIts){ 
	means[it] <- mean(rnorm(100))
	if(identical(it %% 100, 0)) cat('Iteration', it, date(), '\n')
}


for(state in c('Ohio', 'Iowa','Georgia')) 
	print(state.x77[row.names(state.x77) == state, "Income"])


for(i in 1:10) i


for(i in 1:10){
	if(i == 5) break
	print(i)
}
for(i in 1:5){
	if(i == 2) next
	print(i)
}

# while loop example - MLE for zero-truncated Poisson, from p. 59 of Venables and Ripley, 4th edition

yp = rpois(50, lambda = 1)
y = yp[yp > 0]

ybar = mean(y)
lam = ybar
it = 0
del = 1
tol = 0.0001
while(abs(del) > tol && (it <- it + 1) < 10){  # let's parse this
  del = (lam - ybar * (1- exp(-lam)))/(1 - ybar * exp(-lam))  # adjustment to parameter estimate
  lam = lam - del  # new parameter value
  cat(it, lam, "\n")
}


mat <- matrix(1:4, 2)
submat <- mat[mat[1, ] > 5]
for(i in 1:nrow(submat)) print(i)

#####################
# 8: Formulas
#####################

resp <- 'y ~'
covTerms <- 'x1'
for(i in 2:5){ covTerms <- paste(covTerms, '+ x', i, sep='')}
form <- as.formula(paste(resp, covTerms, sep = ''))
# lm(form, data = dat)
form
class(form)


resp <- 'y ~'
covTerms <- paste('x', 1:5, sep = '', collapse = ' + ')
form <- as.formula(paste(resp, covTerms))
form
# lm(form, data = dat)

##############################
# 9: Data storage and formats
##############################

text <- "_Melhore sua seguran\xe7a_"
iconv(text, from = "latin1", to = "UTF-8")
iconv(text, from = "latin1", to = "ASCII", sub = "???")


###########################################
# 10: Reading data from text files into R
###########################################

?read.table

setwd('~/Desktop/243/data')
dat <- read.table('RTAData.csv', sep = ',', head = TRUE) # this is data from a kaggle.com competition on road traffic on an Australian freeway
lapply(dat, class)
levels(dat[ ,2])
dat2 <- read.table('RTAData.csv', sep = ',', head = TRUE, na.strings = c("NA", "x"), stringsAsFactors = FALSE)
unique(dat2[ ,2])
# hmmm, what happened to the blank values this time?
which(dat[ ,2] == "")
dat2[which(dat[, 2] == "")[1], ] # deconstruct it!


sequ <- read.table('hivSequ.csv', sep = ',', header = T, colClasses = 	c('integer','integer','character','character','numeric','integer')) # this is data from a kaggle.com competition on HIV sequences and health outcomes
# let's make sure the coercion worked - sometimes R is obstinant
lapply(sequ, class) # this makes use of the fact that a data frame is a list


dat <- readLines('~/Desktop/243/data/precip.txt') # this is daily precipitation amounts from US weather stations, Feb. 2010
id <- as.factor( substring(dat, 4, 11) )
year <- substring(dat, 17, 20)
class(year)
year <- as.integer(substring(dat, 18, 21))
month <- as.integer(substring(dat, 22, 23))
nvalues <- as.integer(substring(dat, 28, 30))


dat <- readLines(pipe("ls -al"))
dat <- read.table(pipe("unzip dat.zip"))
dat <- read.csv(gzfile("dat.csv.gz"))
dat <- readLines("http://www.stat.berkeley.edu/~paciorek/index.html")


con <- file("~/Desktop/243/data/precip.txt", "r") # "r" for 'read' - you can also open files for writing with "w" (or "a" for appending) 
class(con)
blockSize <- 1000 # obviously this would be large in any real application 
nLines <- 300000
for(i in 1:ceiling(nLines / blockSize)){ 
	lines <- readLines(con, n = blockSize) 
	# manipulate the lines and store the key stuff
}
close(con) 


dat <- readLines('~/Desktop/243/data/precip.txt')
con <- textConnection(dat[1], "r")
read.fwf(con, c(3,8,4,2,4,2))

##################################################
# 11: Text manipulations and regular expressions
##################################################

# 11.1 Basic text manipulation

out <- paste("My", "name", "is", "Chris", ".", sep = " ")
paste(c("My", "name", "is", "Chris", "."), collapse = " ")
strsplit(out, split = ' ')

vars <- c("P", "HCA24", "SOH02")
substring(vars, 2, 3)
vars <- c("date98", "size98", "x98weights98", "sdfsd")
grep("98", vars)
gregexpr("98", vars)
gsub("98", "04", vars)

# 11.2 Regular expressions (regexp)

addresses <- c("john@att.com", "stat243@bspace.berkeley.edu", "john_smith@att.com")
grep("[[:digit:]_]", addresses)

text <- c("john","jennifer pierce","Juan carlos rey")
grep("^[[:upper:]]", text) # finds text that starts with an upper case letter
grep("[[:digit:]]$", text) # finds text with a number at the end


grep("[ \t]", text)
gregexpr("[ \t]", text)
gsub("^j", "J", text)


text <- c("hi John", "V1@gra", "here's the problem set")
grep("[[:alpha:]]+[[:digit:][:punct:]]+[[:alpha:]]*", text) # ok, we need to do a bit more work...


grep("([[:digit:]]{1,3}\\.){3}[[:digit:]]{1,3}", text)


text <- ('"H4NY07011","ACKERMAN, GARY L.","H","$13,242",,,')
gsub("([^\",]),", "\\1", text)


gregexpr("(http|ftp):\\/\\/", c("at the site http://www.ibm.com", "other text"))


text <- "Students may participate in an internship <b> in place</b> of <b> one </b> of their courses."
gsub("<.*>", "", text)

gregexpr("[[:space:]]+.+?[[:space:]]+", "the dog jumped over the blue moon")

line <- "a dog\tjumped\nover \tthe moon."
cat(line)
strsplit(line, split = "[[:space:]]+")
strsplit(line, split = "[[:blank:]]+")

##################################################
# 12: Manipulating dates
##################################################

date1 <- as.Date("03-01-2011", format = "%m-%d-%Y")
date2 <- as.Date("03/01/11", format = "%m/%d/%y")
date3 <- as.Date("05-May-11", format = "%d-%b-%y")
date1
date1 + 42
date3 - date1


julianValues <- c(1, 100, 1000)
as.Date(julianValues, origin="1990-01-01")


library(chron)
d1 <- chron("12/25/2004", "11:37:59") # default format of m/d/Y and h:m:s
d2 <- as.chron(date1)
d1; d2

##################################################
# 13: Database manipulations
##################################################

# 13.1 Databases

drv <- dbDriver("SQLite")
con <- dbConnect(drv, dbname = "myDatabase") # so we're using a connection again
mars <- dbReadTable(con, "MarsData")
dbDisconnect(con)


myQuery <- "SELECT City, State FROM Station WHERE Lat_N > 39.7"
marsSub <- dbGetQuery(con, myQuery)

# 13.2 Dataset manipulations in R

sum(mtcars$cyl == 6) # counts the number of TRUE values
mean(mtcars$cyl == 6) # proportion of TRUE values
unique(mtcars$cyl)
length(unique(mtcars$cyl))
duplicated(mtcars$cyl) # tells us which elements are repeated
table(mtcars$gear) # tabulates # of records in each unique value of mtcars$gear
table(mtcars$cyl, mtcars$gear) # cross-tabulation by two variables


table(mtcars[c("cyl", "vs", "am", "gear")])
ftable(mtcars[c("cyl", "vs", "am", "gear")])


mtcarsSorted <- mtcars[order(mtcars$cyl, mtcars$mpg), ]
head(mtcarsSorted)


load("~/Desktop/243/data/exampleMerge.RData")
fits2 <- merge(fits, master, by.x = 'stn', by.y = 'coop', all.x = TRUE, sort = FALSE)
identical(fits$stn, fits2$stn) # hmmm...., ordering has not been preserved 
which(fits2$stn != fits$stn)[1:5] 


f.merge <- function(x, y, ...){
# merges two datasets but preserves the ordering based on the first dataset
x$ordering <- 1:nrow(x)
tmp <- merge(x, y, ...)
tmp <- tmp[order(tmp$ordering), ] # sort by the indexing (note that some values may be missing if all.x = FALSE and some rows of x not matched in y)
return(tmp[,!(names(tmp) == "ordering")])
}


load('~/Desktop/243/data/prec.RData')
prec <- prec[1:1000, ] # just to make the example code run faster
precVars <- 5:ncol(prec)
precStacked <- stack(prec, select = precVars)
out <- unstack(precStacked)
# to use reshape, we need a unique id for each row since reshape considers each row in the wide format as a subject
prec <- cbind(unique = 1:nrow(prec), prec)
precVars <- precVars + 1
precLong <- reshape(prec, varying = names(prec)[precVars], idvar = 'unique', direction = 'long', sep = '')
precLong <- precLong[!is.na(precLong$prec), ]
precWide <- reshape(precLong, v.names = 'prec', idvar = 'unique', direction = 'wide', sep='')


x <- rnorm(100)
f <-  cut(x, breaks = c(-Inf, -1, 1, Inf), labels = c('low', 'medium', 'high'))
levels(f) # note that f is not explicitly ordered
f <- relevel(f, 'high') # puts high as first level
f <-  cut(x, breaks = c(-Inf, -1, 1, Inf), labels = c('low', 'medium', 'high'), ordered_result = TRUE)

###############################
# 14: Output from R
###############################

# 14.2 Printing to the screen or a file

temps <- c(12.5, 37.234324, 1342434324.79997234, 2.3456e-6, 1e10)
sprintf("%9.4f C", temps)


val <- 1.5
cat('My value is ', val, '.\n', sep = '')
print(paste('My value is ', val, '.', sep = ''))


# input
x <- 7
n <- 5
# display powers
cat("Powers of", x, "\n")
cat("exponent   result\n\n")
result <- 1
for (i in 1:n) {
	result <- result * x
	cat(format(i, width = 8), format(result, width = 10),"\n", sep = "")
}
x <- 7
n <- 5
# display powers
cat("Powers of", x, "\n")
cat("exponent result\n\n")
result <- 1
for (i in 1:n) {
	result <- result * x
	cat(i, '\t', result, '\n', sep = '')
}

#################################################################
### Demo code for Unit 5 of Stat243, "Debugging and Profiling"
### Chris Paciorek, September 2012
#################################################################


################################
# 1: Comon syntax errors
################################

mat <- matrix(1:4, 2, 2)[1, ]
dim(mat)
print(mat)

colSums(mat)

mat <- matrix(1:4, 2, 2)[1, , drop = FALSE]


################################
# 2: Debugging strategies
################################

# 2.1 Basic strategies

x <- 3
stopifnot(is(x, "matrix"))


# finding what global variables are used
library(codetools)

findGlobals(lm)

f <- function() {y = 3; print(x + y)}
findGlobals(f)

options()$warn

x = 1:3
for(i in 1:100){
  if(x > 2) print("hi")
}
warnings()

options(warn = 1)
for(i in 1:100){
  if(x > 2) print("hi")
}

options(warn = 2)
for(i in 1:100){
  if(x > 2) print("hi")
}
i

# 2.3 Using debug()

# simple use of browser via debug()

debug(lm)

n <- nrow(mtcars)
lm(mpg ~ disp, data = mtcars)

ls()
# use browser commands: c, n, Q
# let's step through this with 'n'
print(n)  # typing 'n' won't work!

undebug(lm)


# 2.4 Tracing errors in the call stack


library(MASS)

gamma.est <- function(data) {
  # this fits a gamma distribution to a collection of numbers
  m <- mean(data)
  v <- var(data)
  s <- v/m
  a <- m/s
  return(list(a=a,s=s))
}

calcVar = function(estimates){
  var.of.ests <- apply(estimates, 2, var)
  return(((n-1)^2/n)*var.of.ests)
}

gamma.jackknife.2 <- function(data) {
  # jackknife the estimation
  
  n <- length(data)
  jack.estimates = gamma.est(data[-1])
  for (omitted.point in 2:n) {
    jack.estimates = rbind(jack.estimates, gamma.est(data[-omitted.point]))
  }
  jack.var = calcVar(jack.estimates)
  return(sqrt(jack.var))
}

# using traceback()
gamma.jackknife.2(cats$Hwt) # jackknife gamma dist. estimates of cat heart weights
traceback()
# something is going on when we try to calculate the variance


# using recover() upon an error
options(error = recover)
gamma.jackknife.2(cats$Hwt)
# choose '2' to go into calcVar
estimates
apply(estimates, 2, var) # ok, so the error does occur here
var(estimates[ ,1])  # can we use var() a single time?
estimates[ ,1]
class(estimates)

options(error = NULL) # reset things

# 2.5 Using trace()

trace(calcVar, browser)
calcVar
gamma.jackknife.2(cats$Hwt)
# at this point you can use 'n', 'c', 'Q' as with debug()

###########################
# 3: Memory management 
###########################

# 3.2 Monitoring memory use

gc()
x = rnorm(1e8) # should use about 800 Mb
object.size(x)
gc()
rm(x)
gc() # note the “max used” column

###########################
# 4: Timing/Benchmarking
###########################

n <- 1000
x <- matrix(rnorm(n^2), n)
system.time({mns <- rep(NA, n); for(i in 1:n) mns[i] <- mean(x[i , ])})
system.time(rowMeans(x))

# speed of one calculation
n <- 1000
x <- matrix(rnorm(n^2), n)
benchmark(crossprod(x), replications = 10,
          columns=c('test', 'elapsed', 'replications'))

# comparing different approaches to a task
benchmark( {mns <- rep(NA, n); for(i in 1:n) mns[i] <- mean(x[i , ])},
          rowMeans(x),
          replications = 10,
          columns=c('test', 'elapsed', 'replications'))


###########################
# 5: Profiling
###########################

# here's an example of generating temporally correlated random variables
makeTS = function(param, len){
  times = seq(0, 1, length = len)
  dd = rdist(times)
  C = exp(-dd/param)
  U = chol(C)
  white = rnorm(len)
  return(crossprod(U, white))
}

Rprof("makeTS.prof")
makeTS(0.1, 2000)
Rprof(NULL)
summaryRprof("makeTS.prof")

# this takes a bit of deciphering because most of the time is in calls to C and Fortran code


##################################################
### Demo code for Unit 6 of Stat243, "R programming"
### Chris Paciorek, September 2012
##################################################

#############################
# 1: Efficiency
#############################

### 1.1 Fast initialization

options(width = 50)
n <- 10000
x <- 1; 
system.time(for(i in 2:n) x <- c(x, i))
system.time({x <- rep(as.numeric(NA), n); for(i in 1:n) x[i] <- i})


n <- 1000000
system.time(x <- rep(as.numeric(NA), n))
system.time({x <- as.numeric(NA); length(x) <- n})


nr <- nc <- 2000
system.time(x <- matrix(as.numeric(NA), nr, nc))
system.time({x <- as.numeric(NA); length(x) <- nr * nc; dim(x) <- c(nr, nc)})

### 1.2 Vectorized calculations

n <- 1e6
x <- rnorm(n)
system.time(x2 <- x^2)
x2 <- as.numeric(NA)
system.time({length(x2) <- n; for(i in 1:n){ x2[i] <- x[i]^2}}) # how many orders of magnitude slower?


line <- c("Four score and 7 years ago, this nation")
startIndices = seq(1, by = 3, length = nchar(line)/3)
substring(line, startIndices, startIndices + 1)

# how soon does the R code send stuff off to compiled C code?
`+`
mean()
mean.default()
chol.default()

# a surprise where "manual" coding is faster than a standard function
x <- rnorm(1000000)
system.time(truncx <- ifelse(x > 0, x, 0))
system.time({truncx <- x; truncx[x < 0] <- 0})
system.time(truncx <- x * (x > 0))


# surprising case where vectorization with a for loop beats apply()
rmRows1 = function(x){
    omit = F
    n = ncol(x)
    for(i in 1:n)
       omit = omit | is.na(x[ , i])
    x[!omit,]
}

rmRows2 = function(x){
  sum.isna = function(row){
    return(sum(is.na(row)))
  }
  n.isna = apply(x, 1, sum.isna)
  return(x[!n.isna, ])
}

# this seems clever; does it improve things?
rmRows3 = function(x){
  return(x[!is.na(rowMeans(x)), ])
}

### 1.3 Using apply() and specialized functions

n <- 3000; x <- matrix(rnorm(n * n), nr = n)
system.time(out <- apply(x, 1, mean))
system.time(out <- rowMeans(x))


system.time(out <- sweep(x, 2, STATS = colMeans(x), FUN = "-"))


system.time(out2 <- t(t(x) - colMeans(x)))
identical(out, out2)

### 1.4 Matrix algebra efficiency

mat <- matrix(rnorm(500*500), 500)
system.time(apply(mat, 1, sum))
system.time(mat %*% rep(1, ncol(mat))) # rewritten as matrix algebra
system.time(rowSums(mat))

# how do I take successive differences of columns of a matrix quickly?
nc = 500
nr = 500
mat = matrix(rnorm(nr * nc), nr, nc)

# try #1 via matrix algebra
system.time({A <- matrix(0, nr = nc, nc = nc - 1);
             A[row(A) == col(A)] <- 1;
             A[row(A) == col(A) + 1] <- -1;
             res <- mat %*% A} )

# try #2 via a for loop
system.time({res <- matrix(NA, nr, nc -1);
             for(j in 2:nc) res[ , j-1] = mat[ , j-1] - mat[ , j]})

# try #3 via direct matrix subtraction with clever subsetting
system.time(res <- mat[ , -nc] - mat[ , -1])


n <- 5000
A <- matrix(rnorm(5000 * 5000), 5000)
B <- matrix(rnorm(5000 * 5000), 5000)
x <- rnorm(5000)
system.time(res <- A %*% B %*% x)
# how can I force an efficient order of operations?

# how do I find a trace efficiently?
system.time(sum(diag(A %*% B)))
system.time(sum(A*t(B)))

# avoid matrix algebra with diagonal matrices when vectorization is possible
n <- 1000
X <- matrix(rnorm(n^2), n) 
diagvals <- rnorm(n)
D = diag(diagvals)
diag(X) <- diag(X) + diagvals
tmp <- diagvals * X # instead of D %*% X
tmp2 <- t(t(X) * diagvals) # instead of X %*% D

### 1.5 Fast mapping/lookup tables

vals <- rnorm(10)
names(vals) <- letters[1:10]
labs <- c("h", "h", "a", "c")
vals[labs]

### 1.6 Byte compiling

library(compiler); library(rbenchmark)
f <- function(x){
	for(i in 1:length(x)) x[i] <- x[i] + 1
	return(x)
}
fc <- cmpfun(f)
fc # notice the indication that the function is byte compiled.
benchmark(f(x), fc(x), replications = 5)

### 1.7 Challenges

# Challenge 1
p = 5
mns = rnorm(p)
sds = exp(rnorm(p))

n = 100000
y = rnorm(n)

system.time({
  lik <- matrix(NA, nr = n, nc = p);
  for(j in 1:p) lik[ , j] <- dnorm(y, mns[j], sds[j])
})
# can we do better?

#################################################
# 2: Advanced topics in working with functions
#################################################

### 2.2 Alternatives to pass by value in R

x <- rnorm(10)
f <- function(input){
	data <- input
	g <- function(param) return(param * data) 
}
myFun <- f(x)
rm(x) # to demonstrate we no longer need x
myFun(3)
x <- rnorm(1e7)
myFun <- f(x)
object.size(myFun)


x <- rnorm(10)
myFun2 <- with(list(data = x), function(param) return(param * data))
rm(x)
myFun2(3)
x <- rnorm(1e7)
myFun2 <- with(list(data = x), function(param) return(param * data))
object.size(myFun2)

### 2.3 Operators

a <- 7; b <- 3
# let's think about the following as a mathematical function
#  -- what's the function call?
a + b 
`+`(a, b)


myList = list(list(a = 1:5, b = "sdf"), list(a = 6:10, b = "wer"))
myMat = sapply(myList, `[[`, 1) 
# note that the index "1" is the additional argument to the [[ function
x <- 1:3; y <- c(100,200,300)
outer(x, y, `+`)


`%2%` <- function(a, b) {2 * (a + b)}
3 %2% 7


mat <- matrix(1:4, 2, 2)
mat[ , 1] 
mat[ , 1, drop = FALSE] # what's the difference?

### 2.4 Unexpected functions and replacement functions

if(x > 27){
	print(x)	
} else{
	print("too small") 
}


diag(mat) <- c(3, 2)
is.na(vec) <- 3
names(df) <- c('var1', 'var2')


mat <- matrix(rnorm(4), 2, 2)
diag(mat) <- c(3, 2)
mat <- `diag<-`(mat, c(10, 21))
base::`diag<-`


me <- list(name = 'Chris', age = 25)
`name<-` <- function(obj, value){
obj$name <- value
return(obj)
}
name(me) <- 'Christopher'

### 2.5 Functions as objects

x <- 3
x(2)
x <- function(z) z^2
x(2)


myFun = 'mean'; x = rnorm(10)
eval(as.name(myFun))(x)


f <- function(fxn, x){
	match.fun(fxn)(x) 
}
f("mean", x)


f <- function(x) x
f2 <- function(x) y <- x^2
f3 <- function(x) {y <- x^2; z <- x^3; return(list(y, z))}
class(f)
typeof(body(f)); class(body(f))
typeof(body(f2)); class(body(f2))
typeof(body(f3)); class(body(f3))


f4 <- function(x, y = 2, z = 1/y) {x + y + z}
args <- formals(f4)
class(args)

### 2.6 Promises and lazy evaluation

f <- function(a, b=c) {
  c <- log(a); 
  return(a*b)
}
f(7)


f <- function(x) print("hi")
system.time(mean(rnorm(1000000)))
system.time(f(3))
system.time(f(mean(rnorm(1000000)))) 

#############################################
# 3: Evaluating memory use
#############################################

### 3.1 Hidden uses of memory

x <- rnorm(1e7)
gc()
dim(x) <- c(1e4, 1e3)
diag(x) <- 1
gc()


x <- rnorm(1e7)
.Internal(inspect(x))
x[5] <- 7
.Internal(inspect(x))
gc()


x <- rnorm(1e7)
gc()
y <- x[1:(length(x) - 1)]
gc()

### 3.2 Passing objects to compiled code

f <- function(x){
  print(.Internal(inspect(x)))
  return(mean(x))
}
x <- rnorm(1e7)
class(x)
debug(f)
f(x)
f(as.numeric(x))
f(as.integer(x))

### 3.3 Lazy evaluation, delayed copying (copy-on-change) and memory use
        
f <- function(x){
  print(gc())
  z <- x[1]
  .Internal(inspect(x))
  return(x)
}
y <- rnorm(1e7)
gc()
.Internal(inspect(y))
out <- f(y)
.Internal(inspect(out))


y <- rnorm(1e7)
gc()
.Internal(inspect(y))
x <- y
gc()
.Internal(inspect(x))
x[1] <- 5
gc()
.Internal(inspect(x))
rm(x)
x <- y
.Internal(inspect(x))
.Internal(inspect(y))
y[1] <- 5
.Internal(inspect(x))
.Internal(inspect(y))


x <- rnorm(1e7) 
myfun <- function(y){ 
	z <- y 
	return(mean(z)) 
} 
myfun(x)


x <- c(NA, x) 
myfun <- function(y){ 
	return(mean(y, na.rm = TRUE))
}

### 3.5 Example

fastcount <- function(xvar,yvar) {
	naline <- is.na(xvar)
	naline[is.na(yvar)] = TRUE
	xvar[naline] <- 0
	yvar[naline] <- 0
	useline <- !naline;
	# Table must be initialized for -1's
	tablex <- numeric(max(xvar)+1)
	tabley <- numeric(max(xvar)+1)
	stopifnot(length(xvar) == length(yvar))
	res <- .C("fastcount",PACKAGE="GCcorrect",
		tablex = as.integer(tablex),tabley = as.integer(tabley),
		as.integer(xvar), as.integer(yvar),as.integer(useline),
		as.integer(length(xvar)))
	xuse <- which(res$tablex>0)
	xnames <- xuse - 1
	resb <- rbind(res$tablex[xuse], res$tabley[xuse]) 
	colnames(resb) <- xnames
	return(resb)
}

##############################################
# 4: Object-oriented programming (OOP)
##############################################

### 4.1 S3 approach

library(methods)
yb <- sample(c(0, 1), 10, replace = TRUE)
yc <- rnorm(10)
x <- rnorm(10)
mod1 <- lm(yc ~ x)
mod2 <- glm(yb ~ x, family = binomial)
class(mod1)
class(mod2)
is.list(mod1)
names(mod1)
is(mod2, "lm")
# methods associated with lm
methods(class = "lm")

# creating our own class
me <- list(firstname = 'Chris', surname = 'Paciorek', age = NA)
class(me) <- 'indiv' # there is already a 'person' class in R


indiv <- function(firstname = NA, surname = NA, age = NA){
	# constructor for 'indiv' class
	obj <- list(firstname = firstname, surname = surname, age = age)
	class(obj) <- 'indiv' 
	return(obj)
}
me <- indiv('Chris','Paciorek')


class(me) <- "silly"
class(me) <- "indiv"


mod <- lm(yc ~ x)
summary(mod)
gmod <- glm(yb ~ x, family = 'binomial')
summary(gmod)


mean
methods(mean)

# creating a generic
summarize <- function(object, ...) 
	UseMethod("summarize") 

# creating a specific method for a class
summarize.indiv <- function(object) 
	return(with(object, cat("Individual of age ", age, 
	" whose name is ", firstname, " ", surname, ".\n",sep = "")))
summarize(me)


out <- summary(mod)
out
print(out)
getS3method(f="print",class="summary.lm")


class(me) <- c('BerkeleyIndiv', 'indiv')
summarize(me) 

# class-specific operators
methods(`+`)

`+.indiv` <- function(object, incr) {
	object$age <- object$age + incr
	return(object)
}
old.me <- me + 15


`age<-` <- function(x, ...) UseMethod("age<-")
`age<-.indiv` <- function(object, value){ 
	object$age <- value
	return(object)
}
age(old.me) <- 60

### 4.2 S4 Approach

library(methods)
setClass("indiv",
	representation(
		name = "character",
		age = "numeric",
		birthday = "Date" 
	)
)
me <- new("indiv", name = 'Chris', age = 20, 
			birthday = as.Date('91-08-03'))
# next notice the missing age slot
me <- new("indiv", name = 'Chris', birthday = as.Date('91-08-03')) 
# finally, apparently there's not a default object of class Date
me <- new("indiv", name = 'Chris', age = 20)
me
me@age <- 60

# S4 methods are designed to be more structured than S3 with careful checking
setValidity("indiv",
	function(object) {
		if(!(object@age > 0 && object@age < 130)) 
			return("error: age must be between 0 and 130")
		if(length(grep("[0-9]", object@name))) 
			return("error: name contains digits")
		return(TRUE)
	# what other validity check would make sense given the slots?
	}
)
me <- new("indiv", name = "5z%a", age = 20, birthday = as.Date('91-08-03'))
me <- new("indiv", name = "Z%a B''*", age = 20, birthday = as.Date('91-08-03'))
me@age <- 150 # so our validity check is not foolproof


# generic method - needed if generic is not already defined
setGeneric("voter", function(object, ...) { standardGeneric("voter") })
# class-specific method
voter.indiv = function(object){
	if(object@age > 17){
		cat(object@name, "is of voting age.\n")
	} else cat(object@name, "is not of voting age.\n")
}
setMethod(voter, signature = c("indiv"), definition = voter.indiv)
voter(me)


setClass("BerkeleyIndiv",
	representation(
		CalID = "character"
	),
	contains = "indiv"
)
me <- new("BerkeleyIndiv", name = "Chris", age = 20, birthday = as.Date('91-08-03'), CalID = "01349542")
voter(me)
is(me, "indiv")


showClass("data.frame")

### 4.3 Reference classes

tsSimClass <- setRefClass("tsSimClass", 
	fields = list(
		n = "numeric", 
		times = "numeric",
		corMat = "matrix",
		lagMat = "matrix",
		corParam = "numeric",
		U = "matrix",
		currentU = "logical"),
	methods = list(
		initialize = function(times = 1:10, corParam = 1, ...){
			# we seem to need default values for the copy() method to function properly
			require(fields)
			times <<- times # field assignment requires using <<-
			n <<- length(times)
			corParam <<- corParam
			currentU <<- FALSE
			calcMats()
			callSuper(...) # calls initializer of base class (envRefClass)
		},
		calcMats = function(){
			# Python-style doc string
			' calculates correlation matrix and Cholesky factor ' 
			lagMat <- rdist(times) # local variable
			corMat <<- exp(-lagMat / corParam) # field assignment
			U <<- chol(corMat) # field assignment
			cat("Done updating correlation matrix and Cholesky factor\n")
			currentU <<- TRUE
		},
		changeTimes = function(newTimes){
			times <<- newTimes
			calcMats()
		},
		show = function(){ # 'print' method
			cat("Object of class 'tsSimClass' with ", n, " time points.\n", sep = '')
		}
	)
)


tsSimClass$methods(list(
	simulate = function(){
		' simulates random processes from the model ' 
		if(!currentU)
			calcMats()
		return(crossprod(U, rnorm(n)))
	})
)


master <- tsSimClass$new(1:100, 10)
master
tsSimClass$help('calcMats')
devs <- master$simulate()
plot(master$times, devs, type = 'l')
mycopy <- master
myDeepCopy <- master$copy()
master$changeTimes(seq(0,1, length = 100))
mycopy$times[1:5]
myDeepCopy$times[1:5]


master$field('times')[1:5]
# the next line is dangerous in this case, since
# currentU will no longer be accurate
master$field('times', 1:10) 

###################################################
### 5: Creating and working in an environment
###################################################

e <- new.env()
assign('x', 3, envir = e) # same as e$x = 3
e$x
get('x', envir = e, inherits = FALSE) 
# the FALSE avoids looking for x in the enclosing environments
e$y <- 5
objects(e)
rm('x', envir = e)
parent.env(e)


myWalk <- new.env(); myWalk$pos = 0
nextStep <- function(walk) walk$pos = walk$pos + sample(c(-1, 1), size = 1)
nextStep(myWalk)

# if we didn't use an environment, here's how we might do it; note the somewhat awkward reassignment
myWalk = list(pos = 0)
nextStep = function(walk){
  walk$pos = walk$pos + sample(c(-1, 1), size = 1)
  return(walk)
}
myWalk = nextStep(myWalk)


eval(quote(pos <- pos + sample(c(-1, 1), 1)), envir = myWalk)
evalq(pos <- pos + sample(c(-1, 1), 1), envir = myWalk) 

######################################
### 6: Computing on the language
######################################

### 6.1 The R interpreter

plot.xy
`%*%`

### 6.2 Parsing code and understanding language objects

obj <- quote(if (x > 1) "orange" else "apple")
as.list(obj)
class(obj)
weirdObj <- quote(`if`(x > 1, 'orange', 'apple'))
identical(obj, weirdObj)

x <- 3; typeof(quote(x))

myExpr <- expression(x <- 3)
eval(myExpr)
typeof(myExpr)


# here's what I did to gather the information for the Table in the notes
obj1 = function() {}
obj2 = quote(x)
obj3 = expression(x <- 3)
obj4 = quote(f())
obj5 = quote(if(x < 3) y = 5)
obj6 = quote(x <- 3)
class(obj1); typeof(obj1)
class(obj2); typeof(obj2)
class(obj3); typeof(obj3)
class(obj4); typeof(obj4)
class(obj5); typeof(obj5)
class(obj6); typeof(obj6)
is.call(obj6)
is.language(obj2)

# exploring language objects
e0 <- quote(3)
e1 <- expression(x <- 3) 
e1m <- expression({x <- 3; y <- 5}) 
e2 <- quote(x <- 4) 
e3 <- quote(rnorm(3))
class(e0); typeof(e0)
class(e1); typeof(e1)
class(e1[[1]]); typeof(e1[[1]])
class(e1m); typeof(e1m)
class(e2); typeof(e2)
identical(e1[[1]], e2)
class(e3); typeof(e3)
e4 <- quote(-7)
class(e4); typeof(e4) # huh? what does this imply?
as.list(e4)

eval(e1)
eval(e2)
e1mlist <- as.list(e1m)
e2list <- as.list(e2)
eval(as.call(e2list)) # here's how to do it if the language object is actually a list
eval(as.expression(e1mlist))

# delving into the list-like structure of language objects
e1 = expression(x <- 3)  # e1 is one-element list with the element an object of class '<-' whose type is 'language'
typeof(e1) # expression
class(e1)  # expression
e1[[1]]
as.list(e1[[1]])
lapply(e1[[1]], class)


y = rnorm(5)
e3 = quote(mean(y))
as.list(e3)
class(e3[[2]])

# we have recursion
e3 = quote(mean(c(12,13,15)))
as.list(e3)
as.list(e3[[2]])

### 6.3 Manipulating the parse tree

out <- quote(y <- 3)
out[[3]] <- 4
eval(out)

e1 <- quote(2 + 5)
e2 <- quote(plot(x, y))
e2[[1]] <- `+`
eval(e2)
e1[[3]] <- e2
e1
class(e1[[3]]) # note the nesting
eval(e1) # what should I get?

codeText <- deparse(out)
parsedCode <- parse(text = codeText) # works like quote except on the code in the form of a string
eval(parsedCode)
deparse(quote(if (x > 1) "orange" else "apple"))

x3 <- 7
i <- 3
as.name(paste('x', i, sep=''))
eval(as.name(paste('x', i, sep='')))
deparse(quote(x))

x3 <- 7
i <- 3
as.name(paste('x', i, sep=''))
eval(as.name(paste('x', i, sep='')))

f <- function(obj){
objName <- deparse(substitute(obj))
print(objName)
}
f(y)

### 6.4 Parsing replacement expressions

animals = c('cat', 'dog', 'rat','mouse')

`[<-`(animals,4,'cat')
animals
'[<-'(animals,4,'dog')
animals

out1 = quote(animals[4] <- 'rat')  
out2 = quote(`<-`(animals[4], 'rat'))  # same as out1
out3 = quote('[<-'(animals,4,'rat'))   # same as out2 when evaluated (see below),

# but parse tree is different (so parsing is not completely invertible)

as.list(out1)
as.list(out2)
identical(out1, out2)
as.list(out3)
identical(out1, out3)
typeof(out1[[2]])  # language
class(out1[[2]])   # call

eval(out1)
animals
animals[4] = 'dog'
eval(out3) 
animals # both do the same thing

# here's another example of a parsed replacement expression 
x = diag(rep(1, 3))
obj = quote(diag(x) <- 3)
as.list(obj)
class(obj[[2]])


### 6.5 substitute()

identical(quote(z <- x^2), substitute(z <- x^2))

e <- new.env(); e$x <- 3
substitute(z <- x^2, e)

e$z <- 5
substitute(z <- x^2, e)

f <- function(obj){
objName <- deparse(substitute(obj))
print(objName)
}
f(y)

substitute(a + b, list(a = 1, b = quote(x)))

e1 <- quote(x + y)
e2 <- substitute(e1, list(x = 3))

e2 <- substitute(substitute(e, list(x = 3)), list(e = e1))
substitute(substitute(e, list(x = 3)), list(e = e1)) # so e1 is substituted as an evaluated object, which then allows for substitution for 'x' 
e2
eval(e2)

############################################################
### Demo code for Unit 7 of Stat243, "Parallel processing"
### Chris Paciorek, October 2012
############################################################

#################################
# 2: Parallelization
#################################

### 2.2 Threading

X <- matrix(rnorm(8000^2), 8000)
system.time({
X <- crossprod(X) # X^t X produces pos.def. matrix
U <- chol(x)})  # U^t U = X 
# exit R, execute: "export OMP_NUM_THREADS=1", and restart R
system.time({
X <- crossprod(X)
U <- chol(X)})


#################################
# 3: Explicit parallel code in R
#################################

### 3.1 foreach

require(parallel) # one of the core R packages
require(doParallel)
# require(multicore); require(doMC) # alternative to parallel/doParallel
# require(Rmpi); require(doMPI) # when Rmpi is available as the back-end
library(foreach)
library(iterators)

taskFun <- function(){
	mn <- mean(rnorm(10000000))
	return(mn)
}
nCores <- 4  
registerDoParallel(nCores) 
# registerDoMC(nCores) # alternative to registerDoParallel
#
# cl <- startMPIcluster(nCores); registerDoMPI(cl) # when using Rmpi as the back-end
out <- foreach(i = 1:100, .combine = c) %dopar% {
	cat('Starting ', i, 'th job.\n', sep = '')
	outSub <- taskFun()
	cat('Finishing ', i, 'th job.\n', sep = '')
	outSub # this will become part of the out object
}

### 3.2 Parallel apply and vectorization

require(parallel)
nCores <- 4  
### using sockets
#
# ?clusterApply
cl <- makeCluster(nCores) # by default this uses sockets
nSims <- 60
testFun <- function(i){
	mn <- mean(rnorm(1000000))
	return(mn)
}
# if the processes need objects (x and y, here) from the master's workspace:
# clusterExport(cl, c('x', 'y')) 
system.time(
	res <- parSapply(cl, 1:nSims, testFun)
)
system.time(
	res2 <- sapply(1:nSims, testFun)
)
myList <- as.list(1:nSims)
res <- parLapply(cl, myList, testFun)

### using forking
system.time(
	res <- mclapply(seq_len(nSims), testFun, mc.cores = nCores) 
)


require(parallel)
nCores <- 4
cl <- makeCluster(nCores) 
library(fields)
ds <- runif(6000000, .1, 10)
system.time(
	corVals <- pvec(ds, Matern, .1, 2, mc.cores = nCores)
)
system.time(
	corVals <- Matern(ds, .1, 2)
)

### 3.3 Explicit parallel programming in R: mcparallel and forking

# mcparallel()

library(parallel)
n <- 10000000
system.time({
	p <- mcparallel(mean(rnorm(n)))
	q <- mcparallel(mean(rgamma(n, shape = 1)))
	res <- mccollect(list(p,q))
})
system.time({
	p <- mean(rnorm(n))
	q <- mean(rgamma(n, shape = 1))
})


# forking

library(fork)
# mode 1
pid <- fork(slave = myfun)
# mode 2
{ # this set of braces is REQUIRED when you don't pass a function 
  # to the slave argument of fork()
	pid <- fork(slave = NULL) 
	if(pid==0) {
		cat("Starting child process execution.\n") 
		tmpChild <- mean(rnorm(10000000))
		cat("Result is ", tmpChild, "\n", sep = "")
		save(tmpChild, file = 'child.RData') # clunky
		cat("Finishing child process execution.\n")
		exit() 
	} else {
		cat("Starting parent process execution.\n")
		tmpParent <- mean(rnorm(10000000))
		cat("Finishing parent process execution.\n")
		wait(pid)  # wait til child is finished so can read
                   # in updated child.RData below
	} 
} 
load('child.RData') # clunky
print(c(tmpParent, tmpChild))


#########################################################
### Demo code for Unit 8 of Stat243, "Computer numbers"
### Chris Paciorek, October 2012
#########################################################

############################
# 1: Basic representations
############################

a <- as.integer(3423333)
a * a


x <- rnorm(100000)
y <- 1:100000
z <- rep('a', 100000)
object.size(x) # 800040 bytes
object.size(y) # 400040 bytes - so how many bytes per integer in R?
object.size(z) # 800088 bytes - hmm, what's going on here?

############################
# 2: Floating point basics
############################

### 2.1 Representing real numbers

0.3 - 0.2 == 0.1
0.3
0.2
0.1 # Hmmm...
options(digits=22)
a <- 0.3
b <- 0.2
a - b 
0.1
1/3
# so empirically, it looks like we're accurate up to the 16th decimal place


1e-16 + 1
1e-15 + 1
2e-16 + 1
.Machine$double.eps
.Machine$double.eps + 1

.1
.5
.25
.26
1/32
1/33

### 2.2 Overflow and underflow

log10(2^1024) # whoops ... we've actually just barely overflowed
log10(2^1023)

.Machine$double.xmax
.Machine$double.xmin

### 2.3 Integers or floats

x <- 2^45
z <- 25
class(x)
class(z)
as.integer(x)
as.integer(z)
1e308
as.integer(1e308)
1e309


x <- 3; typeof(x)
x <- as.integer(3); typeof(x)
x <- 3L; typeof(x)

### 2.4 Precision

options(digits = 22)
# large vs. small numbers
.1234123412341234 
1234.1234123412341234 # not accurate to 16 places 
123412341234.123412341234 # only accurate to 4 places 
1234123412341234.123412341234 # no places! 
12341234123412341234 # fewer than no places! 

# How precision affects calculations
1234567812345678 - 1234567812345677
12345678123456788888 - 12345678123456788887
.1234567812345678 - .1234567812345677
.12345678123456788888 - .12345678123456788887
.00001234567812345678 - .00001234567812345677 
# not as close as we'd expect, should be 1e-20
.000012345678123456788888 - .000012345678123456788887

123456781234 - .0000123456781234 # the correct answer is 123456781233.99998765....

1000000.1


2^52
2^52+1
2^53
2^53+1
2^53+2
2^54
2^54+2
2^54+4


##################################################
# 3: Implications for calculation and comparisons
##################################################

### 3.1 Computer arithmetic is not mathematical arithmetic!

val1 <- 1/10; val2 <- 0.31; val3 <- 0.57
res1 <- val1*val2*val3
res2 <- val3*val2*val1
identical(res1, res2)
res1
res2

### 3.3 Comparisons

a = 12345678123456781000
b = 12345678123456782000

approxEqual = function(a, b){
  if(abs(a - b) < .Machine$double.eps * abs(a + b))
    print("approximately equal") else print ("not equal")
}

approxEqual(a,b)

a = 1234567812345678
b = 1234567812345677

approxEqual(a,b)   

### 3.4 Calculations

# catastrophic cancellation w/ large numbers
123456781234.56 - 123456781234.00
# how many accurate decimal places?

# catastrophic cancellation w/ small numbers
a = .000000000000123412341234
b = .000000000000123412340000

# so we know the right answer is .000000000000000000001234 EXACTLY

a-b  # 1.233999993151397490851e-21, which is accurate only to 8 places + 21 = 29 places, as expected from a machine epsilon calculation, since the "1" is in the 13th position, plus 16 more
# ideally we would have accuracy to 37 places on a computer since the actual result is of order 10^-21 and 37 = 21 + 16, but we've lost 8 digits of accuracy (the '12341234' cancellation) to catastrophic cancellation

x <- c(-1, 0, 1) + 1e8
n <- length(x)
sum(x^2)-n*mean(x)^2 # that's not good!
sum((x - mean(x))^2)

# adding/subtracting numbers very different in magnitude gives precision of the larger number
123456781234 - 0.000001

# part of the predictive density calculation example:
exp(-1000)

xs <- 1:100
dists <- rdist(xs)
corMat <- exp(- (dists/10)^2) # this is a p.d. matrix (mathematically)
eigen(corMat)$values[80:100]  # but not numerically


################################################################
### Demo code for Unit 9 of Stat243, "Numerical linear algebra"
### Chris Paciorek, October 2012
################################################################

########################
# 1: Preliminaries
########################

### 1.7 Some vector and matrix properties

A + B
A %*% B # matrix multiplication
A * B # Hadamard (direct) product


### 1.9 rank, LIN, basis vectors

# suppose I want to use (1, 1) and (1, -1) as basis vectors in R^2 rather than (1, 0), and (0, 1)

# express (1, 0) in terms of the basis vectors:
x = c(1, 0)
v1 = c(1, 1)
v2 = c(1, -1)
v1 = v1/norm2(v1)
v2 = v2/norm2(v2)

c1 = sum(x*v1)
c2 = sum(x*v2)

c1*v1 + c2*v2  # this should be x


### 1.10 Invertibility, etc.

mat0 = matrix(c(1,2,2,4), 2)  # symmetric, not full rank
mat1 = matrix(c(1,2,4,1), 2)  # not symmetric
mat2 = matrix(c(1,.5,.25,1), 2) # not symmetric
mat3 = matrix(c(1,1,1,1), 2)  # not full rank
mat4 = matrix(c(1,.5,.5,1), 2) # symmetric, full rank

# let's look at the eigendecompositions
eigen(mat0)

eigen(mat4) # numerically positive definite!

### 1.11 Generalized inverses

precMat <- matrix(c(1,-1,0,0,0,-1,2,-1,0,0,0,-1,2,-1,
    0,0,0,-1,2,-1,0,0,0,-1,1), 5)
e <- eigen(precMat)
e$values
e$vectors[ , 5]
# generate a realization
e$values[1:4] <- 1 / e$values[1:4]
y <- e$vec %*% (e$values * rnorm(5))
sum(y)

###########################
# 2: Computational issues
###########################

### 2.3 Ill-conditioned problems

norm2 <- function(x) sqrt(sum(x^2))
A <- matrix(c(10,7,8,7,7,5,6,5,8,6,10,9,7,5,9,10),4)
e <- eigen(A)
b <- c(32, 23, 33, 31)
bPerturb <- c(32.1, 22.9, 33.1, 30.9)
x <- solve(A, b)
xPerturb <- solve(A, bPerturb)
norm2(x - xPerturb)
norm2(b - bPerturb)
norm2(x - xPerturb)/norm2(x)
(e$val[1]/e$val[4])*norm2(b - bPerturb)/norm2(b)


x1 <- 1990:2010
x2 <- x1 - 2000 # centered
x3 <- x2/10 # centered and scaled
X1 <- cbind(rep(1, 21), x1, x1^2)
X2 <- cbind(rep(1, 21), x2, x2^2)
X3 <- cbind(rep(1, 21), x3, x3^2)
e1 <- eigen(crossprod(X1))
e1$values
e2 <- eigen(crossprod(X2))
e2$values
e3 <- eigen(crossprod(X3))
e3$values

##################################################
# 3: Matrix factorizations (decompositions) and
#      solving systems of linear equations
##################################################

### 3.1 Triangular systems

n <- 20
X <- crossprod(matrix(rnorm(n^2), n))
b <- rnorm(n)
U <- chol(crossprod(X)) # U is upper-triangular
L <- t(U) # L is lower-triangular
out1 <- backsolve(U, b) 
out2 <- forwardsolve(L, b) 
all.equal(out1, c(solve(U) %*% b)) 
all.equal(out2, c(solve(L) %*% b)) 

# 3.2 LU

# example of the impact of pivoting

# eqn 1:  .0001 x1 + x2 = 1
# eqn 2:        x1 + x2 = 2

A = matrix(c(.0001, 1, 1 ,1), nr = 2, byrow = TRUE)
b = c(1,2)

x = solve(A, b)
options(digits = 16)
print(x)

c1 = -A[2,1]/A[1,1]
A[2 , ] = c1*A[1,] + A[2, ]
b[2] = c1*b[1] + b[2]

# compute solution if numbers only available with 3 digits of accuracy
x3dig = rep(NA, 2)
x3dig[2] = round(b[2], -3)/round(A[2,2], -3) # round to 3 significant digits
x3dig[1] = (b[1] - A[1,2]*x3dig[2])/A[1,1]

print(x3dig) # whoops

# switch the rows

A = matrix(c(1, 1, .0001, 1), nr = 2, byrow = TRUE)
b = c(2,1)

c1 = -A[2,1]/A[1,1]
A[2 , ] = c1*A[1,] + A[2, ]
b[2] = c1*b[1] + b[2]

# solution based on 3 digits of accuracy with pivoting
x3dig = rep(NA, 2)
x3dig[2] = round(b[2], 3)/round(A[2,2], 3) # numbers are now less than 1 - round to 3 decimal places
x3dig[1] = (b[1] - A[1,2]*x3dig[2])/A[1,1]

print(x3dig) # now a good approximation


### 3.3 Cholesky decomposition

backsolve(U, b, transpose = TRUE) 
forwardsolve(L, b, transpose = TRUE)


backsolve(U, backsolve(U, b, transpose = TRUE))
backsolve(U, forwardsolve(t(U), b)) # equivalent but less efficient

U <- chol(A)
y <- crossprod(U, rnorm(n)) # i.e., t(U)%*%rnorm(n), but much faster

# numerically not positive definite
library(fields)
locs <- runif(100)
rho <- .1
C <- exp(-rdist(locs)^2/rho^2)
e <- eigen(C)
e$values[96:100]
U <- chol(C)
vals <- abs(e$values)
max(vals)/min(vals)
U <- chol(C, pivot = TRUE)

### 3.4 QR decomposition


### 3.4.3 Regression and the QR in R

X.qr = qr(X)
Q = qr.Q(X.qr)
R = qr.R(X.qr) 

### 3.4.4 Computing the QR

nlz = function(vec) vec/norm2(vec)
orth = function(vec, std) vec - sum(std*vec)*std
norm2 = function(x) sqrt(sum(x^2))

# modified Gram-Schmidt
X = matrix(rnorm(5*4), nr=5)
# X[ ,4] = 0.5*X[ ,1] - .7*X[ ,2] + rnorm(5, 0, .00000001)
Q = matrix(NA, 5, 4)
R = matrix(0, 4, 4)

Q[ ,1] = nlz(X[ ,1])

Q[ ,2] = nlz(orth(X[ ,2], Q[ ,1]))
Q[ ,3] = orth(X[ ,3], Q[ ,1])
Q[ ,4] = orth(X[ ,4], Q[ ,1])

Q[ ,3] = nlz(orth(Q[, 3], Q[ ,2]))
Q[ ,4] = orth(Q[ ,4], Q[ ,2])

Q[ ,4] = nlz(orth(Q[, 4], Q[ ,3]))

   
# original Gram-Schmidt
Q = matrix(NA, 5, 4)
Q2[ ,1] = nlz(X[ ,1])

Q2[ ,2] = nlz(orth(X[ ,2], Q2[ ,1]))

Q2[ ,3] = orth(X[ ,3], Q2[ ,1])
Q2[ ,3] = nlz(orth(Q2[ ,3], Q2[ ,2]))

Q2[ ,4] = orth(X[ ,4], Q2[ ,1])
Q2[ ,4] = orth(Q2[ ,4], Q2[ ,2])
Q2[ ,4] = nlz(orth(Q2[, 4], Q2[ ,3]))

### 3.5 Determinants

myqr = qr(A)
magn = sum(log(abs(diag(myqr$qr)))) 
sign = prod(sign(diag(myqr$qr)))


### 4.1 Eigendecomposition

# computing the top few eigenvalue/vector pairs

A = matrix(c(3, 1.3, .7, 1.3, 2, .5, .7, .5, 1), 3)

e = eigen(A)

z1 = c(1, 0, 0)
for(i in 1:20){
  z1 = A %*% z1
  z1 = z1/norm2(z1)
  print(z1)
}

val1 = (A %*% z1 / z1)[1, 1]

# following Gentle-NLA, p. 125, we can get the next largest eigenvalue, which is the largest eigenvalue of the matrix B
B = A - val1 * outer(c(z1), c(z1))

z2 = c(1, 0, 0)
for(i in 1:200){
  z2 = B %*% z2
  z2 = z2/norm2(z2)
  print(z2)
}

val2 = B%*%z2/z2 + val1
################################################################
### Demo code for Unit 10 of Stat243, "Simulation"
### Chris Paciorek, November 2012
################################################################

#########################################
# 2: Random number generation
#########################################

### 2.1 Generating random uniforms on a computer

n <- 100
a <- 171
m <- 30269
u <- rep(NA, n) 
u[1] <- 7306 
for(i in 2:n)
  u[i] <- (a * u[i-1]) %% m 
u <- u/m
uFromR <- runif(n) 
par(mfrow = c(2,2)) 
plot(1:n, u, type = 'l') 
plot(1:n, uFromR, type = 'l') 
hist(u, nclass = 25) 
hist(uFromR, nclass = 25)


RNGkind("Wichmann-Hill")
set.seed(0)
saveSeed <- .Random.seed
uFromR <- runif(10)
a <- c(171, 172, 170)
m <- c(30269, 30307, 30323)
xyz <- matrix(NA, nr = 10, nc = 3)
xyz[1, ] <- (a * saveSeed[2:4]) %% m
for( i in 2:10)
	xyz[i, ] <- (a * xyz[i-1, ]) %% m
for(i in 1:10)
	print(c(uFromR[i],sum(xyz[i, ]/m)%%1))
# we should be able to recover the current value of the seed
xyz[10, ]
.Random.seed[2:4]

### 2.3 RNG in R 

set.seed0)
rnorm(10)
set.seed(0)
rnorm(10)


set.seed(0)
rnorm(5)
savedSeed <- .Random.seed
tmp <- sample(1:50, 2000, replace = TRUE)
.Random.seed <- savedSeed
rnorm(5)


f <- function(args) {
  oldseed <- .Random.seed
  # other code 
  .Random.seed <<- oldseed # note global assignment!
}

### 2.4 RNG in parallel

require(parallel)
require(rlecuyer)
nSims <- 250
testFun <- function(i){
	val <- runif(1)
	return(val)
}

nCores <- 4
RNGkind()
cl <- makeCluster(nCores)
iseed <- 0
clusterSetRNGStream(cl = cl, iseed = iseed)
RNGkind() 
# hmmm... clusterSetRNGStream() should set as "L'Ecuyer-CMRG"
res <- parSapply(cl, 1:nSims, testFun)
clusterSetRNGStream(cl = cl, iseed = iseed)
res2 <- parSapply(cl, 1:nSims, testFun)
identical(res,res2)
stopCluster(cl)


RNGkind("L'Ecuyer-CMRG") 
seed <- 0
set.seed(seed) ## now start M workers 
s <- .Random.seed 
for (i in 1:M) { 
	s <- nextRNGStream(s) 
	# now send s to worker i as .Random.seed 
} 


require(parallel)
require(rlecuyer)
RNGkind("L'Ecuyer-CMRG")
res <- mclapply(seq_len(nSims), testFun, 
	mc.cores = nSlots, mc.set.seed = TRUE) 
# this also seems to auotmatically reset the seed when it is run
res2 <- mclapply(seq_len(nSims), testFun, 
	mc.cores = nSlots, mc.set.seed = TRUE) 
identical(res,res2)


###################################
# 3: Generating random variables
###################################

### 3.1 Multivariate distributions

U <- chol(covMat)
crossprod(U, nrow(covMat))

### 3.3 Rejection sampling

# generating efficiently from a truncated normal

# suppose we want to generate from X ~ N(1, 2^2) I(x > 4)

mu <- 1; sd <- 2
tau <- 4 # truncation point
samps <- rnorm(10000, mu, sd)
samps <- samps[samps > tau]
length(samps)/10000  # around 640 of 10000
hist(samps)


tauStd <- (tau - mu)/sd  # put trunc. pt on standardized scale
lamStar <- (tauStd + sqrt(tauStd^2+4))/2 # rate of exponential
c <- (1/lamStar)*exp(lamStar^2/2 - lamStar*tauStd)/(sqrt(2*pi)*(1-pnorm(tauStd)))

y <- rexp(2000, rate = lamStar) + tauStd  # + tauStd does the shifting of the exponential
u <- runif(2000)
samps2 <- mu + sd * y[u <= exp(-(y-lamStar)^2/2)]  # mu + sd part reverses the standardization
length(samps2)/2000 # empirical acceptance rate
1/c  # theoretical acceptance rate

# plots here are done on the standardized scale for simplicity
par(mfrow = c(1,3))
plot(y, u)
points(y[u <= exp(-(y-lamStar)^2/2)], u[u <= exp(-(y-lamStar)^2/2)], col = 'red')  # we accept red points and reject black

# this shows f(x) and c*g(x) on the standardized scale
yvals <- seq(tauStd, 5, len = 100)
plot(yvals, c*dexp(yvals - tauStd, rate = lamStar), type = 'l')
lines(yvals, dnorm(yvals)/(1-pnorm(tauStd)), col = 'red')
# exp is a nice envelope

yvals <- seq(5, 10, len = 100)
plot(yvals, c*dexp(yvals - tauStd, rate = lamStar), type = 'l')
lines(yvals, dnorm(yvals)/(1-pnorm(tauStd)), col = 'red')
# in tail, exp is much fatter than normal, but few proposals are in tail anyway


### 3.5 Importance sampling


# suppose we want to estimate P(X^2 < -3) for a Cauchy
# instead of drawing from a standard Cauchy, let's draw from a Cauchy shifted to be centered at -5

# write out the code below for a single estimate

m <- 1000 # number of samples for each estimator
# standard MC estimator
set.seed(0)
y <- rt(m, df = 1)
# samples for importance sampler
set.seed(0)
x <- rt(m, df = 1) - 5  # i.e sample from g(x) being a Cauchy centered at -5
f <- dt(x, df = 1)  # density of x under f
g <- dt(x + 5, df = 1)  # density of x under g (density of x under a Cauchy centered at -5 is the same as density of x+5 under Cauchy centered at 0)
w <- f/g  # weights

stdEst <- mean(y < (-3))
isEst <- mean((x < (-3))*w)

# now let's do a small simulation study of the estimator

nSims <- 100

m <- 1000 # number of samples for each estimator

isEst <- stdEst <- varIS <- varStd <- rep(NA, nSims)
set.seed(0)
for(i in 1:nSims){  # a small simulation study of the approach with m = 1000
  # note this could be done w/out looping if I were trying to be efficient

  # samples for MC estimator

  y <- rt(m, df = 1)

  # samples for importance sampler
  set.seed(0)
  x <- rt(m, df = 1) - 5  # i.e sample from g(x) being a Cauchy centered at -5
  f <- dt(x, df = 1)  # density of x under f
  g <- dt(x + 5, df = 1)  # density of x under g (density of x under a Cauchy centered at -5 is the same as density of x+5 under Cauchy centered at 0)
  w <- f/g  # weights
  
  isEst[i] <- mean((x < (-3))*w)
  stdEst[i] <- mean(y < (-3))
}

mean((isEst - pt(-3, df = 1))^2)  # variance of IS estimator based on entire sim study
mean((stdEst - pt(-3, df = 1))^2)  # variance of standard MC estimator based on entire sim study
# order of magnitude gain in efficiency


############################################
# 4: Design of simulation studies
############################################

# 4.2 Overview

devs <- rnorm(100)
tdevs <- qt(pnorm(devs), df = 1)
plot(devs, tdevs)
abline(0,1)

##############################################
# 5: Implementation of simulation studies
##############################################

thetaLevels <- c("low", "med", "hi")
n <- c(10, 100, 1000)
tVsNorm <- c("t", "norm")
levels <- expand.grid(thetaLevels, tVsNorm, n)
# example of replicate() -- generate m sets correlated normals
set.seed(0)
genFun <- function(n, theta = 1){
	u <- rnorm(n)
	x <- runif(n)
	Cov <- exp(-rdist(x)/theta)
	U <- chol(Cov)
	return(cbind(x,crossprod(U, u)))
}
m <- 20
simData <- replicate(m, genFun(100, 1))
dim(simData) # 100 observations by {x, y} values by 20 replicates


##########################################################################
### Demo code for Unit 11 of Stat243, "Integration and Differentiation"
### Chris Paciorek, November 2012
##########################################################################

#########################################
# 1: Differentiation
#########################################

### 1.1 Numerical differentiation

# compute first and second derivatives of log(gamma(x)) at x=1/2
options(digits = 9, width = 120)
h <- 10^(-(1:15)) 
x <- 1/2
fx <- lgamma(x)
# targets: actual derivatives can be computed very accurately using built-in R functions:
digamma(x) # accurate first derivative
trigamma(x) # accurate second derivative
# calculate discrete differences
fxph <- lgamma(x+h)
fxmh <- lgamma(x-h)
fxp2h <- lgamma(x+2*h)
fxm2h <- lgamma(x-2*h)
# now find numerical derivatives
fp1 <- (fxph - fx)/h # forward difference
fp2 <- (fxph - fxmh)/(2*h) # central difference
# second derivatives 
fpp1 <- (fxp2h - 2*fxph + fx)/(h*h) # forward difference
fpp2 <- (fxph - 2*fx + fxmh)/(h*h) # central difference
# table of results
cbind(h,fp1,fp2,fpp1,fpp2)

### 1.2 Numerical differentiation in R

x <- 1/2 
numericDeriv(quote(lgamma(x)), "x") 

### 1.3 Symbolic differentiation

deriv(quote(atan(x)), "x")  # derivative of simple expression
# derivative of a function; note we need to pass in an expression, 
# not the entire function
f <- function(x,y) sin(x * y)+x^3+exp(y)
newBody <- deriv(body(f), c("x", "y"), hessian = TRUE)
# now create a new version of f that provides gradient 
# and hessian as attributes of the output, 
# in addition to the function value as the return value
f <- function(x, y) {} # function template
body(f) <- newBody
# try out the new function
f(3,1)
attr(f(3,1), "gradient")
attr(f(3,1), "hessian")

### symbolic differentiation in Mathematica

### first partial derivative wrt x
# D[ Exp[x^n] - Cos[x y], x]
### second partial derivative
# D[ Exp[x^n] - Cos[x y], {x, 2}]
### partials
# D[ Exp[x^n] - Cos[x y], x, y]
### trig function example
# D[ ArcTan[x], x] 


#########################################
# 2: Integration
#########################################


f <- function(x) sin(x)
# mathematically, the integral from 0 to pi is 2
# quadrature through integrate()
integrate(f, 0, pi)
system.time(integrate(f, 0, pi))
# MC estimate
ninteg <- function(n) mean(sin(runif(n, 0, pi))*pi)
n <- 1000
ninteg(n)
system.time(ninteg(n))
n <- 10000
ninteg(n)
system.time(ninteg(n))
n <- 1000000
ninteg(n)
system.time(ninteg(n))
# that was fairly slow, 
# especially if you need to do a lot of individual integrals

### 2.2 Numerical integration in R

integrate(dnorm, -Inf, Inf, 0, .1)
integrate(dnorm, -Inf, Inf, 0, .001)
integrate(dnorm, -Inf, Inf, 0, .0001) # THIS FAILS!

### 2.3 Singularities and infinite ranges

# doing it directly with integrate()
f <- function(x)   
	exp(x)/sqrt(x)
integrate(f, 0, 1) 
# subtracting off the singularity
f <- function(x) (exp(x) - 1)/sqrt(x)
x <- seq(0,1, len = 200) 
integrate(f, 0, 1) 
# analytic change of variables, followed by numeric integration 
f <- function(u)   
	2*exp(u^2)
integrate(f, 0, 1) 

### 2.4 Symbolic integration

### in Mathematica
### one-dimensional integration
# Integrate[Sin[x]^2, x]
### two-dimensional integration
# Integrate[Sin[x] Exp[-y^2], x, y]


################################################################
### Demo code for Unit 12 of Stat243, "Optimization"
### Chris Paciorek, November 2012
################################################################

#######################################
# 3: Univariate function optimization
#######################################

### 3.3.3 How can Newton's method go wrong? 

par(mfrow = c(1,2))
fp <- function(x, theta = 1){
	exp(x*theta)/(1+exp(x*theta)) - .5
}
fpp <- function(x, theta = 1){
	exp(x*theta)/((1+exp(x*theta))^2)
}
xs <- seq(-15, 15, len = 300)
plot(xs, fp(xs), type = 'l')
lines(xs, fpp(xs), lty = 2)

# good starting point
x0 <- 2
xvals <- c(x0,rep(NA,9))
for(t in 2:10){
	xvals[t]=xvals[t-1] - fp(xvals[t-1]) / fpp(xvals[t-1])
}
print(xvals)
points(xvals, fp(xvals), pch = as.character(1:length(xvals)))

# bad starting point
x0 <- 2.5
xvals <- c(x0,rep(NA,9))
for(t in 2:10){
	xvals[t]=xvals[t-1] - fp(xvals[t-1]) / fpp(xvals[t-1])
}
print(xvals)
points(xvals, fp(xvals), pch = as.character(1:length(xvals)), col = 'red')
# whoops

# mistakenly climbing uphill
f <- function(x) cos(x)
fp <- function(x) -sin(x)
fpp <- function(x) -cos(x)
xs <- seq(0, 2*pi, len = 300)
plot(xs, f(xs), type = 'l', lwd = 2)
lines(xs, fp(xs))
lines(xs, fpp(xs), lty = 2)
x0 <- 0.2 # starting point
fp(x0) # negative
fpp(x0) # negative
x1 <- x0 - fp(x0)/fpp(x0) # whoops, we've gone uphill 
# because of the negative second derivative
xvals <- c(x0,rep(NA,9))
for(t in 2:10){
	xvals[t]=xvals[t-1]-fp(xvals[t-1])/fpp(xvals[t-1])
}
xvals
points(xvals, fp(xvals), pch = as.character(1:length(xvals)), col = 'red')
# and we've found a maximum rather than a minimum...


#######################################
# 4: Convergence ideas
#######################################

### 4.3 Convergence rates

options(digits = 10)
f <- function(x) cos(x)
fp <- function(x) -sin(x)
fpp <- function(x) -cos(x)
xstar <- pi # known minimum

# N-R
x0 <- 2
xvals <- c(x0,rep(NA,9))
for(t in 2:10){
	xvals[t] <- xvals[t-1] - fp(xvals[t-1]) / fpp(xvals[t-1])
}
print(xvals)

# bisection
bisecStep <- function(interval, fp){
	xt <- mean(interval)
	if(fp(interval[1]) * fp(xt) <= 0) interval[2] <- xt else interval[1] <- xt
	return(interval)
}
nIt <- 30
a0 <- 2; b0 <- (3*pi/2) - (xstar - a0) 
# have b0 be as far from min as a0 for fair comparison with N-R
interval <- matrix(NA, nr = nIt, nc = 2)
interval[1, ] <- c(a0, b0)
for(t in 2:nIt){
	interval[t, ] <- bisecStep(interval[t-1, ], fp)
}
rowMeans(interval)

########################################
### 5: Multivariate optimization
########################################

### 5.2 Newton-Raphson (Newton's method)

library(MASS)
head(wtloss)
attach(wtloss)

plot(Days, Weight, ylab = "Weight (kg)")

## Linear fit - not a good model
wtloss.lm <- lm(Weight ~ Days, data = wtloss)
coef(wtloss.lm) # estimates of the intercept and slope
lines(Days, fitted(wtloss.lm), col = "grey")

# we need some starting values
# guess that beta2 approx 100 since the midpoint of Days is near 100

beta2.init = 100
plot(2^(-Days/beta2.init), Weight)
tmpMod = lm(Weight ~ I(2^(-Days/beta2.init)))
beta0.init = tmpMod$coef[1]
beta1.init = tmpMod$coef[2]

plot(Days, Weight, ylab = "Weight (kg)")
lines(Days, beta0.init + beta1.init * 2^(-Days/beta2.init), col = 'red')
# not bad; let's go with that

expr = quote((Weight - (beta0 + beta1 * 2^(-Days/beta2)))^2) # objective is sum of this quantity over the observations
# let R do the differentiation of the basic expression for us (since human error in taking derivatives is very common)
# R won't deal with the summation, so we'll have to do that ourselves
deriv(expr, c("beta0", "beta1", "beta2"), function.arg = TRUE)
deriv3(expr, c("beta0", "beta1", "beta2"), function.arg = TRUE) # same as deriv()  with Hessian = TRUE

f =function(betas){
  sum((Weight - (betas[1] + betas[2] * 2^(-Days/betas[3])))^2)
}

# use the basic code based on deriv() and deriv3() 
fp = function(betas){
  # a bit sloppy as I use Days and Weight as global vars here
  beta0 = betas[1]
  beta1 = betas[2]
  beta2 = betas[3]
  .expr3 <- 2^(-Days/beta2)
  .expr6 <- Weight - (beta0 + beta1 * .expr3)
  grad = - c(sum(2*.expr6), sum(2*(.expr3*.expr6)),
             sum(2* (beta1 * (.expr3 * (log(2) * (Days/beta2^2))) * .expr6)))
  return(grad)
}

fpp = function(betas){
  # a bit sloppy as I use Days and Weight as global vars here
  n = length(Days)
  beta0 = betas[1]
  beta1 = betas[2]
  beta2 = betas[3]
  .expr3 <- 2^(-Days/beta2)
  .expr6 <- Weight - (beta0 + beta1 * .expr3)
  .expr11 <- log(2)
  .expr12 <- beta2^2
  .expr14 <- .expr11 * (Days/.expr12)
  .expr15 <- .expr3 * .expr14
  .expr16 <- beta1 * .expr15
  
  hessian = matrix(0, 3, 3)
  hessian[1, 1] <- 2*n # don't forget to do summation (i.e., multiply by n)
  hessian[1, 2] <- sum(2 * .expr3)
  hessian[1, 3] <- sum( 2 *  .expr16)
  hessian[2, 2] <- sum( 2 * (.expr3 * .expr3))
  
  hessian[2, 3] <-  - sum(2 * 
                          (.expr15 * .expr6 - .expr3 * .expr16))
  
  hessian[3, 3] <- - sum(2 * (beta1 * (.expr15 * 
                                       .expr14 - .expr3 * (.expr11 * (Days * (2 * beta2)/.expr12^2))) * 
                              .expr6 - .expr16 * .expr16))
  hessian[lower.tri(hessian)] = t(hessian[upper.tri(hessian)])
  return(hessian) 
}


nIt = 20
xvals = matrix(NA, nr = nIt, nc = 3)
xvals[1, ] = c(beta0.init, beta1.init, beta2.init)


for(t in 2:nIt){
  xvals[t, ] = xvals[t-1, ] - solve(fpp(xvals[t-1, ]), fp(xvals[t-1, ]))
  if(FALSE){ # in case not numerically p.d., use pseudoinverse
    e = eigen(fpp(xvals[t-1, ])) 
    e$val = 1/e$val
    e$val[e$val < 0] = 0 # pseudo-inverse
    xvals[t, ] = xvals[t-1, ] - e$vec%*%((t(e$vec)*e$val)%*%fp(xvals[t-1, ]))
  }
}

# let's check against R's built-in nonlinear least squares function
beta.start = xvals[1, ]
names(beta.start) = c("beta0", "beta1", "beta2")
wtloss.fm <- nls(Weight ~ beta0 + beta1*2^(-Days/beta2), 
                 data = wtloss, start = beta.start, trace = TRUE)
nlsEst = coef(wtloss.fm)

f(xvals[20, ])
f(nlsEst)

plot(Days, Weight, ylab = "Weight (kg)")
lines(Days, fitted(wtloss.fm), col = "blue")

### 5.3 Fisher scoring

# data are only involved in .expr6 and all Hessian terms involving .expr6 are linear in it, so replace with its expectation, which is simply 0

fppFS = function(betas){
  # a bit sloppy as I use Days and Weight as global vars here
  n = length(Days)
  beta0 = betas[1]
  beta1 = betas[2]
  beta2 = betas[3]
    .expr3 <- 2^(-Days/beta2)
    .expr6 <- 0 # Weight - (beta0 + beta1 * .expr3)
    .expr11 <- log(2)
    .expr12 <- beta2^2
    .expr14 <- .expr11 * (Days/.expr12)
    .expr15 <- .expr3 * .expr14
    .expr16 <- beta1 * .expr15
 
  hessian = matrix(0, 3, 3)
  hessian[1, 1] <- 2*n # don't forget to do summation (i.e., multiply by n)
    hessian[1, 2] <- sum(2 * .expr3)
    hessian[1, 3] <- sum( 2 *  .expr16)
    hessian[2, 2] <- sum( 2 * (.expr3 * .expr3))
  
    hessian[2, 3] <-  - sum(2 * 
        (.expr15 * .expr6 - .expr3 * .expr16))

    hessian[3, 3] <- - sum(2 * (beta1 * (.expr15 * 
        .expr14 - .expr3 * (.expr11 * (Days * (2 * beta2)/.expr12^2))) * 
        .expr6 - .expr16 * .expr16))
  hessian[lower.tri(hessian)] = t(hessian[upper.tri(hessian)])
  return(hessian) 
}


nIt = 20
xvals = matrix(NA, nr = nIt, nc = 3)
xvals[1, ] = c(beta0.init, beta1.init, beta2.init)

fpp(xvals[1, ])
fppFS(xvals[1, ])
# pretty similar - some terms in the observed FI didn't involve the data, so don't change anyway

for(t in 2:nIt){
  xvals[t, ] = xvals[t-1, ] - solve(fppFS(xvals[t-1, ]), fp(xvals[t-1, ]))
}

# FS seems to give slightly faster convergence here


### 5.5.1 Descent methods and Newton-like methods

# steepest descent
f <- function(x){
	x[1]^2/1000 + 4*x[1]*x[2]/1000 + 5*x[2]^2/1000
}
fp <- function(x){
	c(2 * x[1]/1000 + 4 * x[2]/1000,
	4 * x[1]/1000 + 10 * x[2]/1000)
}
lineSearch <- function(alpha, xCurrent, direction, FUN){
	newx <- xCurrent + alpha * direction
	FUN(newx)
}
nIt <- 50
xvals <- matrix(NA, nr = nIt, nc = 2)
xvals[1, ] <- c(7, -4)
for(t in 2:50){
	newalpha <- optimize(lineSearch, interval = c(-5000, 5000),
		xCurrent = xvals[t-1, ], direction = fp(xvals[t-1, ]),
		FUN = f)$minimum 
	xvals[t, ] <- xvals[t-1, ] + newalpha * fp(xvals[t-1, ])
}
x1s <- seq(-5, 8, len = 100); x2s = seq(-5, 2, len = 100)
fx <- apply(expand.grid(x1s, x2s), 1, f)
# plot f(x) surface on log scale
image.plot(x1s, x2s, matrix(log(fx), 100, 100), 
	xlim = c(-5, 8), ylim = c(-5,2)) 
lines(xvals) # overlay optimization path
# kind of slow

### 5.6 Gauss-Seidel

f <- function(x){
	return(x[1]^2/1000 + 4*x[1]*x[2]/1000 + 5*x[2]^2/1000)
}
f1 <- function(x1, x2){ # f(x) as a function of x1
	return(x1^2/1000 + 4*x1*x2/1000 + 5*x2^2/1000)
}
f2 <- function(x2, x1){ # f(x) as a function of x2
	return(x1^2/1000 + 4*x1*x2/1000 + 5*x2^2/1000)
}
x1s <- seq(-5, 8, len = 100); x2s = seq(-5, 2, len = 100)
fx <- apply(expand.grid(x1s, x2s), 1, f)
image.plot(x1s, x2s, matrix(log(fx), 100, 100))
nIt <- 49
xvals <- matrix(NA, nr = nIt, nc = 2)
xvals[1, ] <- c(7, -4)
# 5, -10
for(t in seq(2, nIt, by = 2)){
	newx1 <- optimize(f1, x2 = xvals[t-1, 2], interval = c(-40, 40))$minimum
	xvals[t, ] <- c(newx1, xvals[t-1, 2])
	newx2 <- optimize(f2, x1 = newx1, interval = c(-40, 40))$minimum
	xvals[t+1, ] <- c(newx1, newx2)
}
lines(xvals)

### 5.7 Nelder-Mead

# set up tuning factors
alpha = 1
gamma = 2
beta = .5
delta = .5

# auxiliary function to plot line segments
plotseg = function(ind1, ind2, col = 1){
  if(length(ind1) == 1){
    segments(xs[ind1, 1], xs[ind1 , 2], xs[ind2, 1], xs[ind2, 2], col = col)
  } else{
    segments(ind1[1], ind1[2], xs[ind2, 1], xs[ind2, 2], col = col)    
  }
}
  
# initial polytope
xs = matrix(c(-2,3,-6,4,-4,2),nc=2,byrow=T)

plot(xs,xlim=c(-10,5),ylim=c(-2,8))
plotseg(2, 3)
plotseg(1, 3)
plotseg(1, 2)

xbar = (xs[1,]+xs[2,])/2

points(xbar[1],xbar[2], col = 'red')

### reflection

xr = (1+alpha)*xbar - alpha*xs[3,]
points(xr[1],xr[2],col = 'red')

plotseg(xr, 1, col = 'red')
plotseg(xr, 2, col = 'red')
plotseg(1, 2, col = 'red')
# red triangle is now our proposed new polytope

### consider expansion if xr is better than all the other points

xe = gamma*xr + (1-gamma)*xbar
points(xe[1], xe[2], col = 'green')
points(xr[1], xr[2], col = 'green')
# if xe is better than xr, use xe, o.w. use xr

plotseg(xe, 1, col = 'green')
plotseg(xe, 2, col = 'green')
plotseg(1, 2, col = 'green')

### consider contraction if xr is worse than all the other points

points(xs[3,1], xs[3, 2], col = 'blue')
points(xr[1], xr[2], col = 'blue')

# set xh to be the best of these two points
xh = xs[3, ] # suppose the original point is better than the reflection
xc = beta*xh + (1-beta)*xbar
points(xc[1], xc[2], col = 'blue')
# if xc is better than xh, then contract

plotseg(xc, 1, col = 'blue')
plotseg(xc, 2, col = 'blue')
plotseg(1, 2, col = 'blue')

# if not, shrink simplex toward the best point (xs[1, ])

### shrinkage

xs[2, ] = delta*xs[2, ] + (1-delta)*xs[1, ]
xs[3, ] = delta*xs[3, ] + (1-delta)*xs[1, ]

points(xs[2, 1], xs[2, 2], col = 'purple')
points(xs[3, 1], xs[3, 2], col = 'purple')

plotseg(1, 2, 'purple')
plotseg(1, 3, 'purple')
plotseg(2, 3, 'purple')

### 5.8 Simulated annealing

x= seq(0,10,len=100)
f=function(x)  sin(x)

plot(x, f(x), type='l', ylim = c(-1, 3))
tau = 10
# plot the modified function
lines(x, exp(-f(x)/tau))   # try tau = 3, 1, .3, etc.

##############################
# 6: Optimization in R
##############################

### 6.1 Core optimization functions

# using optim()

yHundredths = scan('precipData.txt')  # precip in hundredths of inches
yHundredths = yHundredths[!is.na(yHundredths)]
y = yHundredths/100  # precip now in inches

par(mfrow = c(1, 2))
hist(y)
thresh = 3
hist(y[y > thresh])

npy=31+28+31 # number of days in winter season
cutoff = (1/25.4) # wet days defined as those with > 1 mm (1/25.4 inches) of precip
thresh = as.numeric(quantile(y[y > cutoff],.98,na.rm=T)) # 98%ile of wet days


pp.lik <- function(par, y, thresh, npy) {
  mu <- par[1]
  sc <- par[2]
  xi <- par[3]
  uInd = y > thresh
  if(sc <= 0) 
    return(10^6);
  if ((1 + ((xi * (thresh - mu))/sc)) < 0) {
    l <- 10^6; 
  }
  else {
    y <- (y - mu)/sc
    y <- 1 + xi * y
    if (min(y^uInd) <= 0){   
      l <- 10^6;
    } else{
      ytmp = y
      ytmp[!uInd]=1  # 'zeroes' out those below the threshold after applying the log in next line
      l <- sum(uInd * log(sc)) + sum(uInd * log(ytmp) * 
                                     (1/xi + 1)) + length(y)/npy * mean((1 + (xi * (thresh - mu))/sc)^(-1/xi))
    }
  }
  l
}

# optim() usage
# out <- optim(init, pp.lik, hessian = TRUE, method = "BFGS", control = list(maxit = maxit, trace = TRUE))

yExc = y[y > thresh]
in2 <- sqrt(6 * var(yExc))/pi  # have initial values depend only on those above the threshold
in1 <- mean(yExc) - 0.57722 * in2
init0 = c(in1, in2, 0.1)

# fit with Nelder-Mead (default) and BFGS
optim(init0, pp.lik, y = y, thresh = thresh, npy = npy, control = list(trace = TRUE)) # 118 fxn evals
optim(init0, pp.lik, y = y, thresh = thresh, npy = npy, method = 'BFGS', control = list(trace = TRUE)) # ~ 10 its

system.time(optim(init0, pp.lik, y = y, thresh = thresh, npy = npy))
system.time(optim(init0, pp.lik, y = y, thresh = thresh, npy = npy, method = 'BFGS'))

# different starting value 
init1 = c(mean(y[y > thresh]), sd(y[y > thresh]), -0.1)
optim(init1, pp.lik, y = y, thresh = thresh, npy = npy, control = list(trace = TRUE)) 
optim(init1, pp.lik, y = y, thresh = thresh, npy = npy, method = 'BFGS', control = list(trace = TRUE)) 

# bad starting value for BFGS
init2 = c(thresh, .01, .1)
out = optim(init2, pp.lik, y = y, thresh = thresh, npy = npy, control = list(trace = TRUE), hessian = TRUE)
solve(out$hessian)
out2 = optim(init2, pp.lik, y = y, thresh = thresh, npy = npy, method = 'BFGS', control = list(trace = TRUE), hessian = TRUE)
solve(out2$hessian)


# suppose the data were on a different scale
in2 <- sqrt(6 * var(yExc*1000))/pi  # have initial values depend only on those above the threshold
in1 <- mean(yExc*1000) - 0.57722 * in2
init3 = c(in1, in2, 0.1)

optim(init3, pp.lik, y = y*1000, thresh = thresh*1000, npy = npy, control = list(trace = TRUE)) # note that the parameters have changed even beyond a scaling effect
optim(init3, pp.lik, y = y*1000, thresh = thresh*1000, npy = npy, method = 'BFGS', control = list(trace = TRUE)) # note lack of convergence after 100 its
optim(init3, pp.lik, y = y*1000, thresh = thresh*1000, npy = npy, method = 'BFGS', control = list(trace = TRUE, maxit = 1000)) # convergence, but to values not concordant with those from fitting the original y data

# when we have y*1000, the location and scale parameters are on very different scales than the shape parameter
# can we use parscale to deal with the problems when the data are on a different scale?
optim(init3, pp.lik, y = y*1000, thresh = thresh*1000, npy = npy, control = list(trace = TRUE, parscale = c(1000,1000,1))) 
optim(init3, pp.lik, y = y*1000, thresh = thresh*1000, npy = npy, method = 'BFGS', control = list(trace = TRUE, parscale = c(1000,1000,1))) 
# yes, that works! the parameter estimates are now equivalent to those from the standard fitting of the original data

# default step size for numerical derivative is only .001; perhaps we should try with higher accuracy
optim(init0, pp.lik, y = y, thresh = thresh, npy = npy, method = 'BFGS', control = list(trace = TRUE, ndeps = rep(1e-6, 3)))
# note that we needed only 33 function evaluations instead of the original 43, presumably because of higher accuracy in the derivative

#### let's do some plotting of the objective fxn as a sanity check

# 3-d grid of location, scale, shape
locVals = seq(0, 5, len = 30)
scaleVals = seq(.1, 3, len = 30)
shapeVals = seq(-.3, .25, by = .05)
parGrid = expand.grid(loc = locVals, scale = scaleVals, shape = shapeVals)

obj=apply(parGrid, 1, pp.lik, y=y, thresh=thresh, npy=npy) # compute objective function for all parameter combos

tmp = cbind(parGrid,obj)

par(mfrow=c(3, 4), mai = c(.6,.5, .3, .1), mgp = c(1.8, .7, 0))
for( i in 1:length(shapeVals)){
  tmp2 = tmp[tmp$shape == shapeVals[i],] # slice of objective fxn for fixed shape parameter
  image.plot(locVals, scaleVals, matrix((tmp2$obj), length(locVals), length(scaleVals)), col = tim.colors(32), zlim = c(40,80), main = as.character(shapeVals[i]))
}
# note there a couple weird things about this log likelihood - (1) weird things happen when the shape parameter is very close to 0 and (2) for certain combinations the likelihood is not defined (it's set to 1e6) - this is why some of the colors go from aqua to white without showing red values - reparameterizing or optimizing with explicit constraints may be better approaches

# apart from that, it appears that optim() has probably found the minimum