# Our goal is to generate n observations from a truncated normal distribution that # has been truncated at +/- l. # To do this we will write a function. # First we identify the inputs and output of our function # Inputs: n - the number of observtions desired # l - the level of trunctation # Outputs: a numeric vector of n observations that follows # the truncated normal distribution. # Lets begin by writing a simple function that generates # n random normals truncZ0 = function( n = 1, l = 2) { rnorm(n) } # Notice how we create a function - it is simiilar to how # we create vectors, data frames, etc. by assignment. # The code in the body of the function appears in curly braces. # There is only one command here, which returns n n(0,1) observations. # since it is the last command in the body of the function, its return value # is the return value of the function. # The function below eliminates the values that fall outside (-l, l) range truncZ1 = function( n = 1, l = 2) { z = rnorm(n) z[ (z < l) & (z > -l)] } # Now the return value will only be those values that are in the appropriate range. # Another way to return a value is with the return(z[ abs(z) < l ]) function. # We can also use the invisible(z[ abs(z) < l ]) function which does not print anything # to standard out (i.e. the screen) if the funcation call is not assigned to a variable. # That is, x = truncZ1(n=100) assigns the return value of the function call to the object x # but truncZ1(n=100) either prints the return value on the screen or is "invisible" if # the invisible() function is used to return the functions value. # Next we generate more observations than n, because typically we will need more # than n because we will be throwing out observations. # We use the expected number of observations to be eliminated plus 3 * the standard error # so that most of the time we will generate more than needed. truncZ2 = function( n = 1, l = 2) { p = 2*( 1- pnorm(l)) m = n + ceiling( n*p + 3*sqrt(n*p*(1-p)) ) z = rnorm(m) z[ (z < l) & (z > -l)] } # When we do generate more than needed, we need to only return n truncZ3 = function( n = 1, l = 2) { p = 2*( 1- pnorm(l)) m = n + ceiling( n*p + 3*sqrt(n*p*(1-p)) ) z = rnorm(m) z = z[ (z < l) & (z > -l)] z[1:n] } # The while loop below checks that we have enough observations. # If not we generate a few more and add them to z. # We continue to do this until we have enough observations. # Notice the indentation of the code. Code inside a function # is indented, and code within a look is further indented. # This indentation helps us read the function -- we can tell simply # by the indentation which code is within the while loop. truncZ4 = function( n = 1, l = 2) { p = 2*( 1- pnorm(l)) fudge = ceiling( n*p + 3*sqrt(n*p*(1-p)) ) m = n + fudge z = rnorm(m) z = z[ (z < l) & (z > -l)] while (length(z) < n) { x = rnorm( fudge ) z = c(z, x[ (x -l) ]) } z[1:n] } # Finally, well written functions include comments that document # the inputs and outputs of the function, and the algorithm. truncZ = function( n = 1, l = 2) { # This function generates n observations from a truncated normal # all observations outside +/- l are thrown away # determine the tail probability corresponding to l p = 2*( 1- pnorm( abs(l) )) # generate extra observations -- we compute the # expected number to be thrown away and add to is # 3 SDs - so 99% of the time we will have generated enough fudge = ceiling( n*p + 3*sqrt(n*p*(1-p)) ) m = n + fudge z = rnorm(m) z = z[ (z < l) & (z > -l)] # If we have not generate enough observations, # continue to generate more until we have at least n while (length(z) < n) { x = rnorm( fudge ) z = c(z, x[ (x -l) ]) } # Return only n of the observations z[1:n] }