## Statistical Computing with R
## Robert B. Gramacy
##
## Walk-through script: each expression is meant to be evaluated at the
## top level so its value is auto-printed. One statement per line.

## so the code will wrap appropriately in the slides
options(width = 45)

## Basic Operations
1 + 2 + 3
1 + 2 * 3
(1 + 2) * 3

## vectors and sequences
c(0, 1, 1, 2, 3, 5, 8)
1:20

## vector-vector operations (element-wise)
c(1, 2, 3, 4) + c(10, 20, 30, 40)
c(1, 2, 3, 4) * c(10, 20, 30, 40)
1:4 - c(1, 1, 1, 1)

## repetition (the shorter operand is recycled)
c(1, 2, 3, 4) + 1
1 / (1:5)
c(1, 2, 3, 4) + c(10, 100)
## intentional: lengths 5 and 2 do not divide evenly, so R recycles
## anyway and emits a warning
c(1:5) + c(10, 100)

## character vectors
"Hello world."
c("Hello world", "Hello R interpreter")

## comments
## ... at the beginning of a line
1 + 2 + # ... in the middle
  3

## functions
exp(1)
cos(3.141593)
cos(seq(-pi, pi, 1))
log(1)
log(exp(1))

## function names: named arguments may come in any order, positional
## arguments may not
log(x = 64, base = 4)
log(base = 4, x = 64)
log(64, 4)
## intentional: swapping positional arguments gives log base 64 of 4
log(4, 64)

## operators as functions
17 + 2
2^10
3 == 4

## variables
x <- 1
y <- 2
z <- c(x, y)
z

## order of evaluation: z was built from the old value of y and does
## not change when y is reassigned
y <- 4
z

## printing assignments
print(y <- 4)

## wacky assignment (intentional demo of `=` and `->`)
x = 2
c(x, y) -> z
z

## elements of vectors
b <- (1:12)^2
b
b[7]
b[1:6]
b[c(1, 11, 6)]
b[b %% 3 == 0]

## break it down
b %% 3
print(b30 <- b %% 3 == 0)
b[b30]

## care with = and ==
one <- 1
two <- 2
## intentional: `=` assigns, so this overwrites `one` with 2
one = two
one
one <- 1
## `==` compares and leaves both variables untouched
one == two

## creating functions
## f: returns a length-2 vector holding x + 1 and y + 1
f <- function(x, y) {
  c(x + 1, y + 1)
}
f(1, 2)

## for loops
fib <- rep(NA, 12)
fib[1:2] <- 0:1
for (i in 3:length(fib)) {
  fib[i] <- fib[i - 1] + fib[i - 2]
}
fib

## arrays (filled column by column)
a <- array(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12),
           dim = c(3, 4))
a
a[2, 3]

## vectors and matrices
as.vector(a)
m <- matrix(data = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12),
            nrow = 3, ncol = 4)
m

## 3d array
w <- array(1:12, dim = c(2, 3, 2))
w
w[1, 3, 2]
w[1, 3, ]
w[, , 2]

## subsetting with vectors of integers
a[1:2, ]
a[c(1, 3), ]
## each row of the index matrix is one (row, column) pair
a[array(c(1, 2, 3, 4), dim = c(2, 2))] ## try/not on slides

## lists
e <- list(thing = "hat", size = 8.25)
e

## accessing items in a list
e$thing
e[[1]]

## lists within lists
g <- list("lists within lists", e)
g

## building a data frame
teams <- c("PHI", "NYM", "FLA", "ATL", "WSN")
## note: this reuses (overwrites) the name of the 3d array above
w <- c(92, 89, 94, 72, 59)
l <- 162 - w
nleast <- data.frame(teams, w, l)
nleast

## accessing columns and rows
nleast$w
nleast[, 2]
nleast[nleast$teams == "FLA", ]
nleast[nleast$w > 90, ]

## try/not in slides
subset(nleast, teams == "FLA")
## compare against the teams column only: which() applied to a
## comparison on the whole data frame returns positions in the
## flattened logical matrix, which are valid row numbers only when
## the match happens to sit in the first column
nleast[which(nleast$teams == "FLA"), ]

## classes
class(teams)
class(w)
class(nleast)
class(class)

## generic add: `+` dispatches on the class of its operands
17 + 6
d <- as.Date("2009-08-08")
class(d)
d + 7

## cars data
cars
dim(cars)
names(cars)
summary(cars)

## histograms
hist(cars$speed, main = "")

## plotting cars
plot(cars, xlab = "Speed (mph)",
     ylab = "Stopping distance (ft)")

## cars linear model
cars.lm <- lm(dist ~ speed, data = cars)
cars.lm
summary(cars.lm)

## adding lines (drawn on top of the scatter plot created above)
abline(cars.lm)
## length.out spelled in full rather than relying on partial
## argument matching
s <- seq(min(cars$speed), max(cars$speed), length.out = 100)
sdf <- data.frame(speed = s)
cars.p <- predict(cars.lm, newdata = sdf,
                  interval = "prediction")
## lower and upper prediction bounds, selected by column name
lines(s, cars.p[, "lwr"], col = 2, lty = 2)
lines(s, cars.p[, "upr"], col = 2, lty = 2)
legend("topleft", c("fit", "interval"),
       col = 1:2, lty = 1:2)

## getting help
help(lm)
?lm
help.search("regression")
??regression

## packages
(.packages())
(.packages(all.available = TRUE))
library()
library(rpart)

## install packages
## only download when the package is missing, so that re-running the
## script does not reinstall it every time
if (!requireNamespace("tgp", quietly = TRUE)) {
  install.packages("tgp", dependencies = TRUE)
}

## apply function
# Create the matrix
m <- matrix(seq(from = -98, to = 100, by = 2),
            nrow = 10, ncol = 10)
# Return the product of each of the rows
apply(m, 1, prod)
# Return the sum of each of the columns
apply(m, 2, sum)
# Return a new matrix whose entries are those of 'm' modulo 10
apply(m, c(1, 2), function(x) x %% 10)

## sapply function
x <- list(a = 1:10, beta = exp(-3:3),
          logic = c(TRUE, FALSE, FALSE, TRUE))
sapply(x, quantile)
## histogram of 100 sample means, each from 10 exponential draws
hist(replicate(100, mean(rexp(10))))