## Statistical Computing with R
## Robert B. Gramacy

## so the code will wrap appropriately in the slides
options(width = 45)

## Basic Operations
## * binds tighter than +; parentheses override
1 + 2 + 3
1 + 2 * 3
(1 + 2) * 3

## vectors and sequences
## c() combines values; from:to counts by one
c(0, 1, 1, 2, 3, 5, 8)
1:20

## vector-vector operations
## arithmetic works element by element
c(1, 2, 3, 4) + c(10, 20, 30, 40)
c(1, 2, 3, 4) * c(10, 20, 30, 40)
1:4 - c(1, 1, 1, 1)

## repetition
## the shorter operand is recycled to the longer
## one's length; the last line warns because 5 is
## not a multiple of 2
c(1, 2, 3, 4) + 1
1 / (1:5)
c(1, 2, 3, 4) + c(10, 100)
c(1:5) + c(10, 100)

## character vectors
## a quoted string is a character vector of length one
"Hello world."
c("Hello world", "Hello R interpreter")

## comments
## ... at the beginning of a line
## everything after a # is ignored; the line below ends in
## a dangling +, so the expression continues on the next
## line, where the leading + is a unary plus (result: 6)
1 + 2 + # ... in the middle
  + 3 

## functions
## functions are vectorized: cos() of a sequence returns
## one value per element
exp(1)
cos(3.141593)
cos(seq(-pi, pi, 1))
log(1)
log(exp(1))

## function names
## arguments given by name may appear in any order (both
## calls below give 3); unnamed arguments are matched by
## position, so swapping them changes the result
## (log(64, 4) is 3, log(4, 64) is 1/3)
log(x=64, base=4)
log(base=4, x=64)
log(64, 4)
log(4, 64)

## operators as functions
## infix operators are ordinary functions underneath,
## e.g. 17+2 is the same call as `+`(17, 2)
17+2
2^10
3 == 4

## variables
## <- assigns the value on the right to the name on the left
x <- 1
y <- 2
z <- c(x,y)
z

## order of evaluation
## z was built from the value y had at that moment;
## reassigning y afterwards does not change z (still 1 2)
y <- 4
z

## printing assignments
## an assignment does not print its value at the prompt;
## wrapping it in print() performs it and shows the result
print(y <- 4)

## wacky assignment
## = and -> also assign (-> assigns left-to-right); they
## are shown so you can recognise them -- z is now 2 4
x = 2
c(x,y) -> z
z

## elements of vectors
## squares of 1 to 12; positions are counted from 1
b <- seq_len(12)^2
print(b)
b[7]
b[seq_len(6)]
b[c(1, 11, 6)]
## a logical vector inside [ ] keeps the TRUE positions
b[(b %% 3) == 0]

## break it down
## remainders first, then the TRUE/FALSE mask, then
## the mask used as an index
b %% 3
b30 <- (b %% 3) == 0
print(b30)
b[b30]

## care with = and ==
one <- 1
two <- 2
## a single = ASSIGNS: one is overwritten with the value
## of two, so the next line prints 2
one = two
one
## restore one, then use == to COMPARE: this leaves both
## variables untouched and returns FALSE
one <- 1
one == two

## creating functions
## f adds one to each of its two arguments and returns
## the results combined into a single vector
f <- function(x, y) {
  x_plus <- x + 1
  y_plus <- y + 1
  c(x_plus, y_plus)
}
f(1, 2)

## for loops
## first 12 Fibonacci numbers: seed the first two
## entries, then each later entry is the sum of the
## two entries before it
fib <- rep(NA, 12)
fib[1] <- 0L
fib[2] <- 1L
for (i in seq(3, length(fib))) {
  fib[i] <- sum(fib[c(i - 2, i - 1)])
}
print(fib)

## arrays
## twelve values laid out column by column in a
## 3-row, 4-column array
a <- array(as.numeric(1:12), dim = c(3, 4))
print(a)
a[2, 3]

## vectors and matrices
## as.vector() drops the dimensions; matrix() builds
## the same 3 x 4 layout from nrow/ncol
as.vector(a)
m <- matrix(as.numeric(1:12), nrow = 3, ncol = 4)
print(m)

## 3d array
## leaving an index empty selects everything along
## that dimension
w <- array(1:12, dim = c(2, 3, 2))
print(w)
w[1, 3, 2]
w[1, 3, ]
w[, , 2]

## subsetting with vectors of integers
a[1:2, ]
a[c(1, 3), ]
## a two-column matrix index picks one cell per row
## of the index: here cells (1,3) and (2,4)
a[matrix(c(1, 2, 3, 4), nrow = 2)]  ## try/not on slides

## lists
## a list can hold named components of different types
e <- list(thing = "hat", size = 8.25)
print(e)

## accessing items in a list
## by name or by position -- both give "hat"
e[["thing"]]
e[[1]]

## lists within lists
## a list may itself be a component of another list
g <- list("lists within lists", e)
print(g)

## building a data frame
## one row per team: wins (w) and losses (l) out of a
## 162-game season
teams <- c("PHI", "NYM", "FLA", "ATL", "WSN")
w <- c(92, 89, 94, 72, 59)
l <- 162 - w
nleast <- data.frame(teams, w, l)
nleast

## accessing columns and rows
## columns by name or position; rows by a logical
## condition placed before the comma
nleast$w
nleast[,2]
nleast[nleast$teams == "FLA",]
nleast[nleast$w > 90,]  ## try/not in slides
## subset() evaluates the condition inside the data
## frame, so the column can be named directly
subset(nleast, teams == "FLA")
## compare the teams column only: which() applied to a
## comparison on the whole data frame returns positions
## in the flattened logical matrix, which are valid row
## numbers only when the match is in the first column
nleast[which(nleast$teams == "FLA"),]

## classes
## every object has a class -- even functions
class(teams)
class(w)
class(nleast)
class(class)

## generic add
## + dispatches on class: adding 7 to a Date moves it
## forward by seven days
17 + 6
d <- as.Date("2009-08-08")
class(d)
d + 7

## cars data
## built-in data set: speed and stopping distance
cars
dim(cars)
names(cars)
summary(cars)

## histograms
hist(cars$speed, main="")

## plotting cars
## a two-column data frame plots as a scatterplot of
## the second column against the first
plot(cars, xlab="Speed (mph)",
     ylab="Stopping distance (ft)")

## cars linear model
## regress stopping distance on speed
cars.lm <- lm(dist~speed, data=cars)
cars.lm
summary(cars.lm)

## adding lines
## draw the fitted line on the existing scatterplot,
## then the 95% prediction band on a grid of 100
## speeds spanning the observed range
abline(cars.lm)
## length.out is spelled out in full rather than relying
## on partial matching of the abbreviated name 'length'
s <- seq(min(cars$speed), max(cars$speed), length.out=100)
sdf <- data.frame(speed=s)
cars.p <- predict(cars.lm, newdata=sdf, interval="prediction")
## predict() returns columns fit, lwr, upr; select the
## bounds by name rather than by position
lines(s, cars.p[,"lwr"], col=2, lty=2)
lines(s, cars.p[,"upr"], col=2, lty=2)
legend("topleft", c("fit", "interval"), col=1:2, lty=1:2)

## getting help
## ? is shorthand for help(), and ?? for help.search();
## help() opens the page for a known topic, help.search()
## searches the installed documentation for a phrase
help(lm)
? lm
help.search("regression")
?? regression

## packages
## .packages() returns its result invisibly, so the
## surrounding parentheses are what make it print:
## first the attached packages, then every installed one
(.packages())
(.packages(all.available=TRUE))
## library() with no argument lists the installed
## packages; with a name it attaches that package
library()
library(rpart)

## install packages
## downloads and installs tgp together with the packages
## it depends on
## NOTE(review): presumably needs network access and a
## configured CRAN mirror -- confirm before running
## unattended
install.packages("tgp", dependencies=TRUE)

## apply function
# Fill a 10 x 10 matrix, column by column, with the
# even numbers from -98 to 100
m <- matrix(seq(from = -98, to = 100, by = 2),
            nrow = 10, ncol = 10)
# MARGIN = 1: product across each row
apply(m, 1, prod)
# MARGIN = 2: sum down each column
apply(m, 2, sum)
# MARGIN = c(1, 2): visit every cell, giving a new
# matrix whose entries are those of 'm' modulo 10
apply(m, c(1, 2), function(cell) cell %% 10)

## sapply function
# Run quantile() on every component of the list and
# simplify the results into one matrix, a column per
# component
x <- list(
  a = 1:10,
  beta = exp(-3:3),
  logic = c(TRUE, FALSE, FALSE, TRUE)
)
sapply(x, quantile)
# Histogram of 100 sample means, each taken over 10
# exponential draws
hist(replicate(100, mean(rexp(10))))