# CSC 2541, Fall 2004, Assignment #1.


# GIBBS SAMPLING USING STATE (lambda1, lambda2, Y1, ..., Yn).
#
# Arguments are the data matrix (X), with rows corresponding to years
# and columns to courses, the means of the exponential priors for
# lambda1 and lambda2 (mu1 and mu2), and the number of Gibbs sampling
# iterations to do (K).  The Markov chain is initialized by setting
# the Y value for each year to the minimum of the X values for that
# year, and lambda1 and lambda2 to 1 (though this makes no difference).
# Gibbs sampling updates are then done for lambda1, lambda2, and the Y
# values.  The result is a list with vectors lambda1 and lambda2 and
# matrix Y, holding the K+1 values from the Markov chain.  The Y
# matrix has K+1 rows and as many columns as years.

gibbs1 <- function (X, mu1, mu2, K)
{
  n <- nrow(X)            # n = number of years
  m <- ncol(X)            # m = number of courses

  minX <- apply(X,1,min)  # minimum enrollment in any course for each year

  # Allocate space for results.

  lambda1 <- rep(NA,K+1)
  lambda2 <- rep(NA,K+1)
  Y <- matrix(NA,K+1,n)

  # Initialize the state at the beginning.

  lambda1[1] <- 1
  lambda2[1] <- 1
  Y[1,] <- minX

  # Do K Gibbs sampling iterations.

  for (i in 1:K)
  {
    # Do Gibbs sampling updates for the lambdas.

    Z <- X - Y[i,]  # Y gets replicated over columns here

    lambda1[i+1] <- rgamma (1, 1+sum(Y[i,]), n+1/mu1)
    lambda2[i+1] <- rgamma (1, 1+sum(Z), n*m+1/mu2)

    # Do Gibbs sampling updates for the Ys.  Note that the range of possible
    # values for each Y is from 0 up to the minimum X value for its year.

    for (t in 1:n)
    {
      # Find "prior" probabilities for each possible value of Y.

      h <- minX[t]
      p <- dpois(0:h,lambda1[i+1])

      # Multiply by likelihood for each possible Y value.

      for (j in 1:m)
      { p <- p * dpois(X[t,j]-(0:h),lambda2[i+1])
      }

      # Sample a new value for this Y with probabilities proportional to the
      # prior times likelihood.

      Y[i+1,t] <- sample (0:h, 1, prob=p)
    }
  }

  list (lambda1=lambda1, lambda2=lambda2, Y=Y)
}


# GIBBS SAMPLING USING STATE (Y1, ..., Yn).
#
# Arguments are the data matrix (X), with rows corresponding to years
# and columns to courses, the means of the exponential priors for
# lambda1 and lambda2 (mu1 and mu2), and the number of Gibbs sampling
# iterations to do (K).  The Markov chain is initialized by setting
# the Y value for each year to the minimum of the X values for that
# year.  Gibbs sampling updates are then done for the Y values, with
# lambda1 and lambda2 integrated away.  Values for lambda1 and lambda2
# to go with each Y are sampled as well, from their conditional
# distributions given Y.  The result is a list with matrix Y and
# vectors lambda1 and lambda2, holding the K+1 values for Y from the
# Markov chain along with sampled lambdas.  The Y matrix has K+1 rows
# and as many columns as years.

gibbs2 <- function (X, mu1, mu2, K)
{
  n <- nrow(X)            # n = number of years
  m <- ncol(X)            # m = number of courses

  minX <- apply(X,1,min)  # minimum enrollment in any course for each year

  # Allocate space for results.

  lambda1 <- rep(NA,K+1)
  lambda2 <- rep(NA,K+1)
  Y <- matrix(NA,K+1,n)

  # Initialize the state (and lambdas) at the beginning.

  lambda1[1] <- 1
  lambda2[1] <- 1
  Y[1,] <- minX

  # Do K Gibbs sampling iterations.

  for (i in 1:K)
  {
    y <- Y[i,]  # y is the current value of Y

    # Do Gibbs sampling updates for each y[t].

    for (t in 1:n)
    {
      h <- minX[t]     # Maximum possible value for y[t]
      p <- rep(0,h+1)  # Place to store probabilities: P(y[t]=v) goes in p[v+1]

      # Find log of product of prior probabilities for y[t] and likelihood.
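      # (Because lambda1 and lambda2 have been integrated away, the y values
      # are not conditionally independent, so the marginal probability of the
      # whole y vector, and of all the residuals X-y, must be recomputed for
      # each candidate value v of y[t].)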
      # Note that X-y replicates y over columns before subtracting from X.

      for (v in 0:h)
      { y[t] <- v
        p[v+1] <- log.pois.prob(y,mu1) + log.pois.prob(X-y,mu2)
      }

      # Sample according to these probabilities, taking care to avoid
      # any damaging underflow when exponentiating from log form.

      y[t] <- sample (0:h, 1, prob=exp(p-max(p)))
    }

    Y[i+1,] <- y  # store updated Y

    # Fill in values for the lambdas to go with this Y.

    Z <- X - Y[i+1,]  # Y gets replicated over columns here

    lambda1[i+1] <- rgamma (1, 1+sum(Y[i+1,]), n+1/mu1)
    lambda2[i+1] <- rgamma (1, 1+sum(Z), n*m+1/mu2)
  }

  list (lambda1=lambda1, lambda2=lambda2, Y=Y)
}


# COMPUTE PROBABILITY OF POISSON DATA, INTEGRATING OVER MEAN.  Computes
# the log of the probability of a vector of data points (C), integrating
# over the Poisson mean for this data, with respect to an exponential prior
# with mean mu.

log.pois.prob <- function (C, mu)
{
  lgamma(sum(C)+1) - sum(lgamma(C+1)) - (sum(C)+1)*log(length(C)+1/mu) - log(mu)
}


# DO TESTS.  Does the runs and makes the plots needed for the assignment.
# Assigns to global variables so the results can be manually examined later.

do.tests <- function ()
{
  # Read data.

  data <<- as.matrix(read.table("ass1-data",head=F))

  # Set up plotting.

  postscript("ass1-plots.ps",horizontal=T)
  par(mfrow=c(1,2))

  # Run the two Gibbs samplers.  Record the time taken by each.

  iters <<- 5000
  burnin <<- 201

  set.seed(1)
  g1.times <<- system.time( g1 <<- gibbs1 (data, 8, 4, iters) )

  set.seed(1)
  g2.times <<- system.time( g2 <<- gibbs2 (data, 8, 4, iters) )

  # Do time plots, for convergence assessment.

  plot (g1$lambda1, type="l", col="red", ylim=c(0,18),
        xlab="iteration", ylab="lambdas")
  lines(g1$lambda2, type="l", col="green")
  title("First Gibbs Sampler")

  plot (g2$lambda1, type="l", col="red", ylim=c(0,18),
        xlab="iteration", ylab="lambdas")
  lines(g2$lambda2, type="l", col="green")
  title("Second Gibbs Sampler")

  # Make scatterplots of lambda1 and lambda2, discarding burn-in iterations.

  plot (g1$lambda1[burnin:iters], g1$lambda2[burnin:iters], pch=20,
        xlab="lambda1", ylab="lambda2", xlim=c(5,17), ylim=c(1,8))
  title("First Gibbs Sampler")

  plot (g2$lambda1[burnin:iters], g2$lambda2[burnin:iters], pch=20,
        xlab="lambda1", ylab="lambda2", xlim=c(5,17), ylim=c(1,8))
  title("Second Gibbs Sampler")

  # Plot estimated autocorrelations for lambda2 for the two Gibbs samplers.

  library(stats)  # provides acf (formerly in the ts package)

  acf (g1$lambda2[burnin:iters], lag.max=30, main="First Gibbs Sampler")
  acf (g2$lambda2[burnin:iters], lag.max=30, main="Second Gibbs Sampler")

  dev.off()
}
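
# EXAMPLE CHECK ON SIMULATED DATA.  A minimal sketch, not part of the original
# assignment code: the function name sim.check and the default settings below
# are illustrative assumptions.  It simulates enrollment data from the model
# itself, with known lambda1 and lambda2, runs gibbs1 on it (using the same
# prior means, 8 and 4, as do.tests above), and returns posterior mean
# estimates of the lambdas, which should land near the true values.

sim.check <- function (n=10, m=5, true.lambda1=10, true.lambda2=3, K=2000)
{
  Y.true <- rpois(n,true.lambda1)                     # common component, one per year
  X <- Y.true + matrix(rpois(n*m,true.lambda2),n,m)   # add course-specific counts

  g <- gibbs1 (X, 8, 4, K)

  burn <- 200
  c (lambda1 = mean(g$lambda1[-(1:burn)]),            # posterior means after burn-in
     lambda2 = mean(g$lambda2[-(1:burn)]))
}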