# CSC 411, Assignment #3. # SAMPLE FROM THE PRIOR DISTRIBUTION FOR THE MODEL. # # Arguments: # # K number of mixture components # p number of variables # mu.low low limit of uniform prior for eachmu # mu.high high limit of uniform prior for each mu # sigma.low low limit of uniform prior for each sigma # sigma.high high limit of uniform prior for each sigma # # Result returned is a list with elements mu, sigma, and pr containing # the mu and sigma parameters for each mixture component, and the mixing # proportions. sample_prior = function (K, p, mu.low, mu.high, sigma.low, sigma.high) { mu = matrix(NA,K,p) sigma = matrix(NA,K,p) for (k in 1:K) { mu[k,] = runif(p,mu.low,mu.high) sigma[k,] = runif(p,sigma.low,sigma.high) } pr = rexp(K) pr = pr / sum(pr) list (mu=mu, sigma=sigma, pr=pr) } # COMPUTE THE LOG LIKELIHOOD. Arguments are the matrix of data values (on # row per case, one column per variable) and a list of parameter values. # Result is the log of the probability of the data given these parameter values. log_likelihood = function (X, pm) { K = length(pm$pr) ll = 0 for (i in 1:nrow(X)) { pri = 0 for (k in 1:K) { pri = pri + pm$pr[k] * prod(dnorm(X[i,],pm$mu[k,],pm$sigma[k,])) } ll = ll + log(pri) } ll } # SAMPLE FROM THE POSTERIOR DISTRIBUTION. # # Arguments: # # N number of points that will be sampled from the prior # M number of points sampled from the posterior # X matrix of data values # K number of mixture components # mu.low low limit of uniform prior for eachmu # mu.high high limit of uniform prior for each mu # sigma.low low limit of uniform prior for each sigma # sigma.high high limit of uniform prior for each sigma # # The result is a list of M parameter vectors sampled (approximately) from # the posterior. Note that there may be duplicates in this list. This # function also prints the effective sample size. sample_posterior = function (N, M, X, K, mu.low, mu.high, sigma.low, sigma.high) { ll = rep(NA,N) pm = list() for (r in 1:N) { pm[[r]] = sample_prior (K, ncol(X), mu.low, mu.high, sigma.low, sigma.high) ll[r] = log_likelihood (X, pm[[r]]) } sp = exp(ll-max(ll)) sp = sp / sum(sp) cat("Effective sample size:",1/sum(sp^2),"\n") pm [sample(N,M,prob=sp,replace=T)] } # PLOT PARAMETER VALUES FOR 2D DATA. Plots the data points passed as the # first argument, along with ellipses for each component. The width of # the line defining the ellipse is proportional to the mixing proportion. plot_2D_params = function (X, pms) { plot(X[,1],X[,2],pch=19,col="gray",xlab="",ylab="", xlim=c(-1.3,1.3),ylim=c(-1.3,1.3)) circx = cos((1:100)*2*pi/100) circy = sin((1:100)*2*pi/100) for (pm in pms) { for (k in 1:length(pm$pr)) { lines (circx*pm$sigma[k,1]+pm$mu[k,1], circy*pm$sigma[k,2]+pm$mu[k,2], lwd=10*pm$pr[k]) } } }