## Thursday 23 March 2017 library(gtools) alpha = c(3,6,4) N = 2000 set.seed(20170321) thetasample = rdirichlet(N,alpha) library(ggtern) myframe = function(mysample) { ( data.frame(theta1=mysample[,1], theta2=mysample[,2], theta3=mysample[,3]) ) } tickvals = seq(.2,1,.2) addmyopts = function(myplot) { ( myplot + geom_point(size=0.01) + tern_limits(breaks=tickvals,labels=tickvals) + xlab(expression(theta[1])) + ylab(expression(theta[2])) + zlab(expression(theta[3])) + theme_light() ) } thetaframe = myframe(rdirichlet(N,alpha)) addmyopts(ggtern(thetaframe,aes(theta1,theta2,theta3))) nextpt = function (mytheta,sigma=0.2) { theta1new = rnorm(1,mytheta[1],sigma) theta2new = rnorm(1,mytheta[2],sigma) theta3new = 1 - theta1new - theta2new thetanew = c(theta1new,theta2new,theta3new) r = ( ddirichlet(thetanew,alpha) / ddirichlet(mytheta,alpha) ) if ( runif(1) > r ) { thetanew = mytheta } return(thetanew) } theta = c(1/3,1/3,1/3) theta = nextpt(theta); print(theta) theta = nextpt(theta); print(theta) theta = nextpt(theta); print(theta) theta = nextpt(theta); print(theta) theta = nextpt(theta); print(theta) theta = nextpt(theta); print(theta) m = 4000 nanchain = matrix(rep(0/0,3*m),ncol=3,byrow=TRUE) chain1 = nanchain chain1[1,] = c(1/3,1/3,1/3) for (i in 2:m) { chain1[i,] = nextpt(chain1[i-1,]) } myjitter = matrix(runif(3*m,-0.01,0.01), ncol=3,byrow=TRUE) chain1frame = myframe(chain1+myjitter) addmyopts(ggtern(chain1frame[1:20,], aes(theta1,theta2,theta3)) + geom_path(alpha=0.5,col='blue')) addmyopts(ggtern(chain1frame[1:200,], aes(theta1,theta2,theta3)) + geom_path(alpha=0.5,col='blue')) addmyopts(ggtern(chain1frame[(m/2+1):m,], aes(theta1,theta2,theta3))) chain2 = nanchain chain2[1,] = c(0.05,0.05,0.9) for (i in 2:m) { chain2[i,] = nextpt(chain2[i-1,]) } chain2frame = myframe(chain2+myjitter) addmyopts(ggtern(chain2frame[1:200,], aes(theta1,theta2,theta3)) + geom_path(alpha=0.5,col='green')) addmyopts(ggtern(chain2frame[(m/2+1):m,], aes(theta1,theta2,theta3))) chain3 = nanchain chain3[1,] = c(0.9,0.05,0.05) for (i in 2:m) { chain3[i,] = nextpt(chain3[i-1,]) } chain3frame = myframe(chain3+myjitter) addmyopts(ggtern(chain3frame[1:200,], aes(theta1,theta2,theta3)) + geom_path(alpha=0.5,col='red')) addmyopts(ggtern(chain3frame[(m/2+1):m,], aes(theta1,theta2,theta3))) cummean = function(x) {return(cumsum(x)/1:length(x))} plot(cummean(thetasample[,1]),type='l',ylim=c(0,0.5)) lines(cummean(chain1frame[(m/2+1):m,1]),col='blue') lines(cummean(chain2frame[(m/2+1):m,1]),col='green') lines(cummean(chain3frame[(m/2+1):m,1]),col='red') chain04 = nanchain chain04[1,] = c(0.05,0.05,0.9) for (i in 2:m) { chain04[i,] = nextpt(chain04[i-1,],0.04) } chain04frame = myframe(chain04+myjitter) addmyopts(ggtern(chain04frame[1:200,], aes(theta1,theta2,theta3)) + geom_path(alpha=0.5,col='green')) addmyopts(ggtern(chain04frame[(m/2+1):m,], aes(theta1,theta2,theta3))) chain10 = nanchain chain10[1,] = c(0.05,0.05,0.9) for (i in 2:m) { chain10[i,] = nextpt(chain10[i-1,],1) } chain10frame = myframe(chain10+myjitter) addmyopts(ggtern(chain10frame[1:200,], aes(theta1,theta2,theta3)) + geom_path(alpha=0.5,col='green')) addmyopts(ggtern(chain10frame[(m/2+1):m,], aes(theta1,theta2,theta3))) ## Tuesday 28 March 2017 alpha = c(3,6,4) nextpt1_gibbs = function(mytheta) { theta1new = (1-mytheta[2])*rbeta(1,alpha[1],alpha[3]) mytheta[1] = theta1new mytheta[3] = 1 - sum(mytheta[-3]) return(mytheta) } nextpt2_gibbs = function(mytheta) { theta2new = (1-mytheta[1])*rbeta(1,alpha[2],alpha[3]) mytheta[2] = theta2new mytheta[3] = 1 - sum(mytheta[-3]) return(mytheta) } set.seed(20170328) theta = c(1/3,1/3,1/3) theta = nextpt1_gibbs(theta); print(theta) theta = nextpt2_gibbs(theta); print(theta) theta = nextpt1_gibbs(theta); print(theta) theta = nextpt2_gibbs(theta); print(theta) m = 4000 nanchain = matrix(rep(0/0,3*(m+1)),ncol=3,byrow=TRUE) chain1 = nanchain chain1[1,] = c(1/3,1/3,1/3) for (i in 2*(1:(m/2))) { chain1[i,] = nextpt1_gibbs(chain1[i-1,]) chain1[i+1,] = nextpt2_gibbs(chain1[i,]) } myframe = function(mysample) { ( data.frame(theta1=mysample[,1], theta2=mysample[,2], theta3=mysample[,3]) ) } chain1frame = myframe(chain1) library(ggtern) tickvals = seq(.2,1,.2) addmyopts = function(myplot) { ( myplot + geom_point(size=0.01) + tern_limits(breaks=tickvals,labels=tickvals) + xlab(expression(theta[1])) + ylab(expression(theta[2])) + zlab(expression(theta[3])) + theme_light() ) } addmyopts(ggtern(chain1frame[1:20,], aes(theta1,theta2,theta3)) + geom_path(alpha=0.5,col='blue')) addmyopts(ggtern(chain1frame[1:200,], aes(theta1,theta2,theta3)) + geom_path(alpha=0.5,col='blue')) addmyopts(ggtern(chain1frame[(m/2+1):m,], aes(theta1,theta2,theta3))) mean(chain1frame[(m/2+1):m,1]) alpha[1]/sum(alpha) mean(chain1frame[(m/2+1):m,2]) alpha[2]/sum(alpha) mean(chainframe[(m/2+1):m,3]) alpha[3]/sum(alpha) cummean = function(x) {return(cumsum(x)/1:length(x))} plot(cummean(chain1frame[(m/2+1):m,1]),type='l',col='blue', ylim=c(0,0.5)) ## Thursday 30 March 2017 library(rjags) modelString = " model { y ~ dbin(theta,n) theta ~ dbeta(1,1) n <- 4 } " filename = 'model.20170330.txt' writeLines( modelString, con=filename ) set.seed(20170330) jagsModel = jags.model( file=filename, data=list(y=3), n.chains=3 ) jagsModel update(jagsModel, n.iter=500) codaSamples = coda.samples(jagsModel, variable.names=c('theta'),n.iter=4000) plot(codaSamples) source('DBDA2Eprograms/DBDA2E-utilities.R') diagMCMC(codaObject = codaSamples, parName = 'theta') dev.copy2pdf(file='notes_comp_jags_DBDA2E.pdf', height=4, width=6) plotPost(codaSamples[,'theta'],main="theta", xlab = bquote(theta)) mean(codaSamples[[1]][,'theta']) mean(codaSamples[[2]][,'theta']) mean(codaSamples[[3]][,'theta']) y=list(list(62,60,63,59), list(63,67,71,64,65,66), list(68,66,71,67,68,68), list(56,62,60,61,63,64,63,59)) J = length(y) n = rep(0/0,J) for (j in 1:J) {n[j] = length(y[[j]])} ypacked = rep(0/0,sum(n)) for (j in 1:J) { for(i in 1:n[j]) { k = sum(n[1:j]) - (i-1) print(k) ypacked[k] = y[[j]][[i]] } } jagsModel = jags.model(file='ps08_prob2_model.txt', data=list(y=ypacked,n=n,J=J),n.chains=10) ## Tuesday 10 April 2017 library(rstan) rstan_options(auto_write = TRUE) options(mc.cores = parallel::detectCores()) modelString = " data { intn ; int y ; } parameters { real theta ; } model { y ~ binomial(n,theta) ; theta ~ beta(1,1) ; } " stanDSO = stan_model(model_code=modelString) stanFit = sampling(object=stanDSO, data=list(n=4,y=3), chains=3, warmup=500, iter=4000, seed=201704101) summary(stanFit)$summary hist(as.array(stanFit)[,1,'theta'],breaks=20, probability = TRUE) theta = seq(0,1,length.out = 100) lines(theta,dbeta(theta,4,2),col='blue') hierString = " data { int ntot; // total number of observations int J; // number of samples vector[ntot] y; // observational data int n[J]; // number of observations in a sample } parameters { real mu; // hyperparameter mean for means real tau; // hyperparameter sd for means real theta[J]; // mean for each sample real logsigma; // log-sd for data } model { int pos; theta ~ normal(mu,tau^(-2)); // Suggestion from sec 15.2 of Stan manual // 'Ragged data structures' pos = 1; for (j in 1:J) { segment(y, pos, n[j]) ~ normal(theta[j], exp(logsigma)); pos = pos + n[j]; } } " hierDSO = stan_model(model_code=hierString) y=list(list(62,60,63,59), list(63,67,71,64,65,66), list(68,66,71,67,68,68), list(56,62,60,61,63,64,63,59)) J = length(y) n = rep(0/0,J) for (j in 1:J) {n[j] = length(y[[j]])} unlist(y) hierFit = sampling(object=hierDSO, data=list(y=unlist(y),n=n,ntot=sum(n),J=J), chains=3, warmup=500, iter=4000, seed=201704102) summary(hierFit)$summary pairs(hierFit)