## Tuesday 25 April 2017 library(rethinking) data(Howell1) y = Howell1[Howell1$age < 18,'weight'] ybar = mean(y) n = length(y) X = Howell1[Howell1$age < 18,c('height','age','male')] xbar = colMeans(X) k = length(xbar) (X-xbar)[1:10,] A = matrix(rep(1,13*5),nrow = 13) b = seq(1,5) A * b t(t(A)*b) A %*% diag(b) t(t(A)-b) t(t(X)-xbar)[1:10,] X_xbar = t(t(X)-xbar) Sxx = t(X_xbar) %*% as.matrix(X_xbar) Sxx Sxxinv = solve(Sxx) Sxxinv Sxy = t(X_xbar) %*% (y-ybar) Syy = sum((y-ybar)^2) s2 = (Syy - t(Sxy) %*% Sxxinv %*% Sxy)/(n-k-1) s2 = s2[1,1] s = sqrt(s2) s betahat = Sxxinv %*% Sxy betahat s2beta = s2 * Sxxinv s2beta ## Thursday 27 April 2017 library(rethinking) data(Howell1) y = Howell1[Howell1$age < 18,'weight'] ybar = mean(y) n = length(y) X = Howell1[Howell1$age < 18,c('height','age','male')] xbar = colMeans(X) k = length(xbar) X_xbar = t(t(X)-xbar) s2x = colSums(X_xbar^2)/(n-1) zX = t(t(X_xbar)/sqrt(s2x)) zX[1:10,] # or just zX = scale(X) Sxx = t(zX) %*% as.matrix(zX) Sxx Sxxinv = solve(Sxx) Sxxinv Sxy = t(zX) %*% (y-ybar) Syy = sum((y-ybar)^2) s2 = (Syy - t(Sxy) %*% Sxxinv %*% Sxy)/(n-k-1) s2 = s2[1,1] s = sqrt(s2) s betahat = Sxxinv %*% Sxy betahat covbeta = s2 * Sxxinv covbeta varbeta = diag(covbeta) sdbeta = sqrt(varbeta) corrbeta = covbeta/outer(sdbeta,sdbeta) corrbeta cov(zX[,'height'],zX[,'age']) plot(zX[,'height'],zX[,'age']) cov(zX[,'height'],zX[,'male']) plot(zX[,'height'],zX[,'male']) ## Tuesday 2 May 2017 data = read.table('found_rm.dat') x_rm = data[,1] y_rm = data[,2] n_rm = length(y_rm) xbar_rm = mean(x_rm) plot(x_rm,y_rm) library(rstan) rstan_options(auto_write = TRUE) options(mc.cores = parallel::detectCores()) modelString = " data { int n; int y[n]; vector[n] x; real xbar; } parameters { real alpha; real beta; } model { y ~ bernoulli_logit(alpha + beta*(x-xbar)); }" DSO = stan_model(model_code=modelString) rmFit = sampling(object=DSO, data=list(y=y_rm,x=x_rm, n=n_rm,xbar=xbar_rm), chains=3, warmup=500, iter=4000, seed=20170502) rm_alpha = extract(rmFit,'alpha')$alpha rm_beta = extract(rmFit,'beta')$beta alphamin = min(rm_alpha) alphamax = max(rm_alpha) betamin = min(rm_beta) betamax = max(rm_beta) Ngrid = 101 ones = rep(1,Ngrid) alphas = seq(from=alphamin,to=alphamax,length.out=Ngrid) betas = seq(from=betamin,to=betamax,length.out=Ngrid) alphagrid = outer(alphas,ones,'*') betagrid = outer(ones,betas,'*') loglike = matrix(rep(0,Ngrid*Ngrid),ncol=Ngrid) for (i in 1:n_rm){ lami = alphagrid + betagrid * (x_rm[i]-xbar_rm) loglike = loglike + y_rm[i] * lami loglike = loglike - log(1+exp(lami)) } likelihood = exp(loglike) contour(alphas,betas,likelihood) points(rm_alpha,rm_beta,pch='.',col='blue') rm_x50 = xbar_rm - rm_alpha/rm_beta library(rethinking) dens(rm_x50) plot(rm_x50,rm_beta,type='p',pch='.') rm_p0_45 = logistic(rm_beta*(0.45-rm_x50)) hist(rm_p0_45,breaks=seq(0,1,length.out=100),freq=FALSE) rm_p0_50 = logistic(rm_beta*(0.50-rm_x50)) hist(rm_p0_50,breaks=seq(0,1,length.out=100),freq=FALSE) rm_p0_55 = logistic(rm_beta*(0.55-rm_x50)) hist(rm_p0_55,breaks=seq(0,1,length.out=100),freq=FALSE) Nx = 100 xlist = seq(from=-0.2,to=1.2,length.out=Nx) px_25 = rep(0/0,Nx) px_50 = rep(0/0,Nx) px_75 = rep(0/0,Nx) for (l in 1:Nx) { px = logistic(rm_beta*(xlist[l]-rm_x50)) px_25[l] = quantile(px,.25) px_50[l] = quantile(px,.50) px_75[l] = quantile(px,.75) } plot(x_rm,y_rm) lines(xlist,px_25,col='blue') lines(xlist,px_50,col='green') lines(xlist,px_75,col='blue') ## Thursday 4 May 2017 data = read.table('found2_rm.dat') x1_rm = data[,1] x2_rm = data[,2] y_rm = data[,3] n_rm = length(y_rm) x1bar_rm = mean(x1_rm) x2bar_rm = mean(x2_rm) plot(x1_rm,x2_rm) library(rstan) rstan_options(auto_write = TRUE) options(mc.cores = parallel::detectCores()) modelString = " data { int n; int y[n]; vector[n] x1; vector[n] x2; } parameters { real x50; real beta1; real beta2; } model { y ~ bernoulli_logit(beta1*(x1-x50) + beta2*x2); }" DSO = stan_model(model_code=modelString) rmFit = sampling(object=DSO, data=list(y=y_rm,n=n_rm, x1=x1_rm,x2=x2_rm), chains=3, warmup=500, iter=4000, seed=20170504) rm_x50 = extract(rmFit,'x50')$x50 rm_beta1 = extract(rmFit,'beta1')$beta1 rm_beta2 = extract(rmFit,'beta2')$beta2 plot(rm_x50,rm_beta1,type='p',pch='.') plot(rm_x50,rm_beta2,type='p',pch='.') plot(rm_beta1,rm_beta2,type='p',pch='.') cor(rm_beta1,rm_beta2) effmodelString = " data { int n; int y[n]; vector[n] x1; vector[n] xeff; } parameters { real xeff50; real beff; real gamma; } model { gamma ~ normal(0,1); y ~ bernoulli_logit(beff*(xeff-xeff50) + gamma*x1); }" effDSO = stan_model(model_code=effmodelString) effrmFit = sampling(object=effDSO, data=list(y=y_rm,n=n_rm, x1=x1_rm,xeff=(x1_rm+x2_rm)), chains=3, warmup=500, iter=4000, seed=201705041) effrm_xeff50 = extract(effrmFit,'xeff50')$xeff50 effrm_beff = extract(effrmFit,'beff')$beff effrm_gamma = extract(effrmFit,'gamma')$gamma Nx = 100 xlist = seq(from=-0.2,to=1.2,length.out=Nx) px_25 = rep(0/0,Nx) px_50 = rep(0/0,Nx) px_75 = rep(0/0,Nx) for (l in 1:Nx) { px = logistic(effrm_beff*(xlist[l]-effrm_xeff50)) px_25[l] = quantile(px,.25) px_50[l] = quantile(px,.50) px_75[l] = quantile(px,.75) } plot(x1_rm+x2_rm,y_rm) lines(xlist,px_25,col='blue') lines(xlist,px_50,col='green') lines(xlist,px_75,col='blue')