from __future__ import division import numpy as np from scipy import stats xi = np.array([18.0, 18.7, 14.7, 17.1, 11.2, 14.2]) yi = np.array([13.1, 14.2, 15.1, 14.9, 13.5, 11.7]) figure(); plot(xi,yi,'ko'); plot([11,19],[11,19]); savefig('notes05_sr_scatter.eps',bbox_inches='tight'); di = yi - xi; di idx = np.argsort(np.abs(di)) di[idx] n = len(xi) ranks = np.arange(n) + 1; ranks posranks = ranks[di[idx] >= 0]; posranks t = posranks.sum(); t flags = np.array([[np.right_shift(d,i) % 2 for i in xrange(n)] for d in xrange(2**n)]) flags ranks*flags (ranks*flags).shape wilco = (ranks*flags).sum(axis=-1) wmin = min(wilco) wmax = max(wilco) figure(); counts,bins = np.histogram(wilco,bins=np.arange(wmin,wmax+2)) wvals = bins[:-1] bar(wvals,counts,align='center'); xlim(wmin-0.5,wmax+0.5); xlabel('$t$'); ylabel('Number of outcomes'); title('Histogram for signed rank stat w/$n=%d$' % n); savefig('notes05_wilco_hist.eps',bbox_inches='tight'); np.sum(wilco <= t) 2*np.mean(wilco <= t) stats.wilcoxon(xi,yi) np.mean(wilco <= 2) np.mean(wilco >= 19) theta = 0. di[np.argsort(np.abs(di+theta))]+theta theta = -0.025 di[np.argsort(np.abs(di+theta))]+theta theta = -0.05 di[np.argsort(np.abs(di+theta))]+theta theta = -0.1 di[np.argsort(np.abs(di+theta))]+theta theta = -0.4 di[np.argsort(np.abs(di+theta))]+theta theta = -0.41 di[np.argsort(np.abs(di+theta))]+theta theta = 0.05 di[np.argsort(np.abs(di+theta))]+theta theta = 0.1 di[np.argsort(np.abs(di+theta))]+theta theta = 0.9 di[np.argsort(np.abs(di+theta))]+theta theta = 1.05 di[np.argsort(np.abs(di+theta))]+theta theta = 1.1 di[np.argsort(np.abs(di+theta))]+theta theta = 2.25 di[np.argsort(np.abs(di+theta))]+theta 0.5*(di[:,None]+di[None,:]) avgs = np.sort(np.array(list(flatten([[0.5*(di[i]+di[j]) for j in xrange(i,n)] for i in xrange(n)])))) avgs theta = -0.399 di[np.argsort(np.abs(di+theta))]+theta theta = -0.401 di[np.argsort(np.abs(di+theta))]+theta theta = 4.501 di[np.argsort(np.abs(di+theta))]+theta theta = 4.499 di[np.argsort(np.abs(di+theta))]+theta x <- c(18.0, 18.7, 14.7, 17.1, 11.2, 14.2) y <- c(13.1, 14.2, 15.1, 14.9, 13.5, 11.7) wilcox.test(x,y,paired=TRUE) n <- 6 t <- seq(0,n*(n+1)/2) plot(t,dsignrank(t,n)) 2*psignrank(4,n) qsignrank(0.025,n) psignrank(1,n) v <- c(180, 187, 147, 171, 113, 142) u <- c(131, 142, 151, 149, 135, 117) wilcox.test(v,u,paired=TRUE) xn <- c(18.0, 18.7, 14.7, 17.1, 11.3, 14.2) dn = y-xn dn[4] dn[5] dn[4] + dn[5] vi = xi + np.array([0,0,0,0,0.1,0]) np.median(0.5*(di[:,None]+di[None,:])) from __future__ import division import numpy as np n = 20 wilco = (flags*np.arange(n,0,-1)).sum(axis=-1) data = stats.norm(loc=0.5).rvs(size=n) W, pvalz = stats.wilcoxon(data) pval = 2.*np.mean(wilco<=W) from rpy2.robjects.packages import importr rbase = importr('base') rstats = importr('stats',on_conflict="warn") rstats.qsignrank(0.05,100) rstats.qsignrank(0.95,100) import rpy2.robjects.numpy2ri as rpyn rpyn.numpy2ri(t) rt = rpyn.numpy2ri(t) psignrank(rt,n) np.array(psignrank(rt,n)) np.array(psignrank(rt,n))*2**n np.array(dsignrank(rt,n))*2**n dsignrank = rstats.dsignrank np.array(dsignrank(rt,n))*2**n from __future__ import division import numpy as np from scipy import stats xi = np.array([18.0, 18.7, 14.7, 17.1, 11.2, 14.2]) yi = np.array([13.1, 14.2, 15.1, 14.9, 13.5, 11.7]) di = yi - xi; di n = len(di) from rpy2.robjects.packages import importr rstats = importr('stats',on_conflict="warn") print(rstats.qsignrank(0.05,6)) print(rstats.psignrank(3,6)) print(rstats.psignrank(2,6)) print(2.*(rstats.psignrank(2,6)[0])) 6/64 1-6/64 theta = 0. di[np.argsort(np.abs(di-theta))]-theta theta = 0.025 di[np.argsort(np.abs(di-theta))]-theta thetavec = np.arange(-5,3.1,0.5) figure(); for i in xrange(n): plot(di[i]-thetavec,thetavec,'g-',lw=1.5); plot(thetavec-di[i],thetavec,'r--',lw=1.5); xlim(0,8); ylim(0,0.2); theta = 0.05 di[np.argsort(np.abs(di-theta))]-theta ylim(0,1); theta = 0.4 di[np.argsort(np.abs(di-theta))]-theta ylim(-5,3); grid(True); xlabel(r'$|d_i-\theta|$'); ylabel(r'$\theta$'); savefig('notes05_sr_ci.eps',bbox_inches='tight'); 0.5*(di[:,None]+di[None,:]) avgs = np.sort(np.array(list(flatten([[0.5*(di[i]+di[j]) for j in xrange(i,n)] for i in xrange(n)])))) avgs (avgs[2-1],avgs[19-1]) from __future__ import division import numpy as np from scipy import stats xi = np.array([8.56, 5.03, 48.1, 1.31, 4.82]) yi = np.array([15.0, 12.3, 28.0, 13.9]) n = len(xi); n m = len(yi); m N = n+m; N combo = np.concatenate((xi,yi)) idx = np.argsort(combo) combo[idx] xflags = np.concatenate(([True,]*n,[False,]*m)) xflags[idx] ranks = np.arange(N)+1 xranks = ranks[xflags[idx]]; xranks Wx = np.sum(xranks); Wx yflags = np.bitwise_not(xflags) yranks = ranks[yflags[idx]]; yranks Wy = np.sum(yranks); Wy Ux = np.sum(xi[None,:] > yi[:,None]); Ux Uy = np.sum(yi[None,:] > xi[:,None]); Uy Wx + Wy N*(N+1)//2 Wx - Ux n*(n+1)//2 Wy - Uy m*(m+1)//2 Ux + Uy m*n U = np.arange(n*m+1) from rpy2.robjects.packages import importr rstats = importr('stats',on_conflict="warn") rstats.dwilcox(19-15,5,4) 126 * rstats.dwilcox(19-15,5,4)[0] rstats.pwilcox(19-15,5,4) import rpy2.robjects.numpy2ri as rpyn Upmf = np.array(rstats.dwilcox(rpyn.numpy2ri(U),n,m)) figure(); bar(U,Upmf,align='center'); xlabel(r'$U$'); ylabel(r'$p(U)$'); title(r'pmf of Mann-Whitney $U$ with $n=%d$, $m=%d$' % (n,m)); xlim(-1,n*m+1); savefig('notes05_mw_pmf.eps',bbox_inches='tight'); mu = n*(N+1)//2; mu sigma = np.sqrt(n*m*(N+1)/12.); sigma**2 print(sigma) z = (Wx - mu)/sigma; z stats.norm.cdf(z) stats.norm.cdf((Wx + 0.5 - mu)/sigma) rstats.qwilcox(0.05,5,4) rstats.pwilcox(3,5,4) rstats.pwilcox(2,5,4) diffs = np.sort(xi)[:,None]-np.sort(yi)[None,:]; diffs sorteddiffs = np.sort(list(flatten(diffs))); sorteddiffs (sorteddiffs[2],sorteddiffs[-2-1]) from __future__ import division import numpy as np from scipy import stats xij = [np.array([ 14.97, 5.80, 25.03, 5.50 ]), np.array([ 5.83, 13.96, 21.96]), np.array([ 17.89, 23.03, 61.09, 18.62, 55.51])] ni = np.array([len(x) for x in xij]); ni k = len(ni); k N = np.sum(ni); N group = np.concatenate([(i,)*ni[i] for i in xrange(k)]) xijflat = np.concatenate(xij) idx = np.argsort(xijflat) xijflat[idx] group[idx] ranks = np.arange(N)+1; ranks Rij = [ranks[group[idx]==i] for i in xrange(k)]; Rij Ri = np.array([np.sum(rij) for rij in Rij]); Ri Ri/ni 0.5*(N+1) T = 12/(N*(N+1)) * np.sum((Ri-ni*0.5*(N+1))**2/ni); T stats.chi2(df=k-1).sf(T) from __future__ import division import numpy as np from scipy import stats n = 100 theta = 0.2 c_q = np.arange(0,n+2) c_q p0 = 0.5 p1 = stats.norm(loc=theta).sf(0); p1 alpha_q = stats.binom(n,p0).sf(c_q-0.5) gamma_q = stats.binom(n,p1).sf(c_q-0.5) figure(); plot(alpha_q,gamma_q,'b-',label='sign test'); xlabel(r'$\alpha$'); ylabel(r'$\gamma$'); title(r'ROC curve for $N(%.1f,1)$, $n=%d$' % (theta,n)); xlim(0,1); ylim(0,1); grid(True); plot(alpha_q,alpha_q,'k:'); N = 10**4 x_ji = stats.norm.rvs(size=(N,n)) xbar_j = np.mean(x_ji,axis=-1) s_j = np.std(x_ji,ddof=1,axis=-1) t_j0 = xbar_j / (s_j/np.sqrt(n)) t_j1 = (xbar_j+theta) / (s_j/np.sqrt(n)) cmin = -3 cmax = 3 + theta * np.sqrt(n) c_t = np.linspace(cmin,cmax,1000) alpha_t = np.mean(t_j0[None,:] >= c_t[:,None], axis=-1) gamma_t = np.mean(t_j1[None,:] >= c_t[:,None], axis=-1) plot(alpha_t,gamma_t,'g--',label=r'$t$-test'); legend(loc='lower right'); p1 = stats.laplace(loc=theta,scale=np.sqrt(0.5)).sf(0); p1 alpha_q = stats.binom(n,p0).sf(c_q-0.5) gamma_q = stats.binom(n,p1).sf(c_q-0.5) savefig('notes05_roc_gauss.eps',bbox_inches='tight'); figure(); plot(alpha_q,gamma_q,'b-',label='sign test'); xlabel(r'$\alpha$'); ylabel(r'$\gamma$'); title(r'ROC curve for Laplace w/mean %.1f, std 1, $n=%d$' % (theta,n)); xlim(0,1); ylim(0,1); grid(True); plot(alpha_q,alpha_q,'k:'); N = 10**4 x_ji = stats.laplace(scale=np.sqrt(0.5)).rvs(size=(N,n)) xbar_j = np.mean(x_ji,axis=-1) s_j = np.std(x_ji,ddof=1,axis=-1) t_j0 = xbar_j / (s_j/np.sqrt(n)) t_j1 = (xbar_j+theta) / (s_j/np.sqrt(n)) cmin = -3 cmax = 3 + theta * np.sqrt(n) c_t = np.linspace(cmin,cmax,1000) alpha_t = np.mean(t_j0[None,:] >= c_t[:,None], axis=-1) gamma_t = np.mean(t_j1[None,:] >= c_t[:,None], axis=-1) plot(alpha_t,gamma_t,'g--',label=r'$t$-test'); legend(loc='lower right'); savefig('notes05_roc_laplace.eps',bbox_inches='tight'); figure(); semilogx(alpha_q,gamma_q,'b-',label='sign test'); semilogx(alpha_t,gamma_t,'g--',label=r'$t$-test'); semilogx(alpha_q,alpha_q,'k:'); xlabel(r'$\alpha$'); ylabel(r'$\gamma$'); title(r'ROC curve for Laplace w/mean %.1f, std 1, $n=%d$' % (theta,n)); legend(loc='upper left'); xlim(1e-3,1); ylim(0,1); grid(True); savefig('notes05_roc_laplace_log.eps',bbox_inches='tight'); from __future__ import division import numpy as np from scipy import stats alpha = 0.05 theta = 0.2 p0 = 0.5 p1 = stats.norm(loc=theta).sf(0); p1 n = np.array(np.logspace(2,5,100),dtype=int) c_q = stats.binom(n,p0).isf(alpha) beta_q = stats.binom(n,p1).cdf(c_q) figure(); loglog(beta_q,n,'b-',label='sign test'); title(r'Relative Efficiency for $N(%.1f,1)$, $\alpha=%g$' % (theta,alpha)); xlabel(r'$1-\gamma$'); ylabel(r'$n$'); grid(True); c_z = stats.norm.isf(alpha) beta_z = stats.norm(loc=theta*np.sqrt(n)).cdf(c_z) loglog(beta_z,n,'g--',label=r'$t$-test (CLT approx)'); legend(loc='lower left'); savefig('notes05_re_gauss.eps',bbox_inches='tight'); from __future__ import division import numpy as np from scipy import stats import itertools xi = np.array([15.0, 12.3, 18.0, 13.9]) yj = np.array([18.56, 25.03, 48.1, 1.31, 4.82]) n = len(xi); n m = len(yj); m N = n+m; N xbar = np.mean(xi); xbar ybar = np.mean(yj); ybar Ui = np.abs(xi - xbar); Ui Vj = np.abs(yj - ybar); Vj UVcombo = np.concatenate((Ui,Vj)) xflags = np.concatenate(([True,]*n,[False,]*m)) yflags = np.bitwise_not(xflags) ranks = stats.rankdata(UVcombo) Uranks = ranks[xflags]; Uranks Vranks = ranks[yflags]; Vranks np.sum(Uranks) np.sum(Uranks**2) np.sort([np.sum(np.array(foo)**2) for foo in itertools.combinations(range(1,N+1),n)]) np.sort([np.sum(np.array(foo)) for foo in itertools.combinations(range(1,N+1),n)]) from __future__ import division import numpy as np from scipy import stats x = np.array([9.64, 5.91, 3.22, 2.04, 5.49, 9.24, 6.38, 7.79, 0.48, 8.86]) y = np.array([5.53, 3.48, 3.16, 2.98, 7.11, 7.75, 3.37, 8.24, 3.00, 3.75]) xbar = np.mean(x); xbar ybar = np.mean(y); ybar r = np.sum((x-xbar)*(y-ybar))/np.sqrt(np.sum((x-xbar)**2)*np.sum((y-ybar)**2)); r np.corrcoef(x,y) stats.pearsonr(x,y) Rx = stats.rankdata(x) Ry = stats.rankdata(y) Rbar = np.mean(Rx); Rbar np.mean(Ry) rho = np.sum((Rx-Rbar)*(Ry-Rbar))/np.sqrt(np.sum((Rx-Rbar)**2)*np.sum((Ry-Rbar)**2)); rho np.sum((Rx-Rbar)**2) np.sum((Ry-Rbar)**2) n = len(x) (n*(n**2-1))/12. 1 - 6/(n*(n**2-1)) * np.sum((Rx-Ry)**2) np.corrcoef(Rx,Ry) stats.spearmanr(x,y) from scipy.special import factorial factorial(10) ranks = 1+np.arange(n) import itertools sumsq = np.array([np.sum((ranks-perm)**2) for perm in itertools.permutations(ranks)], dtype=int) nullrho = 1. - 6./(n*(n**2-1.)) * sumsq 2.*np.mean(nullrho >= rho) np.mean(nullrho) np.std(nullrho) 1./np.sqrt(n-1) 2*stats.norm.sf(rho*np.sqrt(n-1)) hist(nullrho,bins=100,normed=True); xlabel(r'$\rho$'); ylabel('Null pdf') savefig('notes05_rhohist.eps',bbox_inches='tight'); x[:2],y[:2] (x[0]>x[1]),(y[0]>y[1]) True^True not(True^True) not(True^False) not(False^True) not(False^False) not((x[0]>x[1])^(y[0]>y[1])) Nc = np.sum([not((x[i]>x[j])^(y[i]>y[j])) for (i,j) in itertools.combinations(xrange(n),2)]); Nc Nd = np.sum([not((x[i]>x[j])^(y[i]Rx[j])^(Ry[i]>Ry[j])) for (i,j) in itertools.combinations(xrange(n),2)]) np.sum([not((Rx[i]>Rx[j])^(Ry[i]ranks[j])^(perm[i]>perm[j])) for (i,j) in itertools.combinations(xrange(n),2)]) for perm in itertools.permutations(ranks)]) Npairs = n*(n-1)//2; Npairs Nddist = Npairs - Ncdist figure(); hist(Ncdist,bins=np.arange(Npairs+1)) xlabel(r'$N_c$'); ylabel('#'); title('# of concordant pairs (null dist)') savefig('notes05_tauhist.eps',bbox_inches='tight'); taudist = (Ncdist-Nddist)/Npairs min(taudist),max(taudist) np.mean(taudist) np.var(taudist) 2*(2*n+5)/(9*n*(n-1)) sigtau = np.sqrt(2*(2*n+5)/(9*n*(n-1))); sigtau 2*np.mean(Ncdist>=Nc) 2*np.mean(taudist>=tau) 2*stats.norm.sf(tau/sigtau) from __future__ import division import numpy as np from scipy import stats Xij = np.array([[ 2. , 19.86, 9.17], [ 1.05, 3.1 , 3.34], [ 0.14, 25.4 , 26.59], [ 14.6 , 3.93, 10.95]]) b,k = np.shape(Xij) Rij = stats.mstats.rankdata(Xij,axis=-1); Rij Rj = np.sum(Rij,axis=0); Rj T1 = (12/(b*k*(k+1)))*np.sum((Rj-0.5*b*(k+1))**2); T1 stats.chi2(df=k-1).sf(T1) stats.friedmanchisquare(Xij[:,0],Xij[:,1],Xij[:,2]) T2 = ((b-1)*T1)/(b*(k-1)-T1); T2 stats.f(k-1,(b-1)*(k-1)).sf(T2) Mi = np.max(Xij,axis=-1)-np.min(Xij,axis=-1); Mi Qi = stats.rankdata(Mi); Qi Sij = Qi[:,None]*(Rij-0.5*(k+1)); Sij Sj = np.sum(Sij,axis=0); Sj B = np.sum(Sj**2)/b; B A2 = np.sum(Sij**2); A2 b*(b+1)*(2*b+1)*k*(k**2-1)/72 T3 = (b-1)*B/(A2-B); T3 stats.f(k-1,(b-1)*(k-1)).sf(T3) import itertools perms = [perm for perm in itertools.permutations(1+np.arange(k))]; perms