# this script reconciles R version shown here line by line to Matlab intermediates run by Jeff Id # the script is more or less a transliteration of Tapio Schneider with no attempt yet at simplification # see #http://www.gps.caltech.edu/~tapio/imputation/regem.m pttls.m # companion script puts these into an iteration, but shown slowly here for reference ###FUNCTIONS #1. pttls (transliterated from Tapio Schneider) #2. anom - used on many occasions pttls= function ( V, d, colA, colB, r ) { #d here is eigenvalue NOT squared eigenvalue (Schneider) (na=nrow(V)) #23 (ma = ncol(V)) #23 # ma must exceed nd (nd = length(d)) ; #23 n = length(colA); # 23 number of columns of A (number of variables) k = length(colB); # 82 number of right-hand sides nr = length(r); #1: nr appears to be only 1 in relevant cases # initialize output variables Xr = array(0, dim=c(n,k, nr));dim(Xr) # 23 82 1 #assume k = 1 Sr = array(0,dim=c(k, k,nr));dim(Sr) # 82 82 1 #assume k =1 3D in Schneider rho = array(0,dim=c(nr,1)); dim(rho) # 1 1 if (nargout > 2) eta = array(0,dim=c(nr,1)); dim(eta)# 1 1 if (nargout==4) # compute a separate solution for each r # extra since only one r for (ir in 1:nr) { #nr rc = r[ir]; #3 V11 = V[colA, 1:rc];# 23 3 V21 = as.matrix(V[colB, 1:rc]); # 82 3 Xr[,,ir] = V11 %*% solve( t(V11) %*% V11) %*% t(V21) # dim(Xr[,,ir]) #23 82 # estimated covariance matrix of residuals dB0'*dB0 (up to a scaling factor) V22 = V[colB, (rc+1):na] #102 82 Sr[,,ir] = V22 %*% diag( d[(rc+1):na]^2) %*% t(V22) # 1,] 0 0 4.146356 0 0 # alternative formula when all left singular vectors are given (not used) rho[ir] = sqrt(sum(d[(rc+1):nd]^2)); # 8.4563 # residual norm = norm( [dA0 dB0], 'fro') eta[ir] = sqrt( ssq(Xr[,,ir]) ) # 0.5372863 #implements eta[ir] = norm(Xr[,ir], 'fro'); # solution norm = norm( Xr, 'fro') } # ir loop pttls=list(Xr=Xr,Sr=Sr,rho=rho,eta=eta) pttls } #function anom= function(x,reference=1961:1990,M=15) { tsp0=floor(tsp(x)) month= 1+round(12* c(time(x))%%1) temp=!is.na(match(reference,tsp0[1]:tsp0[2])) if(sum(temp)0),sum(A<0) ) ) # 760 436 test=t(test) test=ts(test,start=c(1957,1),freq=12) #GDD(file="d:/climate/images/2009/steig/positive_coefficient)step1.gif",type="gif",w=420,h=300) plot.ts(test[,1]/(test[,1]+test[,2]),type="l",ylab="") title("Positive Coefficient Proportion - Step 1") #dev.off() regem[[1]]=list(X=X,C= C,B=B,S=S,dXmis=dXmis) ##STEP TWO iter=2 peff_ave=0 CovRes=array(0,dim=c(p,p)) D=sqrt(diag(C)) ;D[1:5]# also sd # 0.3994596 1.7490152 2.1634429 0.4639402 1.8015410 range( diag(1/D)%*%C%*% diag(1/D) - cor(X) ) # -1.776357e-15 2.553513e-15 X=X%*%diag(1/D) #standardize to sd=1 C=diag(1/D)%*%C%*% diag(1/D) range(C-cor(X)) # -1.776357e-15 2.553513e-15 svd.ant=svd(C) p_eff=trunc svd.ant$d[1:5] # 34.816604 17.551981 13.261866 2.544238 1.972014 Xmis=array(0,dim=c(n,p)) #placeholders Xerr=array(0,dim=c(n,p)) B=list() S=list() for(j in 1:n) { test=pttls( svd.ant$v,svd.ant$d ,colA=kavlr[[j]],colB=kmisr[[j]],r=3) B[[j]]=test$Xr[,,1]; #dim(B) #23 82 row.names(B[[j]])=kavlr[[j]]; dimnames(B[[j]])[[2]]=kmisr[[j]] S[[j]]=test$Sr[,,1]; #dim(S) #82 82 Xerr[j,kmisr[[j]] ]=dofC/dofS * t (sqrt(diag(S[[j]]))) Xmis[j,kmisr[[j]] ]= X[j,kavlr[[j]]] %*%B[[j]] CovRes[ kmisr[[j]] , kmisr[[j]] ]= CovRes[ kmisr[[j]] , kmisr[[j]] ]+S[[j]] } ##rescale to original scaling # X first step X=X1a=scale(X,center=FALSE,scale=1/D) # not final yet # Xmis Xmis=scale(Xmis,center=FALSE,scale=1/D) plot(Xmis[,36],jeff$Xmis2[,36]);abline(0,1) #perfect match range(Xmis[,36]-jeff$Xmis2[,36]) # -2.336856e-05 1.459296e-05 # Xerr Xerr=scale(Xerr,center=FALSE,scale=1/D) # C, CovRes range(cov(X)-diag(D)%*%C%*% diag(D) ) # -1.154632e-14 7.993606e-15 C= diag(D)%*%C%*% diag(D) #rescale to covariance CovRes=diag(D)%*% CovRes %*% diag(D) #rescale # dXmis dXmis=sqrt( ssq( Xmis[indmis]-X[indmis]))/sqrt(nmis) c(dXmis,jeff$dXmis[iter,3]) # 0.4673552 0.4674000 # X second part X[indmis]=Xmis[indmis] #update data X=scale(X,scale=FALSE) # #recenter Mup=attributes(X)$"scaled:center";round(Mup,3)[1:7] # -0.004 -0.060 -0.034 -0.008 -0.049 -0.030 -0.052 M=M+Mup ;round(M,3)[1:7]#updated mean vector #-0.011 -0.026 -0.075 -0.052 -0.143 -0.086 -0.260 j=36;plot(X[,j]-jeff$X2[,j],type="h",col=col0,ylim=c(-.5,.5)); #this matches perfectly #barplot( apply(abs(X-jeff$X1),2,max),ylim=c(0,.5)) #perfect # C second part C=t(X)%*%X/dofC #cov(X) range(C-jeff$C2) # -0.0004164527 0.0004466800 #summarize iteration comparisons range(X-jeff$X2) # -0.0005188823 0.0006259449 range(C-jeff$C2) # -0.0004164527 0.0004466800 range(Xmis-jeff$Xmis2) # -9.410853e-05 1.098465e-04 dXmis-jeff$dXmis[iter,3] # -4.476494e-05 #examine coeffiecients barplot( B[[1]][,1],las=3) test=sapply(B, function(A) c(sum(A>0),sum(A<0) ) ) # 760 436 test=t(test) test=ts(test,start=c(1957,1),freq=12) GDD(file="d:/climate/images/2009/steig/positive_coefficient_step1.gif",type="gif",w=420,h=300) plot.ts(test[,1]/(test[,1]+test[,2]),type="l",ylab="") title("Positive Coefficient Proportion - Step 2") dev.off() regem[[2]]=list(X=X,C= C,B=B,S=S,dXmis=dXmis)