###FUNCTIONS #1. pttls (transliterated from Tapio Schneider) #2. anom - used on many occasions #pttls is used in regem_pttls with call: # test=pttls( svd.ant$v,svd.ant$d ,colA=kavlr[[j]],colB=kmisr[[j]],r=regpar) pttls= function ( V, d, colA, colB, r ) { #d here is eigenvalue NOT squared eigenvalue (Schneider) (na=nrow(V)) #23 (ma = ncol(V)) #23 # ma must exceed nd (nd = length(d)) ; #23 n = length(colA); # 23 number of columns of A (number of variables) k = length(colB); # 82 number of right-hand sides nr = length(r); #1: nr appears to be only 1 in relevant cases # initialize output variables Xr = array(0, dim=c(n,k, nr));dim(Xr) # 23 82 1 #assume k = 1 Sr = array(0,dim=c(k, k,nr));dim(Sr) # 82 82 1 #assume k =1 3D in Schneider rho = array(0,dim=c(nr,1)); dim(rho) # 1 1 if (nargout > 2) eta = array(0,dim=c(nr,1)); dim(eta)# 1 1 if (nargout==4) # compute a separate solution for each r # extra since only one r for (ir in 1:nr) { #nr rc = r[ir]; #3 V11 = V[colA, 1:rc];# 23 3 if(!(length(colB)==1)) V21 = as.matrix(V[colB, 1:rc]) else V21 = t(V[colB, 1:rc]); # 82 3 Xr[,,ir] = V11 %*% solve( t(V11) %*% V11) %*% t(V21) # dim(Xr[,,ir]) #23 82 # estimated covariance matrix of residuals dB0'*dB0 (up to a scaling factor) if(!(length(colB)==1)) V22 = as.matrix(V[colB, (rc+1):na]) else V22 = t(V[colB, (rc+1):na]) ; # 82 3 #V22 = V[colB, (rc+1):nd] #102 82 Sr[,,ir] = V22 %*% diag( d[(rc+1):nd]^2) %*% t(V22) # 1,] 0 0 4.146356 0 0 # alternative formula when all left singular vectors are given (not used) rho[ir] = sqrt(sum(d[(rc+1):nd]^2)); # 8.4563 # residual norm = norm( [dA0 dB0], 'fro') eta[ir] = sqrt( ssq(Xr[,,ir]) ) # 0.5372863 #implements eta[ir] = norm(Xr[,ir], 'fro'); # solution norm = norm( Xr, 'fro') } # ir loop pttls=list(Xr=Xr,Sr=Sr,rho=rho,eta=eta) pttls } #function ##emulation of Schneider PTTLS RegEM regem_pttls=function(X,maxiter=30,tol=.01,regpar=3,method="default",startmethod="zero") { regem=list() ## initial infill n=nrow(X);p=ncol(X) indmis=is.na(X); nmis=sum(indmis) #39551 kavlr=rep(list(NA),n) kmisr=rep(list(NA),n) for( j in 1:n) { kavlr[[j]]= (1:p)[!indmis[j,]]; kmisr[[j]]= (1:p)[indmis[j,]]} #this sets up missing values on line by line basis (dofC=n-1) #599 (trunc=min( regpar,n-1,p)) #3 (dofS=dofC-trunc) #596 if(startmethod=="zero") { X=scale(X,scale=FALSE) #center first M= attributes(X)$"scaled:center"; #M[1:5] X[indmis]=0 #zero page 6 } #startmethod if(startmethod=="average") { X=scale(X,scale=FALSE) #center first M= attributes(X)$"scaled:center"; #M[1:5] X[indmis]= array( rep(m0,p),dim=c(n,p)) [indmis] } #startmethod iter=0 dXmis=10^6 while (iter <=maxiter & dXmis>tol) { iter=iter+1 peff_ave=0 C=t(X)%*%X/dofC #this is the same as cov(X) #range( (t(X)%*%X)/dofC - cov(X)) # -7.105427e-15 1.243450e-14 CovRes=array(0,dim=c(p,p)) D=sqrt(diag(C)) ;#D[1:5]# also sd #range( diag(1/D)%*%C%*% diag(1/D) - cor(X) ) # -1.776357e-15 2.553513e-15 X=X%*%diag(1/D) #standardize to sd=1 C=diag(1/D)%*%C%*% diag(1/D) #range(C-cor(X)) # -1.776357e-15 2.553513e-15 svd.ant=svd(C) p_eff=trunc neigs=ncol(svd.ant$v) #svd.ant$d[1:5] # 34.816604 17.551981 13.261866 2.544238 1.972014 Xmis=array(0,dim=c(n,p)) #placeholders Xerr=array(0,dim=c(n,p)) B=list() S=list() for(j in 1:n) { test=pttls( svd.ant$v,svd.ant$d[1:neigs] ,colA=kavlr[[j]],colB=kmisr[[j]],r=regpar) B[[j]]=test$Xr[,,1]; #dim(B) #23 82 row.names(B[[j]])=kavlr[[j]]; dimnames(B[[j]])[[2]]=kmisr[[j]] S[[j]]=test$Sr[,,1]; #dim(S) #82 82 Xerr[j,kmisr[[j]] ]=dofC/dofS * t (sqrt(diag(S[[j]]))) Xmis[j,kmisr[[j]] ]= X[j,kavlr[[j]]] %*%B[[j]] CovRes[ kmisr[[j]] , kmisr[[j]] ]= CovRes[ kmisr[[j]] , kmisr[[j]] ]+S[[j]] } ##rescale to original scaling # X first step X=X1a=scale(X,center=FALSE,scale=1/D) # Xmis Xmis=scale(Xmis,center=FALSE,scale=1/D) #range(Xmis[,36]-jeff$Xmis2[,36]) # -2.336856e-05 1.459296e-05 # Xerr Xerr=scale(Xerr,center=FALSE,scale=1/D) # C, CovRes # range(cov(X)-diag(D)%*%C%*% diag(D) ) # -1.154632e-14 7.993606e-15 C= diag(D)%*%C%*% diag(D) #rescale to covariance CovRes=diag(D)%*% CovRes %*% diag(D) #rescale # dXmis dXmis=sqrt( ssq( Xmis[indmis]-X[indmis]))/sqrt(nmis) #c(dXmis,jeff$dXmis[iter,3]) # 0.4673552 0.4674000 # X second part X[indmis]=Xmis[indmis] #update data X=scale(X,scale=FALSE) # #recenter Mup=attributes(X)$"scaled:center";#round(Mup,3)[1:7] M=M+Mup ; #round(M,3)[1:7]#updated mean vector #j=36;plot(X[,j]-jeff$X2[,j],type="h",col=col0,ylim=c(-.5,.5)); # C second part C=t(X)%*%X/dofC #cov(X) # range(C-jeff$C2) # -0.0004164527 0.0004466800 #summarize iteration comparisons # range(X-jeff$X2) # -0.0005188823 0.0006259449 # range(C-jeff$C2) # -0.0004164527 0.0004466800 # range(Xmis-jeff$Xmis2) # -9.410853e-05 1.098465e-04 # dXmis-jeff$dXmis[iter,3] # -4.476494e-05 if(method=="verbose") regem[[iter]]=list(X=X,C=C,B=B,S=S,dXmis=dXmis) else regem[[iter]]=list(X=X,dXmis=dXmis) } #iter N=length(regem) regem[[N]]$B=B;regem[[N]]$Xmis=Xmis;regem[[N]]$M=M;regem[[N]]$C= C regem_pttls=regem regem_pttls } #function anom= function(x,reference=1961:1990,M=15) { tsp0=floor(tsp(x)) month= 1+round(12* c(time(x))%%1) temp=!is.na(match(reference,tsp0[1]:tsp0[2])) if(sum(temp)tol) { iter=iter+1 peff_ave=0 C=t(X)%*%X/dofC #this is the same as cov(X) #range( (t(X)%*%X)/dofC - cov(X)) # -7.105427e-15 1.243450e-14 CovRes=array(0,dim=c(p,p)) D=sqrt(diag(C)) ;#D[1:5]# also sd #range( diag(1/D)%*%C%*% diag(1/D) - cor(X) ) # -1.776357e-15 2.553513e-15 X=X%*%diag(1/D) #standardize to sd=1 C=diag(1/D)%*%C%*% diag(1/D) #range(C-cor(X)) # -1.776357e-15 2.553513e-15 svd.ant= svd(X,nu=regpar,nv=regpar) if(regpar>1) Xmis= svd.ant$u %*% diag(svd.ant$d[1:regpar]) %*% t(svd.ant$v) if(regpar==1) Xmis= as.matrix(svd.ant$u) %*% as.matrix(svd.ant$d[1]) %*% t(svd.ant$v) ##rescale to original scaling # X first step X=X1a=scale(X,center=FALSE,scale=1/D) # Xmis Xmis=scale(Xmis,center=FALSE,scale=1/D) #range(Xmis[,36]-jeff$Xmis2[,36]) # -2.336856e-05 1.459296e-05 # C, CovRes # range(cov(X)-diag(D)%*%C%*% diag(D) ) # -1.154632e-14 7.993606e-15 C= diag(D)%*%C%*% diag(D) #rescale to covariance CovRes=diag(D)%*% CovRes %*% diag(D) #rescale # dXmis dXmis=sqrt( ssq( Xmis[indmis]-X[indmis]))/sqrt(nmis) #c(dXmis,jeff$dXmis[iter,3]) # 0.4673552 0.4674000 # X second part X[indmis]=Xmis[indmis] #update data X=scale(X,scale=FALSE) # #recenter Mup=attributes(X)$"scaled:center";#round(Mup,3)[1:7] M=M+Mup ; #round(M,3)[1:7]#updated mean vector #j=36;plot(X[,j]-jeff$X2[,j],type="h",col=col0,ylim=c(-.5,.5)); # C second part C=t(X)%*%X/dofC #cov(X) # range(C-jeff$C2) # -0.0004164527 0.0004466800 #summarize iteration comparisons # range(X-jeff$X2) # -0.0005188823 0.0006259449 # range(C-jeff$C2) # -0.0004164527 0.0004466800 # range(Xmis-jeff$Xmis2) # -9.410853e-05 1.098465e-04 # dXmis-jeff$dXmis[iter,3] # -4.476494e-05 if(method=="verbose") regem[[iter]]=list(X=X,C=C,B=B,S=S,dXmis=dXmis) else regem[[iter]]=list(X=X,dXmis=dXmis) } #iter N=length(regem) regem[[N]]$svd=svd.ant;regem[[N]]$Xmis=Xmis regem_pca=regem regem_pca } #function