##################### ###THIS IS SCRIPT TO PRODUCE SIMULATIONS, FIGURES AND STATISTICS FOR # Stephen McIntyre and Ross McKitrick # Reply to Huybers ################################################# #PRE REQUISITES: # package sandwich is used in hac-standardized calculation but this affects only one series not used in rest of calcs ############################################### ############################################### ### ## PART 1 # ##FUNCTIONS REQUIRED FOR NARRATIVE SCRIPT TO PRODUCE SIMULATIONS, FIGURES AND STATISTICS FOR # GRL021750 # Reply to Huybers # Stephen McIntyre and Ross McKitrick # ################################################ #LOAD FUNCTIONS AND PACKAGES source("http://www.climateaudit.org/scripts/huybers/functions.txt") ##LOAD COLLATED INFO FROM MBH98 mbhdata="http://www.climateaudit.org/data/mbh98" use0="pairwise.complete.obs" #sparse series url<-"ftp://ftp.ncdc.noaa.gov/pub/data/paleo/paleocean/by_contributor/mann1998/nhem-sparse.dat" g<-read.table(url,skip=1) sparse<-ts(g[,2:3],start=g[1,1],end=g[nrow(g),1]) dimnames(sparse)[[2]]<-c("instr","recon") sparse<-sparse[,1]-mean(sparse[(1902-1853):(1980-1853),1]) T<-sparse calibration<-c(1902,1980) verification<-c(1854,1901) #temperature pcs download.file(file.path(mbhdata,"pc.tab"),"temp.dat",mode="wb");load("temp.dat") ##pc dim(pc) #[1] 92 16 #from 1902 -1993 # temperature eofs used in reconstruction download.file(file.path(mbhdata,"eof.ordermann.tab"),"temp.dat",mode="wb");load("temp.dat") ##pc eof.MBH<-eof;dim(eof) #1082 16 # temperature eigen-values and choose first 16 as only 16 PCs are used download.file(file.path(mbhdata,"tpca-eigenvals.tab"),"temp.dat",mode="wb");load("temp.dat") ##pc lambda<-lambda[1:16,2] lambda.MBH<-lambda #standard deviations download.file(file.path(mbhdata,"gridcell.stdev.ordermann.tab"),"temp.dat",mode="wb");load("temp.dat") ##pc length(gridcell.stdev.ordermann) #1082 #same as mann.scale ##GRIDCELL INFORMATION # MBH uses 1082 gridcells in their "dense" reconstruction and 219 gridcells in their "sparse" reconstruction specified here # load list of MBH "dense" grid-cells download.file(file.path(mbhdata,"gridpoints.tab"),"temp.dat",mode="wb");load("temp.dat") ##pc tempmann<-sort(grid2[,5]) manngrid<-!is.na(match(c(1:2592),tempmann)) ordermann<-order(grid2[,5]) grid2<-grid2[ordermann,] nhtemp<-(grid2[,4]>0) #logical vector for NH gridcells mu<-cos(grid2[,4]*pi/180) #used for areal weighting mu.MBH<-mu # mask for sparse subset of 219 cells #mask nowhere specified but determiend by careful inspection of diagram download.file(file.path(mbhdata,"mask.edited.tab"),"temp.dat",mode="wb");load("temp.dat") ##pc mask.logical<-!is.na(match(grid2[,5],mask.edited)) #sum(mask.logical) #219 #length(mask.logical) ##Original proxy url="http://www.nature.com/nature/journal/v430/n6995/extref/PROXY/data1400.txt" test=read.table(url) #581 23 test=ts(test[,2:23],start=1400);N=nrow(test) test=extend.persist(test) m0=apply(test[(N-78):N,],2,mean);sd0=apply(test[(N-78):N,],2,sd) proxy.MBH=ts(scale(test,center=m0,scale=sd0),start=1400) #Gaspe is series 9; PC1 is series 16 ##NOAMER NETWORK tree<-read.table("http://www.climateaudit.org/data/mbh98/UVA/NOAMER.dat",header=TRUE,sep="\t") tree<-ts( tree[,2:ncol(tree)],start=tree[1,1],end=tree[nrow(tree),1]) tree<-ts( tree[1:581,],start=1400,end=1980) tree<-extend.persist(tree) tree0<-tree dim(tree0) #581 212 tree<-tree[,!is.na(tree[1,])] name1<-dimnames(tree)[[2]] dim(tree)#581 70 name0<-unlist(strsplit(as.character(name1), "\\.")) name0<-array(name0,dim=c(2,length(name0)/2))[1,] name0[67]<-substr(name0[67],1,5) #edits wy005b to wy005 #IDENTIFY CENSORED SERIES IN MBH98 BACKTO_1400-CENSORED CALCULATION url<-"http://www.climateaudit.org/data/mbh98/UVA/TREE/ITRDB/NOAMER" roster<-read.table(file.path(url,"noamer-censored.inf"))#192 roster2<-read.table(file.path(url,"noamer.inf"))#192 series censored.list<-roster2[,2][is.na(match(roster2[,2],roster[,2]))] censored.list<-as.character(censored.list) length(censored.list) #20 temp1<-!is.na(match(name1,censored.list)) #20 items ################# #SOME NOAMER NETWORK STATISTICS ################# ##NUMBER OF SITES BY TREE TYPE AND GRIDCELL download.file("http://www.climateaudit.org/data/tree/northamerica.details.tab","temp.dat",mode="wb");load("temp.dat") dim(details) #3457 9 temp<-!is.na(match(details$id,name0));sum(temp) #70 test<-jones(details$lat[temp],details$long[temp]) tapply(test,factor(test),length) #590 660 661 662 663 664 732 733 734 735 736 738 741 807 810 812 813 # 1 5 4 6 4 1 1 13 9 12 2 2 1 2 3 3 1 tapply(details$type[temp],factor(details$type[temp]),length) #JUOC JUSC PCEN PIAR PIBA PIED PIFL PIJE PILO PIMO PIPO PISF PSME TADI # 6 1 2 5 6 2 7 1 13 3 3 1 10 10 ################################# ## REPLY TO HUYBERS FIGURE 1: DIFFERENT PC ANALYSES WITH VARIOUS SCALING METHODS #################################### pc1=ts(array(NA,dim=c(581,9)),start=1400) dimnames(pc1)[[2]]=c("MBH","cov","cor","hac","censored","mean","mean1400","nobristle","area") #1. MANNOMATIC pc1[,"MBH"]=proxy.MBH[,16] #5. CENSORED censored.PC1<-read.table("http://www.climateaudit.org/data/mbh98/UVA/TREE/ITRDB/NOAMER/BACKTO_1400-CENSORED/pc01.out") pc1[,"censored"]= scale(censored.PC1[,2]) #2. Covariance pc1[,"cov"]=scale(svd( scale(tree,center=TRUE,scale=FALSE))$u[,1]) #3. Correlation pc1[,"cor"]=scale(svd( scale(tree))$u[,1]) #4. HAC (Heteroskedasticity-Autocorrelation) CALCULATIONS library(sandwich) hac.sd<-apply(tree,2,hac) pc1[,"hac"]= scale( svd ( scale(tree,scale=hac.sd) )$u[,1]) #6. AD1400 Network Mean pc1[,"mean1400"]= scale(apply(tree,1,mean)) #7. Full Network MEan pc1[,"mean"]= scale(apply(ts(scale(tree0),start=1400),1,mean,na.rm=TRUE) ) #8. no bristle pc1[,"nobristle"]=scale(svd( scale(tree[,!temp1]))$u[,1]) #AREA WEIGHTED test<-jones(details$lat[temp],details$long[temp]) fred<-details[temp,c(1,3,4,5)] fred$jones<-factor(test) fred$count<-fred$jones levels(fred$count)<-c(tapply(test,factor(test),length)) fred$weight<-1/as.numeric(levels(fred$count))[fred$count] tree.areaweighted<-scale(scale(tree),scale=1/sqrt(fred$weight),center=FALSE) fred$speciescount<-factor(fred$type) levels(fred$speciescount)<-tapply(fred$speciescount,fred$speciescount,length) fred$speciescount<-as.numeric(levels(fred$speciescount))[fred$speciescount] pca.areaweighted<-prcomp(tree.areaweighted) #ts.plot(scale(pca.areaweighted$x[,1]) ) #not plotted, but similar to covariance ### NOW PLOT FIGURE # postscript(file.path("d:/climate/data/huybers","figure1.amended2.eps"), width = 3.3, height =4.2, # horizontal = FALSE, onefile = TRUE, paper = "special", # family = "Helvetica",bg="white",pointsize=8) nf <- layout(array(1:8,dim=c(4,2)),heights=c(1.2,1,1,1.4)) #MBH98 PC1 par(mar=c(0,4,2,0)) #MBH98 plot(1400:1980,pc1[,1],type="h",ylab="",axes=FALSE,xlab="")#,ylim=c(-6.1,4)) abline(h=0,lty=2) axis(side=2,las=2,cex.axis=1.25,font=2)#,at=seq(-3,1),labels=c("","-2","","0","")) axis(side=1,labels=FALSE) box() text(1400,2,cex=1.4,"a",pos=4,font=2) ##covariance PC1 par(mar=c(0,4,0,0)) plot(1400:1980,pc1[,"cov"],axes=FALSE,type="h",ylab="",xlab="",font.lab=2,cex.lab=1.25,ylim=c(-3.0,3.0)) axis(side=2,at=seq(-6,2,2),las=2,cex.axis=1.25,font=2) abline(h=0,lty=2) axis(side=1,labels=FALSE) box() text(1400,2.5,cex=1.4,"b",pos=4,font=2) ##correlation PC1 par(mar=c(0,4,0,0)) plot( 1400:1980,pc1[,"cor"],axes=FALSE,type="h",ylab="",xlab="",font.lab=2,cex.lab=1.25,ylim=c(-3.0,3.0)) axis(side=2,at=seq(-6,2,2),las=2,cex.axis=1.25,font=2) abline(h=0,lty=2) axis(side=1,labels=FALSE) box() text(1400,2.5,cex=1.4,"c",pos=4,font=2) ##hac PC1 par(mar=c(4,4,0,0)) plot( 1400:1980,pc1[,"hac"],axes=FALSE,type="h",ylab="",xlab="",font.lab=2,cex.lab=1.25,ylim=c(-3.0,3.0)) axis(side=2,at=seq(-6,2,2),las=2,cex.axis=1.25,font=2) abline(h=0,lty=2) axis(side=1,cex.axis=1.25,at=seq(1400,2000,100),labels=c("","1500","","1700","","1900",""),font=2) box() text(1400,2.5,cex=1.4,"d",pos=4,font=2) ##CENSORED PC1 par(mar=c(0,0,2,3)) plot( 1400:1980,pc1[,"censored"],axes=FALSE,type="h",ylab="",xlab="",font.lab=2,cex.lab=1.25,ylim=c(-3.0,3.0)) axis(side=4,at=seq(-6,2,2),las=2,cex.axis=1.25,font=2) abline(h=0,lty=2) axis(side=1,labels=FALSE) box() text(1400,2.5,cex=1.4,"e",pos=4,font=2) ##network mean par(mar=c(0,0,0,3)) plot( 1400:1980,pc1[,"mean"],axes=FALSE,type="h",ylab="SD Units",xlab="",font.lab=2,cex.lab=1.25,ylim=c(-3.0,3.0)) axis(side=4,at=seq(-6,2,2),las=2,cex.axis=1.25,font=2) abline(h=0,lty=2) axis(side=1,labels=FALSE) box() text(1400,2.5,cex=1.4,"f",pos=4,font=2) ##AD1400 subset mean par(mar=c(0,0,0,3)) plot( 1400:1980,pc1[,"mean1400"],axes=FALSE,type="h",ylab="SD Units",xlab="",font.lab=2,cex.lab=1.25,ylim=c(-3.0,3.0)) axis(side=4,at=seq(-6,2,2),las=2,cex.axis=1.25,font=2) abline(h=0,lty=2) axis(side=1,labels=FALSE) box() text(1400,2.5,cex=1.4,"g",pos=4,font=2) ##no bristlecone par(mar=c(4,0,0,3)) plot(1400:1980,pc1[,"nobristle"],axes=FALSE,type="h",ylab="SD Units",xlab="",font.lab=2,cex.lab=1.25,ylim=c(-3.0,3.0)) axis(side=4,at=seq(-6,2,2),las=2,cex.axis=1.25,font=2) abline(h=0,lty=2) axis(side=1,cex.axis=1.25,at=seq(1400,2000,100),labels=c("","1500","","1700","","1900",""),font=2) box() text(1400,2.5,cex=1.4,"h",pos=4,font=2) # dev.off() ###################################################### ### ## PART 2 ### ### BENCHMARKING THE REDUCTION OF ERROR STATISTIC FOR THE MBH98 ALGORITHM ####%%%%######################################################### ##LOAD SIMULATED PC1s from MM2005 download.file("http://www.climateaudit.org/data/mm05/sim.1.tab","temp.dat",mode="wb") load("temp.dat") #this is object named Eigen0 that as third item has 1000 simulated PC1s #this is a roster of simulated PC1s #Eigen0 dim(Eigen0[[3]]) #581 1000 ##NOMERNCLATURE OF MBH98 DATA FOR SIMULATIONS selection=1 #only one PC retained in AD1400 network U<-as.matrix(pc[1:79,selection]) Ut<-t(U) G<- t( rep(NA,22)) P<-diag (rep(1,22)) sd.obs<-sd(pc[1:79,1]) sd.rescaling=gridcell.stdev.ordermann beta<- lambda[1] * t(eof) [1,mask.logical] %*% (gridcell.stdev.ordermann[mask.logical]*sd.rescaling[mask.logical])/sum(mu[mask.logical]) beta# 5.207708 ##PLACEMARK FOR SIMULATION K<-1000 ## used in actual simulation K=100 for brevity here stat<-data.frame(array(rep(NA),dim=c(K,10))) names(stat)[1:7]<-c( "RE.cal","RE.ver","R2.cal","R2.ver","CE","sign.test","prod.test") #postscript: much of the linear algebra cancels and I would do this much more simply now; #this code reconciles to Wahl and Ammann code apples and apples - see climateaudit.org May 2005 for (i in 1:K) { proxy<-ts(array(rnorm(22*581),dim=c(581,22)),start=1400,end=1980) #network of 22 series of white noise of length 581 proxy[,1]<-Eigen0[[3]][,i] #simulated HS series from MM2005 for (j in 1:22) { proxy[,j]<-normalize(proxy[,j],1902,1980)} B<-proxy[503:581,] G<- as.matrix( solve(Ut %*% U ) %*% Ut %*% B ); #this is array of regression coefficients in calibration C <- P %*% ( t(G %*% P) %*% solve ((G %*% P) %*% t((G %*%P))) ) #calculate reconstructed temperature principal components #this is somewhat redundant H<- proxy %*% C sd.est<-sd(H[503:581]) #adjusted May 2005 H<-H*sd.obs/sd.est #V<- (H * lambda[1]) %*% t(eof) [1,] #dimension 112x1082 #allows for vector form #sparse.fitted<- V[,mask.logical] %*% (gridcell.stdev.ordermann[mask.logical]*sd.rescaling[mask.logical])/sum(mu[mask.logical]) #sparse.fitted<-ts(sparse.fitted,start=1400,end=1980) sparse.fitted<-ts(H*c(beta),start=1400,end=1980) #replces above 3 lines as algebra cancels stat[i,1:7]<-verification.stats(sparse.fitted,T,calibration,verification) } ##COLLECT QUANTILE RESULTS f=function(x) quantile(x,c(.99,.95,.9,.5)) sim.results=apply(stat[,1:7],2,f) round(sim.results,3) ##RE-RUN RESULTS # RE.cal RE.ver R2.cal R2.ver CE sign.test prod.test #99% 0.309 0.541 0.316 0.132 -0.077 32 2.296 #95% 0.239 0.470 0.239 0.085 -0.242 29 1.682 #90% 0.200 0.429 0.201 0.055 -0.340 28 1.333 #50% 0.060 -0.069 0.092 0.010 -1.506 23 -0.064 ##ORIGINAL RESULTS # RE.cal RE.ver R2.cal R2.ver CE sign.test prod.test #99% NA 0.542 NA 0.115 -0.096 0.667 2.469 #95% NA 0.454 NA 0.073 -0.280 0.625 1.698 #90% NA 0.397 NA 0.057 -0.413 0.583 1.361 #50% NA -0.074 NA 0.010 -1.517 0.479 0.029 ################################## ###ADDITIONAL CALCULATIONS AT CLIMATEAUDIT ################################ #A. CALCULATE RE AND OTHER STATS WITH TECH STOCK PC1 PLUS WHITE NOISE #obtain PC1 from Tech stock prices price<-read.table("http://www.climateaudit.org/data/mm05/technology.stock.prices.txt",skip=1,header=TRUE,fill=TRUE,sep="\t") dim(price)# [1] 2259 159 id<-scan("d:/climate/data/ritson/technology.stock.prices.txt",n=20,what="") name0<-c("date","open","high","low","close","volume","adj.close","X") test <-c(t( outer(id,name0, function(x,y) paste(x,y,sep=".") ) )) names(price)<-test[1:(length(test)-1)] price<-price[nrow(price):1,c(1, seq(5,8*19+5,8))] test<-price[1410:2000,2:ncol(price)] test<-ts(test[1:581,!is.na(test[100,])],start=1400) pca.mannomatic.tech<-prcomp(mannomatic(test) ,center=FALSE) #do MBH-style recons making up balance nework with white noise K<-100 stat<-data.frame(array(rep(NA),dim=c(K,7))) names(stat)[1:7]<-c( "RE.cal","RE.ver","R2.cal","R2.ver","CE","sign.test","prod.test") for (i in 1:K) { proxy<-ts(array(rnorm(22*581),dim=c(581,22)),start=1400,end=1980) proxy[,1]<-pca.mannomatic.tech$x[,1] for (j in 1:22) { proxy[,j]<-normalize(proxy[,j],1902,1980)} B<-proxy[503:581,] G<- as.matrix( solve(Ut %*% U ) %*% Ut %*% B ); #this is array of regression coefficients in calibration C <- P %*% ( t(G %*% P) %*% solve ((G %*% P) %*% t((G %*%P))) ) H<- proxy %*% C sd.est<-sd(H[503:581]) #adjusted May 2005 H<-H*sd.obs/sd.est sparse.fitted<-ts(H*c(beta),start=1400,end=1980) #replces above 3 lines as algebra cancels stat[i,]<-verification.stats(sparse.fitted,T,c(1902,1980),c(1854,1901)) } f=function(x) quantile(x,c(.99,.95,.9,.5)) sim.results.tech=apply(stat,2,f) round(sim.results.tech,3) #RE-RUN # RE.cal RE.ver R2.cal R2.ver CE sign.test prod.test #99% 0.300 0.509 0.305 0.117 -0.151 31.03 2.449 #95% 0.264 0.460 0.266 0.094 -0.267 29.00 1.981 #90% 0.257 0.419 0.258 0.067 -0.361 29.00 1.636 #50% 0.186 0.328 0.188 0.015 -0.575 24.00 -0.089 ############## #B. DO WITH MBH PC1/GASPE BLEND AND BALANCE WHITE NOISE x=apply(proxy.MBH[,c(9,16)],1,mean) K<-100 stat<-data.frame(array(rep(NA),dim=c(K,7))) names(stat)[1:7]<-c( "RE.cal","RE.ver","R2.cal","R2.ver","CE","sign.test","prod.test") for (i in 1:K) { proxy<-ts(array(rnorm(22*581),dim=c(581,22)),start=1400,end=1980) proxy[,1]<-x for (j in 1:22) { proxy[,j]<-normalize(proxy[,j],1902,1980)} B<-proxy[503:581,] G<- as.matrix( solve(Ut %*% U ) %*% Ut %*% B ); #this is array of regression coefficients in calibration C <- P %*% ( t(G %*% P) %*% solve ((G %*% P) %*% t((G %*%P))) ) H<- proxy %*% C sd.est<-sd(H[503:581]) #adjusted May 2005 H<-H*sd.obs/sd.est sparse.fitted<-ts(H*c(beta),start=1400,end=1980) #replces above 3 lines as algebra cancels stat[i,]<-verification.stats(sparse.fitted,T,c(1902,1980),c(1854,1901)) } sim.results.wn=apply(stat,2, function(x) quantile(x,c(.99,.95,.9,.5))) round(sim.results.wn,3) #RERUN # RE.cal RE.ver R2.cal R2.ver CE sign.test prod.test #99% 0.213 0.586 0.213 0.117 0.030 30.01 2.698 #95% 0.200 0.575 0.201 0.093 0.005 28.00 2.341 #90% 0.188 0.559 0.190 0.081 -0.033 27.10 1.938 #50% 0.127 0.501 0.139 0.026 -0.171 23.00 1.147 ############## #B. DO WITH BALANCE PROXIES ONLY proxy<-proxy.MBH[,c(1:8,10:15,17:22)] P=diag(rep(1,ncol(proxy))) for (j in 1:ncol(proxy)) { proxy[,j]<-normalize(proxy[,j],1902,1980)} B<-proxy[503:581,] G<- as.matrix( solve(Ut %*% U ) %*% Ut %*% B ); #this is array of regression coefficients in calibration C <- P %*% ( t(G %*% P) %*% solve ((G %*% P) %*% t((G %*%P))) ) H<- proxy %*% C sd.est<-sd(H[503:581]) #adjusted May 2005 H<-H*sd.obs/sd.est sparse.fitted<-ts(H*c(beta),start=1400,end=1980) #replces above 3 lines as algebra cancels verification.stats(sparse.fitted,T,c(1902,1980),c(1854,1901)) # RE.cal RE.ver R2.cal R2.ver CE sign.test prod.test #1 0.1221319 -0.1300386 0.1347229 5.612416e-05 -1.648948 26 1.165787