##FUNCTIONS USED IN HANSEN STEP 2 and DIAGNOSTICS #saved as www.climateaudit.org/scripts/station/giss/step2.functions.txt ##two-legged adjustment HAnsen style #also returns one-legged adjustment and HAnsen flags twoleg=function(x) { lshort=7;slplim=.1;slpx=.05;slpt=.02 N=length(x) stat=rep(NA,N) temp=!is.na(x) index=(1:N)[temp] fm0=lm(x[temp]~index) for (i in index[5:(length(index)-4)]) { z=data.frame(x, abs( (1:N)-i), (1:N)-i) ;names(z)=c("y","abs","x") fm=lm(y~abs+x,data=z) stat[i]=ssq(fm$residuals) } index0=(1:N)[(stat==min(stat,na.rm=TRUE))&!is.na(stat)] z=data.frame(x, abs( (1:N)-index0), (1:N)-i) ;names(z)=c("y","abs","x") fm=lm(y~abs+x,data=z) fm0=lm(y~x,data=z) sl1=coef(fm)[3]-coef(fm)[2];sl2=coef(fm)[3]+coef(fm)[2] flag=rep(NA,4) flag[1]= 1*((index0slplim)+20*(abs(sl2)>slplim) flag[3]=100*(abs(sl2-sl1)>slplim)+100*(abs(sl2-sl1)>slpx) flag[4]= 1000*( (sl1*sl2<0) & (abs(sl1)>slpt) &(abs(sl2)>slpt) ) iflag=sum(flag) #if(!(iflag==0) & !(iflag==100) ) fm$coefficients[2:3]=fm0$coefficients[2] twoleg=list(fm,index0+tsp(x)[1]-1,iflag,fm0);names(twoleg)=c("model","xmid","flag","fm0") twoleg } #Make an annual anomaly Hansen style anom=function(x,method="anom") { if( (class(x)=="ts")|(class(x)=="mts")) K=floor(tsp(x)[1]) if( is.null(dim(x))) {test=t(array(x,dim=c(12,length(x)/12)) ) if(method=="anom") apply(test[(1961:1990)-K+1,],2,mean,na.rm=TRUE) if(method=="hansen") m0=round(apply(test,2,mean,na.rm=TRUE),2) test=scale(test,center=m0,scale=FALSE) anom=ts(c(t(test)),start=c(K,1),freq=12) anom} else {anom=x; for(i in 1:ncol(x)) {test=t(array(x[,i],dim=c(12,nrow(x)/12)) ) if(method=="anom") m0=apply(test[(1961:1990)-K+1,],2,mean,na.rm=TRUE) if(method=="hansen") m0=round(apply(test,2,mean,na.rm=TRUE),2) test=round(scale(test,center=m0,scale=FALSE),2) anom[,i]=c(t(test))}} anom=list(anom,m0) anom } ### circledist #calculates great circle distance with earth radius 6378.137 km #from http://en.wikipedia.org/wiki/Great-circle_distance 2nd formula circledist =function(loc, lat,long,R=6372.795) { N=length(lat) if(N==0) circledist=NA else { pi180=pi/180; x= abs(long -loc[2]) if (N>1) delta= apply( cbind(x %%360,(360-x)%%360),1,min) *pi180 else delta= min (x %%360,(360-x)%%360) *pi180 loc=loc*pi180; lat=lat*pi180; long=long*pi180 theta= 2* asin( sqrt( sin( (lat- loc[1])/2 )^2 + cos(lat)*cos(loc[1])* (sin(delta/2))^2 )) circledist=R*theta } circledist } ###Make rural network for given id ruralstations=function(id0) { if(is.numeric(id0)) i=id0 else i= (1:7364)[!is.na(match (names(giss.dset1),id0))] temp_rur=(station_versions$urban=="R") station_versions$dist=NA lat0=stations$lat[i];long0=stations$long[i] temp1= (abs (station_versions$lat-lat0) <10) # & !is.na(station_versions$start_raw) #initial screening only by latitude: must be within 10 deg latitude to be within 1000 km test_angle= 10/sin(abs(lat0)*pi180) x= abs(station_versions$long -long0) delta= apply( cbind(x %%360,(360-x)%%360),1,min) temp2=(delta=3) ruralstations=ruralstations[temp,] ruralstations=ruralstations[order(ruralstations[,"dist"]),]#order by increasing distance ruralstations$weights= 1- ruralstations$dist/1000} ruralstations } ##HANSEN REFERENCE METHOD #do HAnsen reference on network of time series hansenref=function(rural) { K=nrow(rural) #collate rural anom anom.rural=NULL for (k in 1:K) anom.rural=ts.union(anom.rural,giss.anom[[ paste(rural$version[k]) ]]) dimnames(anom.rural)[[2]]=rural$version #cross check to binary should be OK N=nrow(anom.rural) W=t(array(rep(rural$weights,N),dim=c(length(rural$weights),N) ) ) W[is.na(anom.rural)]=NA #matrix of weights: at least 3 stations and station available long0=apply(!is.na(anom.rural),2,sum) #calculates length of the candidate rural series order1=order(long0,decreasing=TRUE);index=NULL #long0[order1] # orders by decreasing length delta=NULL fixed=NULL; available= 1:K; #start with the longest series if (max(long0)<20) reference=NA else { j0=order1[1]; reference=anom.rural[,j0] #picks longest series to insert reference.seq=reference weights1=W[,j0] fixed=j0 #sequentially adds in the rural series for (k in 2:K) { j0=order1[k] #chooses the series to be added Y=cbind(anom.rural[,j0],reference,W[,j0],weights1,reference); temp=!is.na(Y[,1])&!is.na(Y[,2]) #if(sum(temp)>=20) { bias= mean(Y[temp,1])-mean(Y[temp,2]) #in this case, there is a long separation: what happens then Y[,1]=Y[,1]-bias #these have same centers g=function(x) weighted.mean(x[1:2],x[3:4],na.rm=T) Y[,5]=apply(Y[,1:4],1,g) fixed=c(fixed,j0) reference=Y[,5] delta=c(delta,bias) reference.seq=cbind(reference.seq,reference) weights1=apply(Y[,3:4],1,sum,na.rm=T) } } count=ts(apply(!is.na(anom.rural),1,sum),start=tsp(anom.rural)[1]) hansenref=list(reference,count,anom.rural,W); names(hansenref)=c("reference","count","anom","weights") hansenref } ##FUNCTION TO EMULATE STEP 2 COMBINING #requires ... emulate_dset2=function(id0) { #collate urban target data if (is.numeric(id0) &(id0<8000)) i=id0 else i=(1:7364)[(names(giss.dset1)==id0)] dset1.monthly=ts( c(t(giss.dset1[[i]][[1]][,2:13])) ,start=c(giss.dset1[[i]][[1]][1,1],1),freq=12) dset1.annual= ts(giss.dset1[[i]][[1]][,18], start=giss.dset1[[i]][[1]][1,1]) dset1.monthly.anom=anom(dset1.monthly,method="hansen") dset1.norm=dset1.monthly.anom[[2]];dset1.monthly.anom=dset1.monthly.anom[[1]] dset1.annual.anom=hansenq(dset1.monthly.anom,method="ts")[,5] dset1.count= ts( apply(!is.na(giss.dset1[[i]][[1]][,14:17] ),1,sum,na.rm=TRUE),start=giss.dset1[[i]][[1]][1,1]) dset2.monthly=ts( c(t(giss.dset2[[i]][[1]][,2:13])) ,start=c(giss.dset2[[i]][[1]][1,1],1),freq=12) dset2.annual= ts(giss.dset2[[i]][[1]][,18], start=giss.dset2[[i]][[1]][1,1]) dset1.info=stations[i,] #collate rural station comparanda rural=ruralstations(id0) reference0=hansenref(rural) count=reference0$count reference=reference0$reference #reference Y=ts.union(dset1.annual,dset1.annual.anom,dset2.annual,dset2.annual,reference,reference,reference,reference,dset1.count,count,reference) Y=cbind(Y,c(time(Y))) dimnames(Y)[[2]]=c("dset1.book","dset1.anom","dset2.book","delta","reference","adjusted","adjustment","emulation","urban.count","dset1.count","pre_adj","year") M2=range(c(time(Y))[!is.na(Y[,"dset2.book"])],na.rm=T) Y[,c("delta","adjustment","adjusted","emulation","pre_adj")]=NA Y[,"delta"]=Y[,"dset2.book"]-Y[,"dset1.book"] #temp0=(Y[,"dset1.count"]>=3)&!is.na(Y[,"dset1.count"])&!is.na(Y[,"dset1.anom"]) temp1=(Y[,"dset1.count"]>=3)&!is.na(Y[,"dset1.count"]) temp2=!is.na(Y[,"dset1.anom"]) M0=range(time(Y)[temp1&temp2]) #range of available temp0=(time(Y)>=M0[1])&(time(Y)<=M0[2]) #seems to ignore fallback below 3 N3=sum( temp0&temp2); #number of years with both dset1 has values and count>=3 in range with count>=3 N3ext=floor(N3*1.5) temp4=(Y[,"dset1.count"]>0)&!is.na(Y[,"dset1.count"])&(Y[,"urban.count"]>0)&!is.na(Y[,"urban.count"]) M1=c(NA,max(c(time(Y))[temp4]) ) #1988 M1[1]=M1[2]-N3ext+1;M1 y=Y[,"reference"]-Y[,"dset1.anom"] adj=twoleg(ts(y[temp0],start=M0[1])) if ((adj$flag==0)|(adj$flag==100)) x=fitted(adj$model) else x= fitted(adj$fm0) #round(x,1) Y[temp0&temp2,"pre_adj"]=round(x,1) x=x-x[length(x)] Y[temp0&temp2,"adjustment"]=round(x,1) temp1A=(time(Y)>=M1[1])&!is.na(Y[,"dset1.anom"])&(time(Y)M0[2]) Y[temp1A,"adjustment"]=round(x[1],1) Y[temp1B,"adjustment"]=round(x[length(x)],1) Y[,"adjusted"]=round(Y[,"dset1.anom"]+Y[,"adjustment"],2) ##now rebuild monthly series Z=ts.union(dset1.monthly,dset1.monthly.anom,dset2.monthly,dset2.monthly,dset2.monthly,dset2.monthly,dset2.monthly) dimnames(Z)[[2]]=c("dset1.book","dset1.anom","dset2.book","adjustment","adjusted","emulation","delta") Z[,4:7]=NA temp2=(time(Y)>=tsp(Z)[1])&(time(Y)<=tsp(Z)[2]) x= c(t(array(rep(Y[temp2,"adjustment"],12),dim=c(nrow(Y[temp2,]),12)) )) Z[,"adjustment"]=c(x[2:length(x)],NA) Z[,"adjusted"]=round(Z[,"adjustment"]+Z[,"dset1.anom"],2) X= t(array(Z[,"adjusted"],dim=c(12,nrow(Z)/12) )) Z[,"emulation"]= c(t(scale(X,center= -dset1.norm,scale=FALSE) )) Z[,"delta"]=round(Z[,"dset2.book"]-Z[,"dset1.book"],2) #hansenq(Z[,"emulation"],method="ts") emulate_dset2=list(Y,Z,reference0,rural,M0,M1,M2,adj,dset1.info) names(emulate_dset2)=c("annual","monthly","reference","rural","M0","M1","M2","adj","info") emulate_dset2 } extract.log=function(id0){ index=grep(substr(id0,4,11),paparsghcn.log); test=paparsghcn.log[index[1]:min((max(index)+300),length(paparsghcn.log))] K=min(grep("possible range",test)) index=grep(substr(id0,4,11),test); test1=test[max(index):K]; a1= c( substr(test1[2],36,45), substr(test1[seq(3,length(test1)-3,2)],37,46) ) n1= as.numeric(c( substr(test1[2],32,34), substr(test1[seq(3,length(test1)-3,2)],33,35))) test1=data.frame(a1,n1);names(test1)=c("id","length") test2= paparslist[c(1,grep(substr(id0,4,11),paparslist))] test2[2]=gsub("-"," ",test2[2]) writeLines(test2,"temp.dat") test2=read.table("temp.dat",colClasses=c("character",rep("numeric",13)),skip=1) name0=c(scan("temp.dat",n=9,what=""),"M0_start","M0_end","M1_start","M1_end","flag") name0[2:3]=c("sl1","sl2") names(test2)=name0 extract.log=list(test1,test2) extract.log } ##Prior version of HAnsen reference, kept for cross check hansenref_old=function(rural) { K=length(rural$station) #collate Centigrade data chron=NULL; monthly=NULL for (k in 1:K){ n=(1:7364)[names(giss.dset1)==rural$station[k]] #test1=read.giss(paste(rural$id[k]),dset=1)[[1]] test1=giss.dset1[[paste(rural$station[k])]][[1]] chron=ts.union(chron, round(ts( test1[,"ANN"],start=test1[1,1]), 2) ) monthly=ts.union(monthly, ts( c(t(giss.dset1[[n]][[1]][,2:13])) ,start=c(giss.dset1[[n]][[1]][1,1],1),freq=12)) } tsp0=tsp(chron)#1881 2008 dimnames(chron)[[2]]=rural$station dimnames(monthly)[[2]]=rural$station #this collects annual network of rural stations using Hansen interpolation #collate anomaly version Hansen-annualized monthly.anom=monthly; for (i in 1:K) monthly.anom[,i]=anom(monthly[,i],method="hansen")[[1]] qtr.anom=rep(list(NA),K) for(i in 1:K) qtr.anom[[i]]=hansenq(monthly.anom[,i],method="ts") anom.rural=NULL for (i in 1:K) anom.rural=ts.union(anom.rural,qtr.anom[[i]][,5]) dimnames(anom.rural)[[2]]=rural$station #cross check to binary should be OK N=nrow(anom.rural) W=t(array(rep(rural$weights,N),dim=c(length(rural$weights),N) ) ) W[is.na(anom.rural)]=NA #matrix of weights: at least 3 stations and station available long0=apply(!is.na(anom.rural),2,sum) #calculates length of the candidate rural series order1=order(long0,decreasing=TRUE);index=NULL #long0[order1] # orders by decreasing length delta=NULL fixed=NULL; available= 1:K; #start with the longest series if (max(long0)<20) reference=NA else { j0=order1[1]; reference=anom.rural[,j0] #picks longest series to insert reference.seq=reference weights1=W[,j0] fixed=j0 #sequentially adds in the rural series for (k in 2:K) { j0=order1[k] #chooses the series to be added Y=cbind(anom.rural[,j0],reference,W[,j0],weights1,reference); temp=!is.na(Y[,1])&!is.na(Y[,2]) #if(sum(temp)>=20) { bias= mean(Y[temp,1])-mean(Y[temp,2]) #in this case, there is a long separation: what happens then Y[,1]=Y[,1]-bias #these have same centers g=function(x) weighted.mean(x[1:2],x[3:4],na.rm=T) Y[,5]=apply(Y[,1:4],1,g) fixed=c(fixed,j0) reference=Y[,5] delta=c(delta,bias) reference.seq=cbind(reference.seq,reference) weights1=apply(Y[,3:4],1,sum,na.rm=T) } } count=ts(apply(!is.na(anom.rural),1,sum),start=tsp0[1]) hansenref_old=list(reference,count,anom.rural,W); names(hansenref_old)=c("reference","count","anom","weights") hansenref_old } #MISC FUNCTIONS trim=function (chron) { M0=range(time(chron)[!is.na(chron)]) trim=ts(chron[ (time(chron)>=M0[1])&(time(chron)<=M0[2]) ],start=M0[1]) trim} long=function(x){ x=trim(x); M0=range(time(x));long=M0[2]-M0[1]+1;long} hansen_delta=function(x,reference){ combine=ts.union(x,reference) temp=!is.na(combine[,1])&!is.na(combine[,2]) m0=apply(combine[temp,],2,mean) hansen_delta=m0[1]-m0[2] hansen_delta} ssq=function(x){ssq=sum(x^2,na.rm=TRUE);ssq} ##FUNCTIONS USED FOR DIAGNOSTICS #this does a simple plot to see if the emulated adjustment matches the book adjustment plot1=function(W) { plot.ts(W$monthly[,"delta"]-W$monthly[,"adjustment"],ylim=c(-.4,.4),main=paste(W$info$name," dset2 (monthly delta)")) } #this is a more complicated diagnostic plot (illustrated in CA posts) showing various adjustment versions plot2=function(W,log0,sgn0=1) { year=c(time(W$annual)) layout(1);par(mar=c(3,3,2,1)) plot(year,W$annual[,"reference"]-W$annual[,"dset1.anom"],type="l",ylim=c(-1,1)) lines(year,W$annual[,"delta"],lty=2,lwd=2) lines(year,W$annual[,"adjustment"],lty=3,col=2) lines(year,W$annual[,"pre_adj"],lty=2,col=2) temp0=(time(W$annual)>=W$M0[1])&(time(W$annual)<=W$M0[2]) temp2=!is.na(W$annual[,"dset1.anom"]) lines(year[temp0&temp2],W$adj$model$fitted.values,col=3,lty=1) lines(year[temp0&temp2],W$adj$fm0$fitted.values,col=3,lty=1) a=log0[[2]];xmid=a$knee points(a$knee,sgn0*a$Yknee/10,pch=19,col=5) x=W$M0[1]:W$M0[2]; temp=(x <= a$knee);sum(temp) points( x[temp], sgn0*0.1*(a$Yknee + a$sl1*(x[temp]-xmid) ) ,col=5,pch=19,cex=.1) temp=(x >=a$knee);sum(temp) points( x[temp], sgn0*0.1*(a$Yknee - a$sl2*(x[temp]-xmid) ) ,col=5,pch=19,cex=.1) #note that this is minus sl2 and plus sl1 title(paste(W$info$name," Adjustments")) legend(locator(1),fill=c(1,5,3,2),legend=c("Book","Log","Fitted","Adjustment"),cex=.7,box.lwd=0) } #this shows selected rural stations, comparing to Hansen logs ruralcompare=function(W,log0){ ruralcompare=rep(list(NA),3) ruralcompare[[1]]=W$rural #compare roster in W$rural to HAnsen log a1= log0[[1]]$id a2=substr(W$rural$version,4,12) ruralcompare[[2]]=c( a2[is.na(match(a2,a1))], a1[is.na(match(a1,a2))] ) #compare lengths ruralcompare[[3]]=cbind(W$M1,W$M2) dimnames(ruralcompare[[3]])[[2]]=c("W","log0") ruralcompare } #STEP 1 FUNCTIONS ###2. hansenq ## from monthly data with missing values, this function calculates quarterly and annual averages the "Hansen" way ## monthly averages are calculated for all months and also quarterly averages ("normal"); over available dates not a reference period ##quarters are DJF, MAM, JJA, SON ## if there are 2 or 3 months in a quarter, the quarterly average is the average anomaly in available months plus the quarterly normal ##if there are 3 or 4 quarters, the hansenq=function(X,method="hansen") { if(method=="hansen") year=X$year else year= floor(tsp(X)[1]):floor(tsp(X)[2]) N=length(year);start0=min(year) #32 18 if(method=="hansen") X=as.matrix(X[,2:13]) else X=t( array(X,dim=c(12,length(X)/12) ) ) m0=apply(X,2,mean,na.rm=TRUE) #calculate mean over all values: possibly different q0= apply( array(m0[c(12,1:11)],dim=c(3,4)),2,mean) #calculate quarterly mean q0=c(q0,mean(m0) ) #append annual mean Xanom=scale(X,center=m0,scale=FALSE) #calculate anomaly version calendar year x=c(t(Xanom));x=c(NA,x[1:(length(x)-1)]);Xanom=t(array(x,dim=c(12,N) ) ) #re-shape to Dec-Nov Xanom=t(array(t(Xanom),dim=c(3,N*4)) ) #re-shape array in quarter-rows rather than annual-rows count=apply(!is.na(Xanom),1,sum) #calculate non-missing values by quarter Qanom= apply(Xanom,1,mean,na.rm=TRUE); Qanom[count<2]=NA #calculate anomaly by quarter; set to NA if less than 2 values Qanom= t( array(Qanom,dim=c(4,length(Qanom)/4) ) ) #reshape into array of dimension nyear x 4 count=apply(!is.na(Qanom),1,sum) #calculate non-missing values as with quarters Qanom=cbind(Qanom,apply(Qanom,1,mean,na.rm=TRUE) ); dimnames(Qanom)[[2]]=c("DJF","MAM","JJA","SON","ANN") Qanom[count<3,"ANN"]=NA #calculate annual anomaly as average of available quarters; set to NA if fewer than 3 qtrs available Qanom=scale(Qanom,center=-q0,scale=FALSE) #re-center based on "normals" hansenq=ts(Qanom ,start=start0) #make an annual ts object hansenq } #test=hansenq(station[[i]]);test1=station[[i]][,14:18];data.frame(test,test1) #x=ts( c(t(station[[1]][,2:13])),start=c(station[[1]][1,1],1),freq=12);hansenq(x,method="ts")