##EMULATION OF HANSEN STEP 1 #this is the combination of combining dset=0 versions to make a dset=1 version ##TEXT #Two records are combined as shown in Figure 2, if they have a period of overlap. The mean #difference or bias between the two records during their period of overlap (dT) is used to adjust one #record before the two are averaged, leading to identification of this way for combining records as the #“bias” method (HL87) or, alternatively, as the “reference station” method [Peterson et al., 1998b]. #The adjustment is useful even with records for nominally the same location, as indicated by the #latitude and longitude, because they may differ in the height or surroundings of the thermometer, in #their method of calculating daily mean temperature, or in other ways that influence monthly mean #temperature. Although the two records to be combined are shown as being distinct in Figure 2, in the #majority of cases the overlapping portions of the two records are identical, representing the same #measurements that have made their way into more than one data set. #A third record for the same location, if it exists, is then combined with the mean of the first #two records in the same way, with all records present for a given year contributing equally to the #mean temperature for that year (HL87). This process is continued until all stations with overlap at #a given location are employed. #If there are additional stations without overlap, these are also #combined, without adjustment, provided that the gap between records is no more than 10 years and #the mean temperatures for the nearest five year periods of the two records differ by less than one #standard deviation. Stations with larger gaps are treated as separate records. ##there are several functions described below: #hansen_bias #hansenq #recent #overlap_hansen #emulate_dset1 #following are some tests ################## ##FUNCTIONS ################### ###LOAD ANCILLARY FUNCTIONS source("http://www.climateaudit.org/scripts/station/hansen/read.giss.txt") #this loads the read.giss function used for downloading dset=0 and dset=1 series #requires provenance url="http://www.climateaudit.org/data/giss" provenance=NULL;stat=rep(NA,3) id=c("mcdw","ushcn","sumofday") ;K=length(id) for (i in 1:K) { loc=file.path(url,paste(id[i],"tbl",sep=".")) test=read.table(loc) test=data.frame(test);names(test)=c("wmo","id","dup");test$source=id[i]; stat[i]=nrow(test)#1502 provenance=rbind(provenance,test) } # 1502 1221 371 = 3094 provenance$dup=paste(provenance$id,provenance$dup,sep="") ##1. hansen_bias #this function calculates the "Hansen bias" between two time series by comparing the annual averages #calculated by "Hansen summing" in an overlap period #two formats are allowed for: default method="hansen" uses the table layout used in GISTEMP: year, 12 months, 4 quarters, annual #method = "ts" will compare two ANNUAL time series hansen_bias=function(X,Y,method="hansen") { if(method=="hansen"){ temp1=!is.na(X$ANN); temp2=!is.na(Y$ANN) test=match(Y$year[temp2],X$year[temp1]);temp=!is.na(test) A=Y$ANN[temp2][temp];#A B= X$ANN[temp1][test[temp]] a=sum(A-B) K=length(A) hansen_bias= a/K } else { temp1=!is.na(X); temp2=!is.na(Y) test=match(c(time(Y))[temp2],c(time(X))[temp1]);temp=!is.na(test) A=Y[temp2][temp];#A B= X[temp1][test[temp]] #a= floor(100*A -100*B); a=sum(a)/100 a=sum(A-B) K=length(A) hansen_bias= a/K } hansen_bias } #hansen_bias(station[[2]],station[[5]]) ###2. hansenq ## from monthly data with missing values, this function calculates quarterly and annual averages the "Hansen" way ## monthly averages are calculated for all months and also quarterly averages ("normal"); over available dates not a reference period ##quarters are DJF, MAM, JJA, SON ## if there are 2 or 3 months in a quarter, the quarterly average is the average anomaly in available months plus the quarterly normal ##if there are 3 or 4 quarters, the hansenq=function(X,method="hansen") { if(method=="hansen") year=X$year else year= floor(tsp(X)[1]):floor(tsp(X)[2]) N=length(year);start0=min(year) #32 18 if(method=="hansen") X=as.matrix(X[,2:13]) else X=t( array(X,dim=c(12,length(X)/12) ) ) m0=apply(X,2,mean,na.rm=TRUE) #calculate mean over all values: possibly different q0= apply( array(m0[c(12,1:11)],dim=c(3,4)),2,mean) #calculate quarterly mean q0=c(q0,mean(m0) ) #append annual mean Xanom=scale(X,center=m0,scale=FALSE) #calculate anomaly version calendar year x=c(t(Xanom));x=c(NA,x[1:(length(x)-1)]);Xanom=t(array(x,dim=c(12,N) ) ) #re-shape to Dec-Nov Xanom=t(array(t(Xanom),dim=c(3,N*4)) ) #re-shape array in quarter-rows rather than annual-rows count=apply(!is.na(Xanom),1,sum) #calculate non-missing values by quarter Qanom= apply(Xanom,1,mean,na.rm=TRUE); Qanom[count<2]=NA #calculate anomaly by quarter; set to NA if less than 2 values Qanom= t( array(Qanom,dim=c(4,length(Qanom)/4) ) ) #reshape into array of dimension nyear x 4 count=apply(!is.na(Qanom),1,sum) #calculate non-missing values as with quarters Qanom=cbind(Qanom,apply(Qanom,1,mean,na.rm=TRUE) ); dimnames(Qanom)[[2]]=c("DJF","MAM","JJA","SON","ANN") Qanom[count<3,"ANN"]=NA #calculate annual anomaly as average of available quarters; set to NA if fewer than 3 qtrs available Qanom=scale(Qanom,center=-q0,scale=FALSE) #re-center based on "normals" hansenq=ts(Qanom ,start=start0) #make an annual ts object hansenq } #test=hansenq(station[[i]]);test1=station[[i]][,14:18];data.frame(test,test1) #x=ts( c(t(station[[1]][,2:13])),start=c(station[[1]][1,1],1),freq=12);hansenq(x,method="ts") ##3. recent # a little function that calculates the most recent value in a time series; used in ordering recent=function(x) { temp=!is.na(x);recent=max(time(x)[temp]); recent} ##4. overlap_hansen # a little function that calculates the length of overlap between two ANNUAL time series #used in ordering overlap_hansen=function(x,y){ temp1=!is.na(x);temp2=!is.na(y);year1=c(time(x));year2=c(time(y)) temp=!is.na(match(year1[temp1],year2[temp2])) overlap_hansen=sum(temp) overlap_hansen } ###5. trim=function (chron) { M0=range(time(chron)[!is.na(chron)]) trim=ts(chron[ (time(chron)>=M0[1])&(time(chron)<=M0[2]) ],start=M0[1]) trim} ###6. long=function(x){ x=trim(x); M0=range(time(x));long=M0[2]-M0[1]+1;long} ##7. emulate_dset1 #this is a function that emulates Hansen's calculation of dset=1 from dset=0 #the parameter id0 is the 11-digit GISS id for a station: I find it best to use characters #the default setting will download the station data from GISS #this is CCCWWWWWIII - 3 -digit country; 5-digit WMO; 3-digit local #this will download current dset0 and dset1 from GISS #you can use available station versions as long as they are in Hansen-format as station0 (dset=0) and station1(dset=1) # variety of objects returned for diagnostic purposes # $station0 the input dset0 as monthly ts # $annual0 -input dset0 as annual # $station1 - input dset1 monthly as ts # $annual1 - input dset1 annual as ts # $adjusted -Hansen adjusted monthly # $adjusted_annual - Hansen adjusted annual # $reference - closing Hansen quarterly as ts # delta - adjustments for each version #index - order of selection emulate_dset1=function(id0,station0=NA,station1=NA,download0=TRUE,prov=provenance){ #optional downloading if (download0) { if (class(id0)=="numeric") { id0=as.character(id0); J=nchar(id0); if (J<11) id0=paste(rep("0",11-J),id0,sep="") } station0=read.giss(id0,dset=0) station1=read.giss(id0,dset=1)[[1]] } #this will dowload GISS dset=0 and GISS dset=1 versions #collation into time-series objects annual1=ts(station1[,18],start=station1[1,1]) station1=ts( c(t(station1[,2:13])),start=c(station1[1,1],1),freq=12) K=length(station0) #calculate number of dset=0 versions X=NULL; for (i in 1:K) {test= station0[[i]]; y=ts( c(t(test[,2:13])) ,start=c(test[1,1],1),freq=12); X=ts.union(X,y)} if(K==1) X= ts(as.matrix(X),start=c(test[1,1],1),freq=12); dimnames(X)[[2]]=names(station0) #collate dset=0 monthly data into a time-series object annual=NULL for (i in 1:K) {test= station0[[i]]; y=ts(test[,18],start=test[1,1]); annual=ts.union(annual,y)} if(K==1) annual= ts(as.matrix(annual),start=test[1,1]); dimnames(annual)[[2]]=names(station0) #collate dset=0 annual data into a time-series object #iterative procedure for combining dset=0 series #set-up initial iteration fixed=1; # set as the first if( K>1) { long0=apply(annual,2,long) fixed=min ( (1:K)[long0==max(long0,na.rm=T)],na.rm=T) temp=!is.na(match(names(station0),prov$dup[prov$source=="sumofday"]) ); if(sum(temp)==1) fixed=(1:K)[temp] temp=!is.na(match(names(station0),prov$dup[prov$source=="ushcn"]) ); if(sum(temp)==1) fixed=(1:K)[temp] temp=!is.na(match(names(station0),prov$dup[prov$source=="mcdw"]) ); if(sum(temp)==1) fixed=(1:K)[temp] } available=(1:K)[is.na(match(1:K,fixed))] #set all other columns as available fixed_stations=X[,fixed]; #matrix of included stations: starts with column K only interim=fixed_stations #interim value starts with the one series in the matix fixed_annual=annual[,fixed] #annual starts with column K reference=hansenq(interim,method="ts") # calculates Hansen quarters+annual delta=rep(NA,K);delta[fixed]=0 #set up vector to store deltas index=fixed #do iteration for (K-1) series; if(K>1) { for (j in 2:K) { if( !(is.null(available) )) { M=length(available); overlap0=rep(NA,M); for (i in 1:M){ overlap0[i]= overlap_hansen(x=reference[,5],y=annual[,available[i] ])} #calculates overlap with "fixed" recent0=rep(NA,M);recent0=apply(as.matrix(annual[,available]),2,recent) #calculates most recent #check if there is an overlap of at least 4 if (max(overlap0)<4) available=NULL else { j0=available[order(overlap0,recent0,decreasing=TRUE)][1] #chooses the series to be added delta[j0]=hansen_bias(annual[,j0], reference[,5],method="ts");#calculates Hansen delta index=c(index,j0) fixed=c(fixed,j0) #add new inclusion to fixed and then same with time series available=available[is.na(match(available,j0))];if(length(available)==0) available=NULL #calculate new available fixed_stations=cbind(fixed_stations,X[,j0]+delta[j0]);dimnames(fixed_stations)[[2]]=dimnames(X)[[2]][fixed] fixed_annual=cbind(fixed_annual,annual[,j0]+delta[j0]);dimnames(fixed_annual)[[2]]=dimnames(X)[[2]][fixed] interim=ts( apply(fixed_stations,1,mean,na.rm=TRUE) , start=c( tsp(annual)[1],1) ,freq=12) #re-calculate interim reference=hansenq(interim,method="ts") #calculate Hansen quarters for interim } } #available- if clause } # j-iteration } #endif emulate_dset1=list(X,annual,station1,annual1,fixed_stations,fixed_annual,reference,delta,index) names(emulate_dset1)=c("station0","annual0","station1","annual1","adjusted","adjusted_annual","reference","delta","index") emulate_dset1 } ################# ##CASE STUDY #id0="22340405000"#Gassim, #station0=read.giss(id0) #test=hansenq(station0[[1]]) #round(test,2)- as.matrix(station0[[1]][,14:18]) #all values are within 0.05 #test=emulate_dset1(id0); #round(test$delta,2) # 0.15 0.00 #round(test$delta,2) # -0.34 -0.16 0.00 #id0="22230309000"#bratsk #station0=read.giss(id0) #test=emulate_dset1(id0); #x=round(test$reference[,5],2); #combine=data.frame(test$annual1,x,test$annual1-x);combine #matches closely #round(test$delta,2) #Hansen "biases" for each version #[1] -0.01 -0.07 -0.07 -0.12 0.00 #id0="22230554000" #bagdarin #test=emulate_dset1(id0); #round(test$delta,2) # -0.34 -0.16 0.00 #id0="22220674000" #id0="22223022001" #WAigatz #id0="22231829000";#cerdyn