############################# ###GISS VERSION - CURRENT # also see scripts at http://x256.org/~hb/giss.r2 # data sets 0 ;0- raw uncombined; 1- combined; 2 -adjusteed download_html <- function(url) { download.file(url, "temp.html"); html_handle <- file("temp.html", "rt"); html_data <- readLines(html_handle); close(html_handle); unlink("temp.html"); return(html_data); } url_escape <- function(string) { string <- gsub(" ", "%20", string) string <- gsub("#", "%23", string) string <- gsub("&", "%26", string) string <- gsub("=", "%3D", string) string <- gsub("\\?", "%3F", string) return(string); } # station_name can either be a name, or a 5-digit WMO number. # If there is no match, NA is returned. # This should work as long as NASA don't change the URLs they use for providing data. download_giss_data.old <- function(station_name, match_num=1,dset=0) { giss_url <- paste("http://data.giss.nasa.gov/cgi-bin/gistemp/findstation.py?datatype=gistemp&data_set=",dset,"&name=", url_escape(station_name), "&submit=1", sep = ""); my_data <- download_html(giss_url); urls <- grep("gistemp_station.py", my_data, value=1); # if the station_name is actually a WMO number, throw away any URLs that match the number in the wrong place of the extended number scheme. if( length(grep("^[0-9]{5}$", station_name)) != 0 ) { urls <- grep(paste("\\?id=[0-9]{3}", station_name, sep=""), urls, value=1); } if( length(urls) == 0 || match_num > length(urls) ) return(NA); K<-length(urls) url <- paste("http://data.giss.nasa.gov", gsub("^.*\"(/cgi-bin/gistemp/gistemp_station.py[^\"]*)\".*$", "\\1", urls), sep=""); #varied from Nicholas to leave as vector id<-substr(url,65,76) urls.new<-rep(NA,K);url.new<-urls.new; download_data<-rep(list(NA),K) for (j in 1:K){ my_data <- download_html(url[j]); urls.new[j] <- grep("monthly data as text", my_data, value=1); url.new[j] <- gsub("^.*\"(http://data.giss.nasa.gov/work/gistemp/STATIONS/[^\"]*)\".*$", "\\1", urls.new[j]); my_handle <- url(url.new[j], "rt"); download_data[[j]] <- read.table(my_handle, skip=1);#instead of NIcholas method names(download_data[[j]])[1]<-"year" close(my_handle); } names(download_data)<-id download_giss_data<-rep(list(NA),3);names(download_giss_data)<-c("raw","anom","normal") tsite<-NULL for (j in 1:K) { temp<-(download_data[[j]] == 999.9); download_data[[j]][temp]<-NA tt<-fill.array(download_data[[j]][,1:13]) #if(method=="annual") tsite<-ts.union(tsite,ts.annavg(tt,M=6)) tsite<-ts.union(tsite,ts(as.matrix(c(t(tt))),start=c(min(as.numeric(row.names(tt))),1),freq=12) )#if(method=="monthly") } tsite<-cbind(tsite,apply(tsite,1,mean,na.rm=TRUE)) dimnames(tsite)[[2]]<-c(id,"avg") download_giss_data[[1]]<-tsite years<-floor(tsp(tsite)[1]):floor(tsp(tsite)[2]) N<-nrow(tsite)/12 chron<- t( array(tsite[,"avg"],dim=c(12,N))) temp<-!is.na(match(years,1961:1990)) m0<- apply(chron[temp,],2,mean,na.rm=TRUE) months<-rep(1:12,N) names(m0)<-month0 download_giss_data[[3]]<-m0 tsite.anom<-round(tsite-m0[months],1) download_giss_data[[2]]<-tsite.anom; download_giss_data } download_giss_data <- function(station_name, match_num=1,dset=1) { giss_url <- paste("http://data.giss.nasa.gov/cgi-bin/gistemp/findstation.py?datatype=gistemp&data_set=",dset,"&name=", url_escape(station_name), "&submit=1", sep = ""); my_data <- download_html(giss_url); urls <- grep("gistemp_station.py", my_data, value=1); # if the station_name is actually a WMO number, throw away any URLs that match the number in the wrong place of the extended number scheme. if( length(grep("^[0-9]{5}$", station_name)) != 0 ) { urls <- grep(paste("\\?id=[0-9]{3}", station_name, sep=""), urls, value=1); } if( length(urls) == 0 || match_num > length(urls) ) return(NA); K<-length(urls) url <- paste("http://data.giss.nasa.gov", gsub("^.*\"(/cgi-bin/gistemp/gistemp_station.py[^\"]*)\".*$", "\\1", urls), sep=""); #varied from Nicholas to leave as vector id<-substr(url,65,76) urls.new<-rep(NA,K);url.new<-urls.new; download_data<-rep(list(NA),K) for (j in 1:K){ my_data <- download_html(url[j]); urls.new[j] <- grep("monthly data as text", my_data, value=1); url.new[j] <- gsub("^.*\"(http://data.giss.nasa.gov/work/gistemp/STATIONS/[^\"]*)\".*$", "\\1", urls.new[j]); my_handle <- url(url.new[j], "rt"); download_data[[j]] <- read.table(my_handle, skip=1);#instead of NIcholas method names(download_data[[j]])[1]<-"year" close(my_handle); } names(download_data)<-id download_giss_data<-NA tsite<-NULL for (j in 1:K) { temp<-(download_data[[j]] == 999.9); download_data[[j]][temp]<-NA tt<-fill.array(download_data[[j]][,1:13]) #if(method=="annual") tsite<-ts.union(tsite,ts.annavg(tt,M=6)) tsite<-ts.union(tsite,ts(as.matrix(c(t(tt))),start=c(min(as.numeric(row.names(tt))),1),freq=12) )#if(method=="monthly") } dimnames(tsite)[[2]]<-id download_giss_data<-tsite download_giss_data } read.giss <- function(station_name, match_num=1,dset=0) { giss_url <- paste("http://data.giss.nasa.gov/cgi-bin/gistemp/findstation.py?datatype=gistemp&data_set=",dset,"&name=", url_escape(station_name), "&submit=1", sep = ""); my_data <- try(download_html(giss_url)) if (class(my_data)=="try-error") read.giss=NA else { urls <- grep("gistemp_station.py", my_data, value=1); # if the station_name is actually a WMO number, throw away any URLs that match the number in the wrong place of the extended number scheme. if( length(grep("^[0-9]{5}$", station_name)) != 0 ) { urls <- grep(paste("\\?id=[0-9]{3}", station_name, sep=""), urls, value=1); } if( length(urls) == 0 || match_num > length(urls) ) return(NA); K<-length(urls) url <- paste("http://data.giss.nasa.gov", gsub("^.*\"(/cgi-bin/gistemp/gistemp_station.py[^\"]*)\".*$", "\\1", urls), sep=""); #varied from Nicholas to leave as vector id<-substr(url,65,76) urls.new<-rep(NA,K);url.new<-urls.new; download_data<-rep(list(NA),K) for (j in 1:K){ my_data <- try(download_html(url[j])); if(class(my_data)=="try-error") download_data[[j]]=NA else { urls.new[j] <- grep("monthly data as text", my_data, value=1); url.new[j] <- gsub("^.*\"(http://data.giss.nasa.gov/work/gistemp/STATIONS/[^\"]*)\".*$", "\\1", urls.new[j]); my_handle <- url(url.new[j], "rt"); test <- try(read.table(my_handle, skip=1));#instead of NIcholas method if (class(test)=="try-error") download_data[[j]]=NA else {download_data[[j]]=test names(download_data[[j]])=c("year","jan","feb","mar","apr","may","jun","jul","aug","sep","oct","nov","dec","DJF","MAM","JJA","SON","ANN") temp=(download_data[[j]] == 999.9);download_data[[j]][temp]=NA} close(my_handle); } } names(download_data)<-id read.giss=download_data } read.giss } ##function to collate information giss.collate=function(id){ giss.collate=rep(list(NA),3) names(giss.collate)=c("dset0","dset1","dset2") if(is.numeric(id)) id=as.character(id) giss.collate$dset0=read.giss(id,dset=0) giss.collate$dset1=read.giss(id,dset=1) giss.collate$dset2=read.giss(id,dset=2) giss.collate } ##FUNCTION TO ESTIMATE QUARTERS AND COMPARE TO HANSEN #X is a data.frame from giss.collate typically hansenq=function(X) { year=X$year;N=nrow(X) #32 18 hansenq=rep(list(NA),4);names(hansenq)=c("X","Q","X_est","Q_est") Q=as.matrix(X[,14:18]); X=as.matrix(X[,2:13]) x=c(t(X));x=c(NA,x[1:(length(x)-1)]);X=t(array(x,dim=c(12,N) ) ) #re-shape to Dec-Nov row.names(X)=year #X[2,] # 22.2 NA NA 24.4 21.9 NA 19.1 18.6 NA 20.8 19.9 20.8 m0=round(apply(X,2,mean,na.rm=TRUE),1) #calculate mean over all values: possibly different #m0 # 19.9 20.5 21.2 21.5 20.2 19.4 18.4 17.9 18.5 19.6 19.0 19.2 Xanom=scale(X,center=m0,scale=FALSE) #calculate anomaly version #Xanom[2,] # 2.3 NA NA 2.9 1.7 NA 0.7 0.7 NA 1.2 0.9 1.6 1955 #estimate anomaly for quarters with only 2 readings g=function(x){ g=round(c( mean(x[2:3]),mean(x[c(1,3)]),mean(x[1:2]) ),1);g} Qanom=t(array(t(Xanom),dim=c(3,N*4)) ) #re-shape array in quarter-rows rather than annual-rows #Qanom[5,] #[1,] 2.3 NA NA Qanom.new=Qanom #shapeholding array for new varation G=Qanom; for (j in 1:nrow(Qanom)){ G[j,]=g(Qanom[j,])} #fill each cell with average of other 2 months in quarter (NOT using na.rm=T); values only if 2-3 months temp=is.na(Qanom); Qanom.new[temp]=G[temp] #substitute G-values into new version #Qanom.new #128 3 Qanom.new= t( array(c(t(Qanom.new)) ,dim=c(12,N)) ) #reshape to annual format Q.rescaled=scale(Qanom.new,scale=FALSE,center=-m0) #rescale anomaly version to absolute values #Q.rescaled[2,] #[1] 22.2 NA NA 24.4 21.9 21.7 19.1 18.6 19.2 20.8 19.9 20.8 #substitute new estimates for missing values in original matrix Xnew=X temp=is.na(X) Xnew[temp]=Q.rescaled[temp] #Xnew[2,]# 22.2 NA NA 24.4 21.9 21.7 19.1 18.6 19.2 20.8 19.9 20.8 ##calculate new quarterly and annual averages Qnew= t(array(c(t(Xnew)),dim=c(3, N*4) ) );#reshape for calculations only #Qnew[5,] # 22.2 NA NA Qnew=round(apply(Qnew,1,mean),1); #calculate quarterly means NOT using na.rm=T #rounding may not further analysis to reconcile Qnew=t(array(Qnew,dim=c(4,N))) #reshape to same shape as original Qnew=cbind(Qnew, rep(NA,N) ); dimnames(Qnew)[[2]]=c("DJF","MAM","JJA","SON","ANN") #make space for annual Qnew[,5]=round(apply(Qnew[,1:4],1,mean),1) #without using na.rm=T #check against actual #Qnew[2,] # NA 22.7 19.0 20.5 hansenq=list(X,Q,Xnew,Qnew);names(hansenq)=c("X","Q","Xnew","Qnew") ##do annual with same sort of fill q0=round(apply(array(m0,dim=c(3,4)),2,mean),2) #[1] 20.5 20.4 18.3 19.3 Qnew.anom= round(scale (Qnew[,1:4],center=q0,scale=FALSE) ,2) g=function(x){ g=round(c ( mean(x[2:4]),mean(x[c(1,3,4)]),mean(x[c(1,2,4)]),mean(x[1:3]) ),2);g} G=Qnew.anom; for (j in 1:nrow(Qnew.anom)){ G[j,]=g(Qnew.anom[j,])} temp=is.na(Qnew.anom); Qnew.anom[temp]=round(G[temp],2) Qnew=scale(Qnew.anom,center=-q0,scale=FALSE) Qnew=cbind(Qnew,round(apply(Qnew,1,mean),2)) hansenq$Qnew[,5]=Qnew[,5] hansenq } ##FUNCTION TO ESTIMATE QUARTERS AND COMPARE TO HANSEN #X is a data.frame from giss.collate typically hansenq=function(X,method="hansen") { if(method=="hansen") year=X$year else year= floor(tsp(X)[1]):floor(tsp(X)[2]) N=length(year);start0=min(year) #32 18 if(method=="hansen") X=as.matrix(X[,2:13]) else X=t( array(X,dim=c(12,length(X)/12) ) ) m0=apply(X,2,mean,na.rm=TRUE) #calculate mean over all values: possibly different q0= apply( array(m0[c(12,1:11)],dim=c(3,4)),2,mean) q0=c(q0,mean(m0) ) Xanom=scale(X,center=m0,scale=FALSE) #calculate anomaly version calendar year #reshape to Dec- Nov x=c(t(Xanom));x=c(NA,x[1:(length(x)-1)]);Xanom=t(array(x,dim=c(12,N) ) ) #re-shape to Dec-Nov Xanom=t(array(t(Xanom),dim=c(3,N*4)) ) #re-shape array in quarter-rows rather than annual-rows count=apply(!is.na(Xanom),1,sum) Qanom= apply(Xanom,1,mean,na.rm=TRUE); Qanom[count<2]=NA Qanom= t( array(Qanom,dim=c(4,length(Qanom)/4) ) ) count=apply(!is.na(Qanom),1,sum) Qanom=cbind(Qanom,apply(Qanom,1,mean,na.rm=TRUE) ) dimnames(Qanom)[[2]]=c("DJF","MAM","JJA","SON","ANN") Qanom[count<3,"ANN"]=NA Qanom=scale(Qanom,center=-q0,scale=FALSE) #Qanom[,1:4]=round(Qanom[,1:4],2); #Qanom[,5]=round(Qanom[,5],2) hansenq=ts(Qanom ,start=start0) hansenq } #test=hansenq(station[[i]]);test1=station[[i]][,14:18];data.frame(test,test1) ##EXAMPLE #source("http://www.climateaudit.org/scripts/station/read.giss.txt") #station=giss.collate(id="117635330000") #Neghili #test00=hansenq(station[[1]][[1]]) #dset0, version 0 #cbind(test00$Q,test00$Qnew) #this has only been tested in 2 column situations so far hansen_bias=function(X,Y,method="hansen") { if(method=="hansen"){ temp1=!is.na(X$ANN); temp2=!is.na(Y$ANN) test=match(Y$year[temp2],X$year[temp1]);temp=!is.na(test) A=Y$ANN[temp2][temp];#A B= X$ANN[temp1][test[temp]] #a= floor(100*A -100*B); a=sum(a)/100 a=sum(A-B) K=length(A) #hansen_bias= sign(a) *round(floor( (abs(a) + 5*K)/K)/100,2) hansen_bias= a/K } else { temp1=!is.na(X); temp2=!is.na(Y) test=match(c(time(Y))[temp2],c(time(X))[temp1]);temp=!is.na(test) A=Y[temp2][temp];#A B= X[temp1][test[temp]] #a= floor(100*A -100*B); a=sum(a)/100 a=sum(A-B) K=length(A) #hansen_bias= sign(a) *round(floor( (abs(a) + 5*K)/K)/100,2) hansen_bias= a/K } hansen_bias } #station=giss.collate(id="61111520000") #PRaha #X=station[[1]][[1]];Y=station[[1]][[2]] #hansen_bias(X,Y) #0.1