##############################
### GISS VERSION - CURRENT
# also see scripts at http://x256.org/~hb/giss.r2
# data sets: 0 - raw uncombined; 1 - combined; 2 - adjusted

# Download `url` and return its contents as a character vector, one element
# per line.
download_html <- function(url) {
  # A unique temporary file avoids clobbering (or leaving behind) a "temp.html"
  # in the working directory; on.exit() removes it even if the download or the
  # read fails.
  tmp_path <- tempfile(fileext = ".html")
  on.exit(unlink(tmp_path), add = TRUE)
  download.file(url, tmp_path)
  readLines(tmp_path)
}

# Percent-encode the characters space, "#", "&", "=" and "?" in `string` so it
# can be placed in a query-string value. No other characters are touched.
url_escape <- function(string) {
  replacements <- c(
    " " = "%20",
    "#" = "%23",
    "&" = "%26",
    "=" = "%3D",
    "?" = "%3F"
  )
  for (ch in names(replacements)) {
    # fixed = TRUE: every pattern here is a literal character, not a regex.
    string <- gsub(ch, replacements[[ch]], string, fixed = TRUE)
  }
  string
}

# Download GISS station temperature data.
#
# station_name can either be a name, or a 5-digit WMO number.
# match_num is only used as a bounds check against the number of matching
#   stations; every matching station is downloaded.
# dset selects the data set (see the header of this file).
#
# If there is no match, NA is returned. Otherwise the result is a list with
# three elements:
#   raw    - monthly time series, one column per station id plus "avg"
#            (row-wise mean over the stations, ignoring NA)
#   anom   - `raw` minus the monthly 1961-1990 means of "avg", rounded to 1 dp
#   normal - the 12 monthly 1961-1990 means of "avg"
#
# This should work as long as NASA don't change the URLs they use for
# providing data.
#
# NOTE(review): relies on `fill.array` and `month0`, neither of which is
# defined in this part of the file - confirm they are available at call time.
download_giss_data <- function(station_name, match_num = 1, dset = 0) {
  giss_host <- "http://data.giss.nasa.gov"
  search_url <- paste0(
    giss_host,
    "/cgi-bin/gistemp/findstation.py?datatype=gistemp&data_set=", dset,
    "&name=", url_escape(station_name), "&submit=1"
  )
  search_page <- download_html(search_url)
  urls <- grep("gistemp_station.py", search_page, value = TRUE)

  # If the station_name is actually a WMO number, throw away any URLs that
  # match the number in the wrong place of the extended number scheme.
  if (any(grepl("^[0-9]{5}$", station_name))) {
    urls <- grep(paste0("\\?id=[0-9]{3}", station_name), urls, value = TRUE)
  }
  if (length(urls) == 0 || match_num > length(urls)) {
    return(NA)
  }

  n_stations <- length(urls)
  # Varied from Nicholas to leave as vector: keep every matching station.
  station_urls <- paste0(
    giss_host,
    gsub("^.*\"(/cgi-bin/gistemp/gistemp_station.py[^\"]*)\".*$", "\\1", urls)
  )
  # Characters 65-76 of the station URL are taken as the station id.
  id <- substr(station_urls, 65, 76)

  station_tables <- vector("list", n_stations)
  for (j in seq_len(n_stations)) {
    station_page <- download_html(station_urls[j])
    link_lines <- grep("monthly data as text", station_page, value = TRUE)
    if (length(link_lines) == 0) {
      stop("no 'monthly data as text' link found on ", station_urls[j],
           call. = FALSE)
    }
    text_url <- gsub(
      "^.*\"(http://data.giss.nasa.gov/work/gistemp/STATIONS/[^\"]*)\".*$",
      "\\1", link_lines[1]
    )
    con <- url(text_url, "rt")
    # Instead of Nicholas' method: read the table directly, skipping the
    # header line. The connection is closed even if parsing fails.
    station_tables[[j]] <- tryCatch(
      read.table(con, skip = 1),
      finally = close(con)
    )
    names(station_tables[[j]])[1] <- "year"
  }
  names(station_tables) <- id

  tsite <- NULL
  for (j in seq_len(n_stations)) {
    station_df <- station_tables[[j]]
    # 999.9 is treated as the missing-value marker.
    station_df[station_df == 999.9] <- NA
    # Year column plus the 12 monthly columns.
    # NOTE(review): `fill.array` is defined elsewhere; the code below expects
    # its result to carry years as row names - confirm.
    tt <- fill.array(station_df[, 1:13])
    # if(method=="annual") tsite<-ts.union(tsite,ts.annavg(tt,M=6))
    # if(method=="monthly"):
    monthly <- ts(
      as.matrix(c(t(tt))),
      start = c(min(as.numeric(row.names(tt))), 1),
      frequency = 12
    )
    tsite <- ts.union(tsite, monthly)
  }
  tsite <- cbind(tsite, apply(tsite, 1, mean, na.rm = TRUE))
  dimnames(tsite)[[2]] <- c(id, "avg")

  years <- floor(tsp(tsite)[1]):floor(tsp(tsite)[2])
  n_years <- nrow(tsite) / 12
  # One row per year, one column per month.
  chron <- t(array(tsite[, "avg"], dim = c(12, n_years)))
  in_baseline <- years %in% 1961:1990
  # drop = FALSE keeps a matrix even when a single year is in the baseline.
  m0 <- apply(chron[in_baseline, , drop = FALSE], 2, mean, na.rm = TRUE)
  # NOTE(review): `month0` is defined elsewhere; presumably 12 month labels.
  names(m0) <- month0
  month_index <- rep(1:12, n_years)
  tsite_anom <- round(tsite - m0[month_index], 1)

  list(raw = tsite, anom = tsite_anom, normal = m0)
}
# station.giss<-download_giss_data(id0)
##########################