##THIS COLLATES GISS DSET VERSIONS #returns a list of length 7364 read_sint <- function(handle) { return(readBin(handle, "int", 1, size=4, signed = TRUE, endian = "big")); } read_uint <- function(handle) { return(readBin(handle, "int", 1, size=4, signed = FALSE, endian = "big")); } read_header <- function(handle) { len <- read_uint(handle); if( len < 9*4 ) { print("invalid file format"); return(NA); } vers <- read_sint(handle); unk1 <- read_sint(handle); unk2 <- read_sint(handle); MTOT <- read_sint(handle); MTOT2 <- read_sint(handle); iyear1 <- read_sint(handle); inv1 <- read_sint(handle); inv2 <- read_sint(handle); MTOT_again <- read_sint(handle); title <- rawToChar(readBin(handle, "raw", len-9*4)); if( read_uint(handle) != len ) { print("invalid file format"); return(NA); } return(pairlist(vers=vers, unk1=unk1, unk2=unk2, MTOT=MTOT, MTOT2=MTOT2, iyear1=iyear1, inv1=inv1, inv2=inv2, MTOT_again=MTOT_again, title=title)); } read_record <- function(handle) { len <- read_uint(handle); if( length(len) == 0 ) { # no more data return(NA); } if( len <= 44+16 ) { print("invalid file format"); return(NA); } temp_data <- readBin(handle, "int", (len-44-16)/4, size=4, signed = TRUE, endian = "big"); lato <- read_uint(handle); longo <- read_uint(handle); id <- read_uint(handle); ht <- read_uint(handle); name <- rawToChar(readBin(handle, "raw", 36)); vers <- read_uint(handle); MTOT <- read_uint(handle); if( read_uint(handle) != len ) { print("invalid file format"); return(NA); } return(pairlist(temp_data=temp_data, lato=lato, longo=longo, id=id, ht=ht, name=name, vers=vers, MTOT=MTOT)); } read_GISS_data <- function(handle) { header_data <- read_header(handle); records <- list(); while(TRUE) { record <- read_record(handle); if( length(record) == 1 ) { return(pairlist(header=header_data, records=records)); } else { records <- append(records, record); } } } #COLLATION FUNCTIONS ##function to collate the 6 segments f=function(Data) { N=length(Data[[2]]) station=Data[[2]][ seq(1,N-7,8)] index=(1:N)[is.na(match(1:N, seq(1,N-7,8)))] N=length(index); Data[[2]]=c(Data[[1]][c("vers","MTOT")] , Data[[2]][index] ) info=array(NA,dim=c(N/7,7));info=data.frame(info) for(i in 1:7) info[,i]= sapply(Data[[2]][i+seq(0,N-7,7)], function(x) x) names(info)=names(Data[[2]])[1:7] f=list(info=info,station=station) f } # function to collate 6 segments - tracking info and data collate.giss=function(m,month=10,year=2008) { tag=c("","PA.") working=rep(list(NA),6) if(nchar(month)==1) month=paste("0",month,sep="") url=paste("ftp://data.giss.nasa.gov/pub/gistemp/GISS_Obs_analysis/ARCHIVE/",year,"_",month,"/binary_files",sep="") for(k in 1:6) { loc=file.path(url, paste("Ts.GHCN.CL.",tag[m],k,sep="")) download.file(loc, "temp.bin",mode="wb") handle <- file("temp.bin", "rb"); Data <- read_GISS_data(handle); close(handle) working[[k]]=f(Data) } info=NULL; for (i in 1:6) info=rbind(info,working[[i]][[1]]) #6268 station=NULL; for (i in 1:6) station=c(station,working[[i]][[2]]) info$id[nchar(info$id)==8]=paste("0",info$id[nchar(info$id)==8],sep="") info$start= 1880+ (info$vers-1)/12 #info id's are not the same as giss.info id's; match them trial1=substr(giss.info$id,4,11) info$id=as.character(info$id) trial2=substr(info$id,1,nchar(info$id)-1) h=function(x) match( x,trial1) test=rep(list(NA),nrow(info)) for(i in 1:nrow(info)) test[[i]]=h(trial2[i]) unique(sapply(test,length)) #1 #unique matches info$gissid=NA for(i in 1:nrow(info)) info$gissid[i]=giss.info$id[ h(trial2[i])] #confirm that everything matches at least once #sum(is.na(info$gissid)) #0 #unique(tapply(!is.na(info$gissid),info$gissid,sum)) # 1 2 #some series are in two segments #MAKE DSET1 which is a list of time series organized by giss.info id dset=rep(list(NA),7364) names(dset)=giss.info$id id=unique(info$gissid); K=length(id) h=function(x) if(x%%12 ==0) 12 else x%%12 for(i in 1:K) { temp=(info$gissid==id[i]);M=sum(temp) index=(1:nrow(info))[temp] dset[[paste(id[i])]]=list() for( j in 1:M) { x= ts(station[[index[j] ]],start=c(floor(info$start[index[j]]), h(info$vers[index[j]]) ),freq=12) x[x== 9999]=NA;x=x/10 dset[[paste(id[i])]][[j]] =x } #j } #i collate.giss=dset collate.giss } ##REQUIRES giss.info #source("http://www.climateaudit.org/scripts/station/giss/collate.dset.nicholas.txt") #download.file("http://www.climateaudit.org/data/giss/giss.info.tab","temp.dat",mode="wb");load("temp.dat") #dset1=collate.giss(m=1,month=10,year=2008) #save(dset1,file="d:/climate/data/station/giss/200810/dset1.tab") #dset2=collate.giss(m=2,month=10,year=2008) #save(dset2,file="d:/climate/data/station/giss/200810/dset2.tab")