###TREE RING UTILITIES #make.crn #makes R time series from url #ftp://ftp.ncdc.noaa.gov/pub/data/paleo/treering/chronologies/southamerica/arge048.crn load("d:/climate/data/tree/country.list.tab") #dummy<-array(NA,dim=c(3,3));dummy<-data.frame(dummy);names(dummy)<-names(country.list);dummy[1,]<-c("leba","asia","asia"); #dummy[2,]<-c("syri","asia","asia");dummy[3,]<-c("kore","asia","asia");country.list<-rbind(country.list,dummy) #save(country.list,file="d:/climate/data/tree/country.list.tab") make.crn<-function(loc,widths0=c(6,4,rep( c(4,3),10)),N= -1,readtype="REG",url0="ftp://ftp.ncdc.noaa.gov/pub/data/paleo/treering/chronologies") { #may need some more quality control on oddballs but will work for STNDRD, SCHW, Long if(url0=="ftp://ftp.ncdc.noaa.gov/pub/data/paleo/treering/updates") url<-file.path(url0,paste(loc,"crn",sep=".")) else url<-file.path(url0,countrycode(loc),paste(loc,"crn",sep=".")) #see if it is a Schweingruber format by checking fifth row schwein<-c("RAW","RES","STD") h<-try(readLines(url,n=5)) if (class(h)=="try-error") make.crn<-NA else { if( substr(h[4],6,6)=="-") readtype<-"LONG" type<-substr(h,83,85) if ( !is.na( match(type[5],schwein) ) ) readtype<-"SCHW"} #read in different ways for STNDRD and SCHW formats #SCHW format has a number of options; only the RAW version is read here #this is controlled by calculating N in Schwein case; otherwise N= -1 in STNDRD if(readtype=="SCHW") { h<-readLines(url); temp<-(substr(h,83,85)=="STD") h=h[temp] h=h[4:length(h)] write.table(h,"temp.dat",row.names=FALSE,quote=FALSE,col.names=FALSE) test=read.fwf("temp.dat",widths=c(6,4,rep(c(4,3),10))) start0=floor(test[1,2]/10)*10 test=test[,seq(3,21,2)];dim(test) test=ts(c(t(test)),start=start0) temp=(test>=9990);test[temp]=NA temp=!is.na(test);r0=range( time(test)[temp]) temp=(time(test)>=r0[1])&(time(test)<=r0[2]) make.crn=ts(test[temp],start=r0[1]) } if(readtype=="LONG") widths0[1:2]<-c(5,5) if(!(readtype=="SCHW")) { h<-read.fwf(url,widths=widths0,skip=3,nrow=N,blank.lines.skip=TRUE,colClasses=c("factor",rep("numeric",21))) #blan.lines.skip added June 2006 start1<-h[1,2] #} temp<-!is.na(h[,1]) h<-h[temp,] #adjusts for blank lines if (h[nrow(h),2]< h[(nrow(h)-1),2]) h<-h[1:(nrow(h)-1),] #this adjusts for some versions which have statistics in last row chron.crn<-h[,seq(3,21,2)] if (start1>2000) print(loc, "ERROR 1 - START AFTER 2000") temp<-(chron.crn==9990) chron.crn[temp]<-NA chron.crn<- c(t(chron.crn)) temp<-!is.na(chron.crn); make.crn<- ts(chron.crn[temp],start=start1) } make.crn } #EXAMPLE #chron.crn<-make.crn("arge048") #chron.crn<-make.crn("cana157") #chron.crn<-make.crn("cana857") countrycode<-function(id) { temp<-match(substr(id,1,4),country.list[,1]) if(is.na(temp)) countrycode<-"northamerica/usa" else countrycode<- country.list[temp,2] countrycode } ##make.rwl #this is a very old script and can probably be improved, #it's been re-edited to post on internet and has not been checked yet against oddball formats #earlier edition was thoroughly checked # a few oddball formats need to be checked out #FUNCTION TO READ RWL FILE AND MAKE LONG-FORM VERSION #returns list # tree, #two verification lists #the parameter long adjusts format when dates are -1000AD are less make.rwl<-function(loc,long=FALSE,method="wdcp",skip0=3,url0="ftp://ftp.ncdc.noaa.gov/pub/data/paleo/treering/measurements") { trailer.verify<- 999 trailing.marker<- (-9999) if(!(url0=="ftp://ftp.ncdc.noaa.gov/pub/data/paleo/treering/updates")&!(method=="local")) url0<-file.path(url0,countrycode(loc)) url<-file.path(url0,paste(loc,"rwl",sep=".")) h<-try(readLines(url,n=5)) if (class(h)=="try-error") make.rwl<-NA else { if( substr(h[4],6,6)=="-") long<-TRUE #if (long) widths0<- c(8,4,rep(6,10)) ##else widths0<- c(7,5,rep(6,10)) test<-as.numeric(substr(h,9,12)) skip0<-sum(is.na(test)) # if(!is.numeric(tree[2,4])) tree<-read.fwf(file=url,widths=c(7,6,rep(6,10)),fill=TRUE,skip=3,blank.lines.skip=TRUE) #usually FALSE #if(!is.numeric(tree[1,3])) tree<-read.fwf(file=url,widths=c(7,5,rep(6,10)),fill=TRUE,skip=4,blank.lines.skip=TRUE) #usually FALSE #if(!is.numeric(tree[1,3])) tree<-read.fwf(file=url,widths=c(7,5,rep(6,10)),fill=TRUE,skip=5,blank.lines.skip=TRUE) #usually FALSE tree<-read.fwf(file=url,widths=widths0,fill=TRUE,skip=skip0,blank.lines.skip=TRUE) #slightly different format when long if(class(tree[,2])=="factor") {widths0<- c(8,4,rep(6,10)); tree<-read.fwf(file=url,widths=widths0,fill=TRUE,skip=skip0,blank.lines.skip=TRUE) } tree[,1]<-factor(tree[,1]) levels(tree[,1])<-sub(" +$", "",levels(tree[,1])); #check for oddball input formats temp<-is.na(tree[,1]) #typically 0 and next segment is not invoked if(sum(temp)>0) tree<-tree[!temp,] # if (sum(temp)>1) {h<-readLines(url); # h<-h[!temp]; # write.table(h,file="d:/temp/tree.txt",quote=FALSE,col.names=FALSE,row.names=FALSE); # tree<-read.fwf(file="d:/temp/tree.txt",widths=c(8,4,rep(6,10)),fill=TRUE,skip=3,blank.lines.skip=TRUE) # } #tests for oddball format #class(tree$V4)<-"numeric" #change from integer #tree<-tree[!is.na(tree[,1]),] #remove NA values in ID if any; usually not test3=sapply(tree,class)[2:12]; if (sum(!is.na(match(test3,"factor")))>0) { fred=readLines(url); fred=fred[!(fred=="")]; fred=fred[4:length(fred)]; n=nchar(fred);treeid=substr(fred,1,8);treeid=gsub(" +$","",treeid) fred=substr(fred,9,n) fred=gsub("-9999"," -99 ",fred) write.table(fred,"temp.dat",quote=FALSE,row.names=FALSE,col.names=FALSE) tree=read.table("temp.dat",fill=TRUE) tree[tree== -99]= -9999 tree=data.frame(treeid,tree) } #check for trailing 999 markers; very annoying; also -9999 trailers n<-nrow(tree) temp.marker<-(tree[2:n,1]==tree[1:(n-1),1]) temp.marker<-c(temp.marker,FALSE) temp.marker<-!temp.marker test1<- apply( tree[temp.marker,3:12],1,min,na.rm=TRUE) temp.trailer1<- (test1== -9999) id1<-tree[temp.marker,1][temp.trailer1] test2<- apply( tree[temp.marker,3:12],1,max,na.rm=TRUE) temp.trailer2<- (test2== 999) id2<-tree[temp.marker,1][temp.trailer2] temp.trailer<- ((tree[temp.marker,3:12]==999)|(tree[temp.marker,3:12]== -9999)) &!is.na(tree[temp.marker,3:12]) tree[temp.marker,3:12][temp.trailer]<-NA #usually results in substitutions ##IF MIXED if ( (sum(temp.trailer1)>0) & (sum(temp.trailer2)>0) ) trailer.verify<-0 if ( !(sum(temp.trailer1)>0) & (sum(temp.trailer2)>0) ) trailer.verify<-0 print(paste(loc,trailer.verify)) if (trailer.verify ==0) { temp<- !is.na(match( tree[,1], id2)) tree[temp,3:12]<-10* tree[temp,3:12]} ### m<-length(levels(tree[,1])) tree<-cbind(tree,c(rep(0,n))) #make dummy column line.tree.no<-match(tree[,1],levels(tree[,1])) #this is index for tree year.start<-tapply(tree[,2],tree[,1],min) #NOTE: this doesn't work if ID is 8-digits (nv514) line.start<-year.start[line.tree.no] #this gives v- vector tree[,13]<-line.start #tree<-cbind(tree[gl(n,1,10*n),1],tree[gl(n,1,10*n),2],tree[gl(n,1,10*n),13],stack(tree[1:nrow(tree),3:12])[,1] );##reshapes NGDC to long form tree<-data.frame(tree[gl(n,1,10*n),1],tree[gl(n,1,10*n),2],tree[gl(n,1,10*n),13],stack(tree[1:nrow(tree),3:12])[,1] );##reshapes NGDC to long form temp2<-t(array(rep(1:10,n),dim=c(10,n))); tree<-cbind(tree,c(temp2));##sets tag for column of NGDC entry to add back in long format #reshape into long format with dim = n*10 x 9 #one column keeps indicator for potential reconstruction #id<-as.character(tree[,1])#scrapped tree<-data.frame(tree[,1],tree[,2]+tree[,5]-1,tree[,2]+tree[,5]-tree[,3],tree[,4]); dimnames(tree)[[2]]<-c("id","year","age","rw") #use NA removal to remove trailing values on first and last records, but not missing values with na.marker temp<-!is.na(tree[,4]) tree<-tree[temp,]; #set negative values as NA temp<-(tree[,4]<0) if (sum(temp)>0) { verify<-tree[temp,]; temp1<- (verify[,4] == -999); verify.NA<-verify[temp1,]; verify.negative<-verify[!temp1,]; tree[temp,4]<-NA } else {verify.NA <-NULL; verify.negative<-NULL } #order by tree and by year order.tree<-order(tree[,1],tree[,2]); make.rwl<-tree[order.tree,]; #possibly put into dataframe and edit column names } make.rwl } #tree<-make.rwl("russ022w") make.rwl_new =function(url,skip0=0,widths0 =c(8,4,rep(6,10))) { trailer.verify<- 999 trailing.marker<- (-9999) h<-try(readLines(url,n=5)) if (class(h)=="try-error") make.rwl<-NA else { tree<-read.fwf(file=url,widths=widths0,fill=TRUE,skip=skip0,blank.lines.skip=TRUE) levels(tree[,1])<-sub(" +$", "",levels(tree[,1])); temp<-is.na(tree[,1]) #typically 0 and next segment is not invoked if(sum(temp)>0) tree<-tree[!temp,] (test3=sapply(tree,class)[2:12]); if (sum(!is.na(match(test3,"factor")))>0) { fred=readLines(url); fred=fred[!(fred=="")]; fred=fred[4:length(fred)]; n=nchar(fred);treeid=substr(fred,1,8);treeid=gsub(" +$","",treeid) fred=substr(fred,9,n) fred=gsub("-9999"," -99 ",fred) write.table(fred,"temp.dat",quote=FALSE,row.names=FALSE,col.names=FALSE) tree=read.table("temp.dat",fill=TRUE) tree[tree== -99]= -9999 tree=data.frame(treeid,tree) } #check for trailing 999 markers; very annoying; also -9999 trailers id=levels(tree[,1]);K=length(id);K #1022 names(tree)[1:2]=c("id","year") long=NULL for(i in 1:K) { temp=(tree[,1]== levels(tree[,1])[i]) test=tree[temp,] A=outer(test[,2], 0:9, function(x,y) x+y) working=cbind( c(t(A)), c(t(test[,3:12])) ) M=nrow(working);M temp1=!is.na(match(1:M,1:10)) & is.na(working[,2]);sum(temp1) temp2=!is.na(match(1:M,(M-9):M )) & (is.na(working[,2])|(working[,2]== -9999)) ;sum(temp2) working=working[!temp1&!temp2,] working=data.frame( working); working$id=levels(tree[,1])[i] long=rbind(long,working) } long=long[,c(3,1,2)];names(long)=c("id","year","rw") make.rwl_new=long make.rwl_new } } #tree<-make.rwl("russ022w") #save(tree,file="d:/climate/data/briffa.2008/TornFin.rwl.tab") #collate #FUNCTION TO COLLATE LONG TO TIME SERIES FORM #this takes long-form and collates to time series #FUNCTION TO COLLATE LONG TO TIME SERIES FORM #this takes long-form and collates to time series #FUNCTION TO COLLATE LONG TO TIME SERIES FORM #this takes long-form and collates to time series collate<-function(tree,START=1) { treef<-factor(tree[,1]) M<-length(levels(treef)) recon<-NULL name<-NULL for (j in 1:M) { temp<-(tree$id==levels(treef)[j])&(tree$year>START) if (sum(temp)>0) { treetest<-tree[temp,]; treetemp<-ts ( rep(NA,max(treetest[,2])- min(treetest[,2])+1), start=min(treetest[,2]), end=max(treetest[,2]) ) ; test5<-match(treetest[,2],c(tsp(treetemp)[1]:tsp(treetemp)[2])) treetemp[test5]<-treetest[,4] ; recon<-ts.union(recon,treetemp); name<-c(name,j) } } dimnames(recon)[[2]]<- sub(" +$", "", levels(treef)[name])#removes trailing blanks collate<-recon collate } #treecalc treecalc<-function(url,long=FALSE,skip0=3) { treecalc<-make.rwl(url,long=FALSE,skip0=3) trecalc} #this is just a change of name to reconcile with older scripts #note: this includes some quality control checking on NGDC data - #the tag verify2 keeps track of few negative values, of which there are a few #the tag start0 and end0 keeps track of start-dates and end-dates. #some "floating series" intentionally have start-dates way into the future #a couple of series had RES as well as STD series and the RES data was manually deleted #these entries are not relevant to the MBH subsets # see script "ngdc.qc" ##make.chron.are make.chron.ars<-function(loc.in) { h<-readLines(loc.in,n=-1) start0<-as.numeric(substr(h[2],13,18)) length0<-as.numeric(substr(h[2],4,8)) h<-h[3:length(h)] fred<-as.numeric (substr(h,22,24)) N<-length(h) test<-c(1:N)[is.na(fred)] #this looks for first alphanumeric header h<-h[1:(min(test)-1)] write.table(h,file="d:/temp/h.txt",row.names=FALSE,quote=FALSE) g<-read.fwf("d:/temp/h.txt",skip=1, widths=rep( c(4,2),13)) g<-g[,seq(1,25,2)] g<-c(t(g)) make.chron.ars<-ts(g,start=start0,end=start0+length0-1) make.chron.ars } ### update details update.details<-function(loc,url0="ftp://ftp.ncdc.noaa.gov/pub/data/paleo/treering/measurements") { if(!(url0=="ftp://ftp.ncdc.noaa.gov/pub/data/paleo/treering/updates")) url0<-file.path(url0,countrycode(loc)) url<-file.path(url0,paste(loc,"rwl",sep=".")) h<-try(readLines(url,n=5)) if (class(h)=="try-error") h<-readLines(file.path("ftp://ftp.ncdc.noaa.gov/pub/data/paleo/treering/chronologies",countrycode(loc),paste(loc,"crn",sep=".")) ,n=5) dummy<-details[1,] dummy[1,]<-NA dummy$id<-loc dummy$location<-sub(" +$", "",substr(h[1],10,40)) dummy$type<-substr(h[1],62,65) dummy$long<-as.numeric(substr(h[2],52,57)) dummy$long<-dummy$long/100 dummy$lat<-as.numeric(substr(h[2],47,51)) dummy$lat<-dummy$lat/100 dummy$altitude<-as.numeric(substr(h[2],41,44)) dummy$altitude<-dummy$altitude dummy$start<-as.numeric(substr(h[2],67,71)) dummy$start<-dummy$start dummy$end<-as.numeric(substr(h[2],72,76)) dummy$end<-dummy$end dummy$author<-sub(" +$", "",substr(h[3],10,50)) update.details<-dummy update.details } country.out<-function(id) { temp<-match(substr(id,1,4),country.list[,1]) if(is.na(temp)) country.out<-"northamerica" else country.out<- country.list[temp,2] if ((substr(id,1,4)=="cana")|(substr(id,1,4)=="mexi")) country.out<-"northamerica" country.out } ##CHECK MEASUREMENT CODE check.rwl<-function(loc,long=FALSE,skip0=3,url0="ftp://ftp.ncdc.noaa.gov/pub/data/paleo/treering/measurements") { trailer.verify<- 999 trailing.marker<- (-9999) if(!(url0=="ftp://ftp.ncdc.noaa.gov/pub/data/paleo/treering/updates")) url0<-file.path(url0,countrycode(loc)) url<-file.path(url0,paste(loc,"rwl",sep=".")) h<-try(readLines(url,n=5)) if (class(h)=="try-error") check.rwl<-NA else { if( substr(h[4],6,6)=="-") long<-TRUE #if (long) widths0<- c(8,4,rep(6,10)) ##else widths0<- c(7,5,rep(6,10)) test<-as.numeric(substr(h,9,12)) skip0<-sum(is.na(test)) # if(!is.numeric(tree[2,4])) tree<-read.fwf(file=url,widths=c(7,6,rep(6,10)),fill=TRUE,skip=3,blank.lines.skip=TRUE) #usually FALSE #if(!is.numeric(tree[1,3])) tree<-read.fwf(file=url,widths=c(7,5,rep(6,10)),fill=TRUE,skip=4,blank.lines.skip=TRUE) #usually FALSE #if(!is.numeric(tree[1,3])) tree<-read.fwf(file=url,widths=c(7,5,rep(6,10)),fill=TRUE,skip=5,blank.lines.skip=TRUE) #usually FALSE tree<-read.fwf(file=url,widths=widths0,fill=TRUE,skip=skip0,blank.lines.skip=TRUE) #slightly different format when long if(class(tree[,2])=="factor") {widths0<- c(8,4,rep(6,10)); tree<-read.fwf(file=url,widths=widths0,fill=TRUE,skip=skip0,blank.lines.skip=TRUE) } tree[,1]<-factor(tree[,1]) levels(tree[,1])<-sub(" +$", "",levels(tree[,1])); #check for oddball input formats temp<-is.na(tree[,1]) #typically 0 and next segment is not invoked if(sum(temp)>0) tree<-tree[!temp,] #check for trailing 999 markers; very annoying; also -9999 trailers n<-nrow(tree) temp.marker<-(tree[2:n,1]==tree[1:(n-1),1]) temp.marker<-c(temp.marker,FALSE) temp.marker<-!temp.marker test1<- apply( tree[temp.marker,3:12],1,min,na.rm=TRUE) temp.trailer1<- (test1== -9999) id1<-tree[temp.marker,1][temp.trailer1] test2<- apply( tree[temp.marker,3:12],1,max,na.rm=TRUE) temp.trailer2<- (test2== 999) id2<-tree[temp.marker,1][temp.trailer2] temp.trailer<- ((tree[temp.marker,3:12]==999)|(tree[temp.marker,3:12]== -9999)) &!is.na(tree[temp.marker,3:12]) tree[temp.marker,3:12][temp.trailer]<-NA #usually results in substitutions ##IF MIXED if ( (sum(temp.trailer1)>0) & (sum(temp.trailer2)>0) ) trailer.verify<-0 if ( !(sum(temp.trailer1)>0) & (sum(temp.trailer2)>0) ) trailer.verify<-0 check.rwl<-trailer.verify } check.rwl }