#SOME UTILITIES

##df
#make df from ts
df=function(x,lab="O18") {
  work=data.frame(year=tsp(x)[1]:tsp(x)[2],x=as.numeric(x))
  names(work)[2]=lab
  return(work)
}

##scalex
#scale a time series, preserving its start date
scalex=function(A) ts(scale(A),start=tsp(A)[1])

##ts.unionx
#ts.union keeping the column names of A and adding the name of y
ts.unionx=function(A,y) {
  D=ts.union(A,y)
  dimnames(D)[[2]]=c(dimnames(A)[[2]],deparse(substitute(y)))
  return(D)
}

##circledist
#great-circle distance in km; x = c(fromlat,fromlong,lat,long)
circledist=function(x,R=6372.795) {
  pi180=pi/180
  y=abs(x[4]-x[2])
  y[y>180]=360-y[y>180]
  y[y<= -180]=360+y[y<= -180]
  delta=y*pi180
  fromlat=x[1]*pi180; tolat=x[3]*pi180; tolong=x[4]*pi180
  theta=2*asin( sqrt( sin((tolat-fromlat)/2)^2 + cos(tolat)*cos(fromlat)*sin(delta/2)^2 ) )
  circledist=R*theta
  circledist
}

##redblueplot
#plot an anomaly series with positive values shaded red and negative values blue
redblueplot=function(x) {
  year=c(time(x))
  K=length(year)
  plot(x,type="l",axes=FALSE,xlab="",ylab="")
  work=x; work[work<0]=0
  polygon(xy.coords(x=c(year,rev(year)),y=c(work,rep(0,K))),col="red",border=NA)
  work=x; work[work>0]=0
  polygon(xy.coords(x=c(year,rev(year)),y=c(work,rep(0,K))),col="blue",border=NA)
  axis(side=2,las=1); axis(side=1); box()
}

##average
#average of time series in steps of K years, with del the offset to an even division
average=function(x,K=30,del=0) {
  y=K*floor( (del+c(time(x)))/K ) - del
  return(ts( unlist(tapply(x,factor(y),mean,na.rm=TRUE)), start=(K/2)+min(y), freq=1/K))
}

##tag
#prefix row names with "#" for writing commented tables
tag=function(A) {
  B=data.frame(A)
  row.names(B)=paste("#",row.names(B))
  return(B)
}

##get_pangaea
#download a PANGAEA dataset by doi id and read the tab-separated data block
get_pangaea=function(id,n=100,dest=tempfile()) {
  loc=paste("http://doi.pangaea.de/10.1594/PANGAEA.",id,"?format=textfile",sep="")
  download.file(loc,dest)
  fred=readLines(dest,n=n)
  index=min( (1:length(fred))[fred=="*/"] )   #data block starts after the "*/" line
  work=try( read.table(dest,skip=index,sep="\t",header=TRUE) )
  if(class(work)=="try-error") {
    work=read.table(dest,skip=index+1,sep="\t",header=FALSE)
    m=scan(dest,skip=index,n=ncol(work),what="",sep="\t")
    names(work)=m
  }
  m=names(work)
  if(length(grep("BP",m))>0) {
    m[min(grep("BP",m))]="bp"
    names(work)=m
    work$bp=1000*work$bp    #convert ka BP to years BP
  }
  return(work)
}

##resolution
#average time step (years per observation) of a data frame with a bp or year column
resol=resolution=function(A) {
  work=NA
  if(length(grep("bp",names(A)))>0) work=(diff(range(A$bp))+1)/nrow(A)
  if(length(grep("year",names(A)))>0) work=(diff(range(A$year))+1)/nrow(A)
  return(work)
}

##decadal
#decadal averages of an annual time series
decadal=function(x) ts( unlist(tapply(x,factor(10*floor(c(time(x))/10)),mean,na.rm=TRUE)),
  start=5+min(10*floor(c(time(x))/10)), freq=.1)

##PLOT IN DENSE LATTICE
plotf3=function(x,k,Nrow=4,Ncol=2,ylab="",ylim0=c(-2,2),col0=c("grey80","black"),lwd0=c(1,2),
  xlim0=c(300,2000),tag=NULL,verbose=FALSE,showsmooth=TRUE,main="",
  start.med=900,end.med=1100,start.mod=1900,end.mod=2020,leftmar=5) {
  column=1+floor((k-1)/Nrow)
  row=k-Nrow*(column-1)
  par0=c(3,0,0,0)
  if(column==1) par0[2]=leftmar else par0[2]=0
  if(row==Nrow) par0[1]=leftmar else par0[1]=0
  if(row==1) par0[3]=leftmar else par0[3]=0
  if(column==Ncol) par0[4]=1 else par0[4]=0
  par(mar=par0)
  plot(x,type="l",col=col0[1],xlim=xlim0,ylim=ylim0,xaxs="i",axes=FALSE,ylab=ylab,xlab="",lwd=lwd0[1],main=main)
  usr <- par("usr")
  rect(usr[1],usr[3],start.med,usr[4],border=NA,col="lavender")      #before the medieval window
  rect(start.med,usr[3],end.med,usr[4],border=NA,col="lemonchiffon") #medieval window
  rect(end.med,usr[3],start.mod,usr[4],border=NA,col="lavender")     #between medieval and modern windows
  rect(start.mod,usr[3],end.mod,usr[4],border=NA,col="lemonchiffon") #modern window
  rect(end.mod,usr[3],usr[2],usr[4],border=NA,col="lavender")        #after the modern window
  lines(x,col=col0[1],lwd=lwd0[1])
  if(showsmooth) lines(f(x),col=col0[2],lwd=lwd0[2])
  grid()
  if(!is.null(tag)) text(xlim0[1],.9*ylim0[2],pos=4,font=2,tag)
  abline(h=0,lty=3)
  if(row==Nrow) axis(side=1) else axis(side=1,labels=FALSE)
  if(column==1) axis(side=2,at=-2:2,las=1) else axis(side=2,labels=FALSE)
  text(300,1.7,ylab,pos=4,font=2)
  box()
  if(verbose) return(list(row=row,col=column,par=par0))
}
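#Example usage (illustrative only; the coordinates and series below are made up, not from any dataset):
#great-circle distance between two hypothetical sites, 30-year bins of a toy annual series,
#and a red/blue anomaly plot of the scaled series.
#test.site=c(65.0,-45.0,70.0,-50.0)        #fromlat,fromlong,lat,long (hypothetical)
#circledist(test.site)                     #distance in km
#test.series=ts(rnorm(200),start=1801)
#average(test.series,K=30)                 #30-year block means, dated at block midpoints
#redblueplot(scalex(test.series))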
##PLOT IN DENSE LATTICE
plotf=function(x,k,Nrow=4,Ncol=2,ylab="",ylim0=c(-2,2),col0=c("grey80","black"),lwd0=c(1,2),
  xlim0=c(300,2000),tag=NULL,verbose=FALSE,showsmooth=TRUE) {
  column=1+floor((k-1)/Nrow)
  row=k-Nrow*(column-1)
  par0=c(3,0,0,0)
  if(column==1) par0[2]=4 else par0[2]=0
  if(row==Nrow) par0[1]=3 else par0[1]=0
  if(row==1) par0[3]=3 else par0[3]=0
  if(column==Ncol) par0[4]=1 else par0[4]=0
  par(mar=par0)
  plot(x,type="l",col=col0[1],xlim=xlim0,ylim=ylim0,axes=FALSE,ylab="",xlab="",lwd=lwd0[1])
  if(showsmooth) lines(f(x),col=col0[2],lwd=lwd0[2])
  grid()
  if(!is.null(tag)) text(xlim0[1],.9*ylim0[2],pos=4,font=2,tag)
  abline(h=0,lty=3)
  if(row==Nrow) axis(side=1) else axis(side=1,labels=FALSE)
  if(column==1) axis(side=2,at=seq(floor(ylim0[1]),floor(ylim0[2]),2),las=1) else axis(side=2,labels=FALSE)
  text(300,1.7,ylab,pos=4,font=2)
  box()
  if(verbose) return(list(row=row,col=column,par=par0))
}

#example layout for a 5x2 lattice:
#Nrow0=5
#ylim1=c(-2,2)
#nf=layout(array(1:10,dim=c(5,2)),widths=c(1.3,1.1),heights=c(1.3,1,1,1,1.3))
#plotf(proxy[proxy$bp<1950,],k=1,ylim=m+ylim1,start.med=1950-900,end.med=1950-1100,xlim=c(2000,-50))
#text(1950,m+d,font=2,pos=4,"Siple Dome")

##jones
#index of the 5x5 degree CRU grid cell containing (lat,long)
jones<-function(lat,long) {
  i<-18-floor(lat/5); j<-36+ceiling(long/5)
  jones<-72*(i-1)+j
  jones
}

##jones.inv
#center (lat,long) of CRU grid cell k
jones.inv<-function(k) {
  j<-k%%72
  temp<-k-j
  i<-(temp/72)+1
  long<-(j-36)*5-2.5
  lat<-(18-i)*5+2.5
  jones.inv<-c(lat,long)
  jones.inv
}

##ts.annavg
#annual average from a year x month array; requires at least M non-missing months per year
ts.annavg=function(X,M=6) {
  if(class(X)=="ts") {start0<-floor(tsp(X)[1]); X<-ts.array(X)} else
  if( (inherits(X,"matrix")|inherits(X,"data.frame")) & (ncol(X)==12) ) start0<-as.numeric(row.names(X)[1]) else
    {start0<-X[1,1]; X<-X[,2:13]}
  count<-apply(!is.na(X),1,sum)
  temp<-(count>=M)
  annual<-rep(NA,nrow(X))
  annual[temp]<-apply(X[temp,,drop=FALSE],1,mean,na.rm=TRUE)
  return(ts(annual,start=start0))
}

##anom
#monthly anomaly of a monthly time series x relative to the base period [start, end],
#rounded to Round digits
anom=function(x,start,end,Round) {
  month=factor(cycle(x))
  anom=month
  temp= time(x)>=start & time(x)<= (end+1)
  levels(anom)=unlist(tapply(x[temp],month[temp],mean,na.rm=TRUE))
  anom= round(x- as.numeric(as.character(anom)),Round)
  return(anom)
}

##annavg
#CALCULATE ANNUAL AVERAGE FROM MONTHLY TIME SERIES (requires a whole number of years)
annavg<-function(x) {
  x=window(x,start=ceiling(time(x)[1]))  #,end=floor(time(x)[length(x)]))
  y<-array(x,dim=c(12,length(x)/12))
  test<-ts(apply(y,2,mean),start=tsp(x)[1])
  return(test)
}

##strip
#FUNCTION TO LIST FILES AND REMOVE SUFFIXES
strip<-function(h,n) {
  h<-unlist(strsplit(h,"\\."))
  h<-array(h,dim=c(n,length(h)/n))
  h<-t(h)
  strip<-h[,1]
  strip
}

##split
#return the part of each string before the character char
split<-function(h,char) {
  y<-unlist(strsplit(h,paste("\\",char,sep="")))
  y<-array(y,dim=c(2,length(y)/2))
  split<-t(y)
  split<-split[,1]
  split
}
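#Example usage (illustrative; the monthly series is synthetic): grid-cell bookkeeping and an
#annual average of a monthly series.
#jones(57.5,-3.5)                      #index of the 5x5 degree cell containing 57.5N, 3.5W
#jones.inv(jones(57.5,-3.5))           #center (lat,long) of that cell
#test.m=ts(rnorm(240),start=1981,freq=12)
#annavg(test.m)                        #20 annual means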
##filter.combine.pad
#smooth and pad: requires no internal gaps; smooths with filter weights a, padding the ends
#by the chosen method; returns cbind(original, smoothed)
filter.combine.pad=function(x,a=truncated.gauss.weights(21),M=NA,method="norm") {
  temp<-!is.na(x)
  w<-x[temp]
  N<-length(w)
  if(is.na(M)) M<-trunc(length(a)/2)
  if(method=="norm") w<-c( rep(mean(w[1:M]),M), w, rep(mean(w[(N-M+1):N]),M) )
  #if(method=="reflect") w<-c( w[M:1], w, w[N:(N-M+1)] )
  if(method=="mannomatic") w<-c( c(w[1]-(w[(M+1):2]-w[1])), w, c(w[N]-(w[(N-1):(N-M)]-w[N])) )
  if(method=="mann2") w<-c( rep(w[1],M), w, rep(w[N],M) )
  if(method=="linear") {
    fm1=lm(w[1:M]~I(1:M)); fm2=lm(w[N+1-(1:M)]~I(N+1-(1:M)))
    w<-c( fm1$coef[1]+fm1$coef[2]*(-(M-2):0), w, fm2$coef[1]+fm2$coef[2]*(N+M-((M-1):1)) )
  }
  y<-filter(w,a,method="convolution",sides=2)
  y<-y[(M+1):(length(y)-M)]
  z<-rep(NA,length(x))
  z[temp]<-y
  if(class(x)=="ts") z<-ts(z,start=tsp(x)[1],freq=tsp(x)[3])
  filter.combine.pad<-cbind(x,z)
  #filter.combine<-y[,2]
  filter.combine.pad
}

##f
#default smooth: 21-year truncated gaussian; returns the smoothed series only
f=function(x) filter.combine.pad(x,truncated.gauss.weights(21))[,2]

##gaussian.filter.weights
gaussian.filter.weights<-function(N,year) {
  #N - number of points; year - wavelength in years
  sigma<-year/6
  i<-( (-(N-1)/2):((N-1)/2) )/sigma
  w<-((2*pi)^-0.5)*exp(-0.5*i^2)
  gaussian.filter.weights<-w/sum(w)
  gaussian.filter.weights
}
#GAUSSIAN FILTER
#see http://www.ltrr.arizona.edu/~dmeko/notes_8.pdf
#The appropriate Gaussian distribution has standard deviation s = lambda/6, where lambda is
#the desired wavelength at which the amplitude of the frequency response is 0.5. The filter
#weights are obtained by sampling the pdf of the standard normal distribution at values i/s,
#where i = {-(N-1)/2, ..., 0, ..., (N-1)/2}.
#Meko has values to verify for the case N=21, year=10; this is verified.

##truncated.gauss.weights
#gaussian weights truncated at 5% of the peak weight and renormalized
truncated.gauss.weights<-function(year) {
  a<-gaussian.filter.weights(2*year+1,year)
  temp<-(a>0.05*max(a))
  a<-a[temp]
  a<-a/sum(a)
  truncated.gauss.weights<-a
  truncated.gauss.weights
}
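#Example usage (illustrative; the series is synthetic): apply the default gaussian smooth f()
#to a noisy annual series and inspect the weights of a shorter filter.
#test.x=ts(sin(2*pi*(1:300)/70)+rnorm(300,sd=0.5),start=1700)
#plot(test.x,col="grey70",type="l")
#lines(f(test.x),lwd=2)
#round(truncated.gauss.weights(10),4)    #weights for a 10-year gaussian smooth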
##maketable
#write data frame X to out as an HTML table; align is a logical vector marking right-aligned
#columns, widths gives column width percentages (both optional)
maketable=function(X,align=-999,widths=-999,out="d:/temp/temp.txt") {
  N=nrow(X); M=ncol(X)
  if(align[1]== -999) right=rep(TRUE,M) else right=align
  code=rep("",M); code[right]=" align=\"right\""
  if(widths[1]== -999) pct=rep(floor(100/M),M) else pct=widths
  write.table("<table>",file=out,col.names=FALSE,row.names=FALSE,quote=FALSE)
  for(j in 1:N) {
    write.table("<tr>",append=TRUE,file=out,row.names=FALSE,col.names=FALSE,quote=FALSE)
    for(i in 1:M) {
      write.table( paste("<td width=\"",pct[i],"%\"",code[i],">",X[j,i],"</td>",sep=""),
        append=TRUE,file=out,row.names=FALSE,col.names=FALSE,quote=FALSE)
    }
    write.table("</tr>",append=TRUE,file=out,row.names=FALSE,col.names=FALSE,quote=FALSE)
  }
  write.table("</table>",append=TRUE,file=out,row.names=FALSE,col.names=FALSE,quote=FALSE)
}

##extend.persist
#TO EXTEND SERIES BY PERSISTENCE: carry each column's last non-missing value forward
extend.persist<-function(tree) {
  extend.persist<-tree
  for(j in 1:ncol(tree)) {
    test<-is.na(tree[,j])
    end1<-max( c(1:nrow(tree))[!test] )
    test2<-( c(1:nrow(tree))>end1 ) & test
    extend.persist[test2,j]<-tree[end1,j]
  }
  extend.persist
}

##make.ts
#make an annual time series from a two-column (year, proxy) data frame,
#optionally interpolating to annual steps
make.ts=function(A,h=identity,interp=TRUE,freq=1) {
  names(A)=c("year","proxy")
  A=A[!is.na(A$year),]
  if(!interp) {
    time0=min(A$year):max(A$year)
    x=ts(A$proxy[match(time0,A$year)],start=min(A$year))
    return(x)
  }
  if(interp) {
    h=approxfun(A$year,A$proxy)
    start0=ceiling(min(A$year)); end0=floor(max(A$year))
    x=ts(h(start0:end0),start=start0)
    return(x)
  }
}

##make.tsm
#make a monthly time series from a three-column (year, month, proxy) data frame
make.tsm=function(A) {
  names(A)=c("year","month","proxy")
  A=A[!is.na(A$year),]
  target=sort(outer(min(A$year):max(A$year),1:12,function(x,y) 100*x+y))
  A$time=100*A$year+A$month
  target=data.frame(time=target)
  target$x=NA
  target$x=A$proxy[match(target$time,A$time)]
  y=ts(target$x,start=min(A$year),freq=12)
  return(y)
}

##verification.stats
#4. FUNCTION TO RETURN VERIFICATION PERIOD STATISTICS
#calibration and verification are both vectors of length 2 with start and end years;
#estimator and observed are time series which may have different start and end periods
use0="pairwise.complete.obs"
verification.stats<-function(estimator,observed=sparse,calibration=c(1902,1980),verification=c(1854,1901)) {
  combine<-ts.union(estimator,observed)
  index.cal<-(calibration[1]-tsp(combine)[1]+1):(calibration[2]-tsp(combine)[1]+1)
  index.ver<-(verification[1]-tsp(combine)[1]+1):(verification[2]-tsp(combine)[1]+1)
  xmean<-mean( combine[index.cal,"observed"],na.rm=TRUE )
  mx<-mean( combine[index.ver,"observed"],na.rm=TRUE )
  test.ver<-cov( combine[index.ver,],use=use0)
  test<-cov( combine[index.cal,],use=use0)
  R2.cal<-(test[1,2]*test[2,1])/(test[1,1]*test[2,2])
  R2.ver<-(test.ver[1,2]*test.ver[2,1])/(test.ver[1,1]*test.ver[2,2])
  RE.cal<-1-sum( (combine[index.cal,"estimator"]-combine[index.cal,"observed"])^2,na.rm=TRUE)/
    sum( (xmean-combine[index.cal,"observed"])^2,na.rm=TRUE)
  RE.ver<-1-sum( (combine[index.ver,"estimator"]-combine[index.ver,"observed"])^2,na.rm=TRUE)/
    sum( (xmean-combine[index.ver,"observed"])^2,na.rm=TRUE)
  RE.risk<- -sum( combine[index.ver,"estimator"]^2,na.rm=TRUE)/
    sum( (xmean-combine[index.cal,"observed"])^2,na.rm=TRUE)
  RE.bias<- 2*length(index.ver)*mean(combine[index.ver,"estimator"],na.rm=TRUE)*
    mean(combine[index.ver,"observed"],na.rm=TRUE)/sum( (xmean-combine[index.cal,"observed"])^2,na.rm=TRUE)
  RE.covar<- 2*(length(index.ver)-1)*cov(combine[index.ver,],use="pairwise.complete.obs")[1,2]/
    sum( (xmean-combine[index.ver,"observed"])^2,na.rm=TRUE)
  CE<- 1-sum( (combine[index.ver,"estimator"]-combine[index.ver,"observed"])^2,na.rm=TRUE)/
    sum( (mx-combine[index.ver,"observed"])^2,na.rm=TRUE)
  test<-sign(diff(combine[index.ver,],lag=1))
  temp<-( test[,1]*test[,2]==1 )&!is.na(test[,1])&!is.na(test[,2])
  sign.test<-sum(temp)/length(index.ver)
  test1<-scale(combine[index.ver,],scale=FALSE)
  test1<-test1[!is.na(test1[,1])&!is.na(test1[,2]),]
  temp<-sign(test1)
  temp<-(temp[,1]*temp[,2]==1)
  test2<-test1[,1]*test1[,2]
  if(sum(!temp)>0) {
    prod.test<-( mean(test2[temp])+mean(test2[!temp]) )/
      sqrt( var(test2[temp])/sum(temp) + var(test2[!temp])/sum(!temp) )
  } else prod.test<-NA
  verification.stats<-data.frame(RE.cal,RE.ver,R2.cal,R2.ver,CE) #,sign.test,prod.test,RE.risk,RE.bias,RE.covar)
  names(verification.stats)<-c("RE.cal","RE.ver","R2.cal","R2.ver","CE") #,"sign.test","prod.test","RE.risk","RE.bias","RE.covar")
  verification.stats
}
##trend
#returns trend per decade for a time series, by OLS or with an AR1 correction to the
#effective sample size
trend=function(x,method="AR1") {
  if(!class(x)=="ts") x=ts(x)
  if(method=="ols") {
    fm=lm(x~I(c(time(x))/10))
    work=round( c(unlist(summary(fm)$coef[2,1:3]),fm$df)[c(1,4,2,3)],4)
    names(work)=c("trend","df","se","t")
    return(work)
  }
  if(method=="AR1") {
    fm=lm(x~I(c(time(x))/10))
    se.ols=summary(fm)$coef[2,2]
    N=length(x)
    trend.obs=fm$coef[2]
    fm.arima=arima(fm$residuals,order=c(1,0,0))
    r=fm.arima$coef[1]
    neff=N*(1-r)/(1+r)                      #effective sample size under AR1
    se.obs=sqrt((N-2)/(neff-2))*se.ols      #inflated standard error
    work=round(c(trend=unlist(trend.obs),neff=neff,se=se.obs,t=trend.obs/se.obs),4)
    names(work)=c("trend","neff","se","t")
    return(work)
  }
}

##read.fwf2
#read fixed-width fields from a character vector of lines already in memory;
#na_char is the string coding missing values in numeric columns (default assumed)
read.fwf2=function(working,widths,colClasses="character",na_char="") {
  widths1=c(0,cumsum(widths)); K=length(widths1)
  widths1=cbind(widths1[1:(K-1)]+1,widths1[2:K])
  K=K-1
  N=length(working)
  X=array(NA,dim=c(N,K)); X=data.frame(X)
  for(i in 1:K) X[,i]=substr(working,widths1[i,1],widths1[i,2])
  temp=colClasses=="numeric"
  for(i in (1:K)[temp]) X[,i]=as.numeric(X[,i])
  X[,temp][!is.na(X[,temp]) & X[,temp]==na_char]=NA
  for(i in (1:K)[!temp]) X[,i]=gsub(" +$","",X[,i])
  return(X)
}

##extractf
#extracts the underlying decadal series from a linearly interpolated (smoothed) time series:
#keeps the endpoints and the points where the second difference is nonzero (the original knots)
extractf=function(x,units=1,tol=2) {
  r1=range(time(x))
  year=c(time(x))
  y=diff(x)
  z=round(diff(y),tol)
  temp=!(z==0)&!is.na(z)
  time0=sort(c(r1,time(z)[temp]-1))
  rx=round(x,2)
  temp=!is.na(match(year,time0))
  test=data.frame(time(x)[temp],rx[temp]/units)
  names(test)=c("year","proxy")
  extractf=test   #list(data=test,x=x,y=y,z=z,r1=r1)
  extractf
}
##k=202;units=1;x=units*g(mannorig[[details$old[k] ]])

##irrmatch
#collects a gridded series y according to the irregular years in A$year, cumulative to each step end
irrmatch=function(A,y) {
  gridded=data.frame(c(time(y)),y); names(gridded)=c("year","target")
  h=function(x) if(x[1]<=max(A$year)) min(A$year[A$year>=x[1]]) else NA
  gridded$d=apply(gridded,1,h)
  working=tapply(gridded$target,gridded$d,mean,na.rm=TRUE)
  test=match(A$year,as.numeric(names(working))); temp=!is.na(test)
  A$grid=NA; A$grid[temp]=working[test[temp]]
  irrmatch=A
  irrmatch
}

##ssq
ssq=function(x) {ssq=sum(x^2,na.rm=TRUE); ssq}

##twoleg
#fit a two-legged ("broken stick") trend: pick the breakpoint minimizing the residual sum of squares
twoleg=function(x) {
  N=length(x)
  stat=rep(NA,N)
  temp=!is.na(x)
  index=(1:N)[temp]
  for(i in index[2:(length(index)-1)]) {
    z=data.frame(x,abs((1:N)-i),(1:N)-i); names(z)=c("y","abs","x")
    fm=lm(y~abs+x,data=z)
    stat[i]=ssq(fm$residuals)
  }
  index0=(1:N)[(stat==min(stat,na.rm=TRUE))&!is.na(stat)]
  z=data.frame(x,abs((1:N)-index0),(1:N)-index0); names(z)=c("y","abs","x")
  twoleg=lm(y~abs+x,data=z)
  twoleg
}

##########
##8. BIWEIGHT MEAN
################
MAD=function(x) median( abs(x-median(x,na.rm=TRUE)) )

##biweight.mean
#Tukey biweight robust mean: iteratively reweight with (1-u^2)^2 for |u|<1, u=(x-xbar)/(c*S)
biweight.mean=function(x) {
  x=x[!is.na(x)]
  K=length(x)
  if(K<4) return(NA) else {
    w=rep(0,K); xnew=median(x); S=MAD(x)
    if(S==0) return(xnew) else {
      iter=1; tol=.01; c=6; delta=Inf
      #iterate the biweight update until the mean stabilizes (capped at 25 iterations)
      while(delta>tol & iter<=25) {
        xbar=xnew
        u=(x-xbar)/(c*S)
        temp=(abs(u)<1)
        w[temp]=(1-u[temp]^2)^2; w[!temp]=0
        xnew=sum(w*x)/sum(w)
        delta=abs(xnew-xbar)
        iter=iter+1
      }
      return(xnew)
    }
  }
}

##atan2x
#quadrant-aware arctangent of y/x, equivalent to atan2(y,x); the name is a placeholder;
#returns NA at the origin
atan2x=function(y,x) {
  if(x>0) { atan(y/x) } else {
    if(x<0 & y!=0) { atan(y/x)+sign(y)*pi } else {
      if(x<0 & y==0) { pi } else {
        if(y!=0) { (sign(y)*pi)/2 } else { NA }
      }
    }
  }
}

##download_html
#download a url and return its lines
download_html <- function(url) {
  download.file(url,"temp.html")
  html_handle <- file("temp.html","rt")
  html_data <- readLines(html_handle)
  close(html_handle)
  unlink("temp.html")
  return(html_data)
}

##url_escape
#percent-encode the characters that most often break hand-built query strings
url_escape <- function(string) {
  string <- gsub(" ","%20",string)
  string <- gsub("#","%23",string)
  string <- gsub("&","%26",string)
  string <- gsub("=","%3D",string)
  string <- gsub("\\?","%3F",string)
  return(string)
}
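#Example usage (illustrative; data are synthetic): decadal trend with an AR1-adjusted standard
#error, and a biweight mean that resists two gross outliers.
#test.y=ts(0.01*(1:120)+arima.sim(list(ar=0.4),120),start=1901)
#trend(test.y,method="AR1")
#trend(test.y,method="ols")
#test.z=c(rnorm(20),50,-40)
#biweight.mean(test.z); mean(test.z)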