#REPLICATION OF JONES ET AL (1998)  
   ##VERIFY TEMPERATURE CALCS IN JONES ET AL 1998

	col1<-c("salmon","black","black")
	col2<-c("black","red","blue")
	a<-truncated.gauss.weights(50)
	use0<-"pairwise.complete.obs"

###################3
#FUNCTIONS
######################

    seasavg<-function(x,NHindex,seasonindex) {
	n<-length(x)
	if(n > 12*trunc(n/12)) x<-c(x, rep(NA,12-( n-12*floor(n/12))))
	years<-length(x)/12
	x<-t (array(x,dim=c(12,years))) #148 12
	x<-ts(x,start=0,end=nrow(x)-1)
	if (NHindex) seasavg<-apply(x[,seasonindex],1,mean,na.rm=TRUE)
   		else { past<- seasonindex [(seasonindex>6)]; 
   	present<-seasonindex[(!seasonindex>6)];
   	y<-ts.union(x[,present],lag(x[,past],-1))
   	seasavg<-apply(y[1:(nrow(y)-1),],1,mean,na.rm=TRUE)
 		}
 	seasavg
 	}

   jones<-function(lat,long) {
	i<-18-floor(lat/5);
	j<- 36+ceiling(long/5);
	jones<-72*(i-1) + j
	jones
	}

   decadalaverage<-function (x) {
	n<-length(x)
	x<-c(x, rep(NA,10-( n-10*floor(n/10))))
	decades<-length(x)/10
	x<-array(x,dim=c(10,decades))
	decadalavg<-apply(x,2,mean,na.rm=TRUE)
	return(decadalavg)
	}

#####################
##A.  INPUT DATA
##########################
   #these use datasets collated elsewhere
	
   #LOAD COLLATED JONES DATA
	url<-"http://www.climateaudit.org/data"
	load("c:/climate/data/jones98/jser.tab") #this is previously assembled
	jser<-read.table(file.path(url,"jser.txt"),header=TRUE,sep="\t")
	name0<-c("year","Fenno","Urals","Jasper","Svalbard","CEng","CEur","Kameda.melt","Jacoby.treeline","Briffa.WUSA","Crete.O18","Tasmania.92","Lenca","Alerce","Law.Dome","GBR.5","Galapagos","New.Caledonia")
	dimnames(jser)[[2]]<-name0
	
   #NORMALIZE TO 1901-1950 per Jones 1998 Figure 3
   	#this is described on page 461 col 2
	m1<-apply(jser[902:951,],2,mean,na.rm=TRUE)
	sd1<-apply(jser[902:951,],2,sd,na.rm=TRUE)
	js<-jser
	for (k in 1:ncol(jser)) {
	  js[,k]<-(jser[,k]-m1[k])/sd1[k]
	  }
	#these were plotted and compared to Figure 1
	
   #INPUT LOCATION PARAMETERS OF JONES SITES
   	#from Jones Table 3
	season<-list(c(4:8),c(5:9),c(4:8),c(6:8),c(6:8),c(6:8),c(6:8),c(1:12),c(5:9),c(6:8),
	c(5:10),c(6:9),c(6:8),c(5:10),c(1:12),c(1:12),c(1:12)) 
		  	#SH dates are transposed by  6 months #taken into consideration laterseason,
			#Galapagos differs
	NHindex0<-c(rep(TRUE,10),rep(FALSE,7))

  #LOCATION OF CELLS
	#locations from Table 3 Jones et al 1998
	jones.loc<-read.table(file.path(url,"jones1998.location.txt"),sep="\t",header=TRUE);jones.loc

	#    X               Name   Lat   Long
	#1   1     N Fennoscandia  68.0   22.0
	#2   2            N Urals  66.0   65.1
	#3   3             Jasper  52.3 -117.0
	#4   4           Svalbard  79.0   15.1
	#5   5       Cent England  52.0    2.0
	#6   6     Cerntal Europe  46.5    8.0
	#7   7        S Greenland  64.9  -45.1
	#8   8         N Treeline  58.0  -95.1
	#9   9              W USA  44.9 -115.1
	#10 10   Crete, Greenland  68.9  -38.1
	#11 11           Tasmania -42.0  146.5
	#12 12              Lenca -41.5  -72.6
	#13 13             Alerce -41.2  -71.8
	#14 14           Law Dome -66.7  112.8
	#15 15 Great Barrier Reef -19.9  149.9
	#16 16          Galapagos   0.1  -91.0
	#17 17      New Caledonia -22.0  166.0


	loc<-rep(NA,17)
	for (k in 1:17) {
		loc[k]<-jones(jones.loc[k,3],jones.loc[k,4])
		}
	loc
  	#[1]  329  337  517  184  541  614  387  449  589  317 1938 1894 1894 2291 1578 1242 1654


   ##MAKE TEMPERATURE SUBSETS
     #LOAD RAW JONES DATA AND MAKE SUBSET FOR CORRELATIONS USING NEW CRU DATA
     #this is script to make stored table 
	#load("c:/climate/data/jones/hadcruv.tab") ###month 553 is 1902 Jan;
	#vjones98<-v[,loc]#1769x17
	#dim(vjones98); 
	#save(vjones98,file="c:/climate/data/jones98/vjones98.tab")

	#load("c:/climate/data/jones98/vjones98.tab")
	#w<-vjones98 #1769x17
	#compare<-array(rep(NA,148*17),dim=c(148,17))
	#for (k in 1:17) {
	#	compare[,k]<-seasavg (w[,k],NHindex0[k],season[[k]])
	#	}
	#compare<-ts(compare, start=1856,end=jser[nrow(jser),"year"])
	#compare<-cbind(1856:1994,compare)
	#tsp(compare) # 1856 1994    1
	#dimnames(compare)[[2]]<-name0
	#write.table(compare,file="c:/climate/data/jones98/jones98.temperature.txt",sep="\t")
	
	compare<-read.table(file.path(url,"jones98.temperature.txt"),header=TRUE,sep="\t")	

	rbar(compare[,1:10],10) #fails

  ##ALTERNATE TEMPERATURE DATASET
    #USING MBH CORRIGENDUM ARCHIVE - AN OLDER VERSION OF CRU DATASET PREVIOUSLY COLLATED
	#load("c:/climate/data/MBH98.2004/v.combined.tab")
	#dim(v)  # 1680 2592  #1854 to 1993 from Jones[1994]
	#vjones98<-v[,loc]#1769x17
	#save(vjones98,file="c:/climate/data/jones98/vjones98.MBH.tab")

   ##CORRELATION DATA IN J98 TABLE 4
	#this was manually transcribed
	table4<-read.table(file.path(url, "jones98.table4.txt"),header=TRUE,sep="\t")


#########################
### B. VERIFY REPORTED CORRELATIONS	
#################################
	corry<-array(rep(NA,10*17),dim=c(17,10))
	corry<-data.frame(corry)
	row.names(corry)<-dimnames(jser)[[2]][2:18]

	
	for (k in 1:17)  {
	  combine<-scale(cbind(jser[882:981,k+1],compare[(1881-1855):(1980-1855),k+1]))#this is correlation period 1881-1980 in Jones  
	   	# alternative to test: combine<-scale(ts.union (jser[,k],compare[,k]) )
	
	  corry[k,1]<-cor(combine,use="pairwise.complete.obs")[1,2]# 0.7372084 
	  corry[k,2]<-sum( ( apply(!is.na(combine),1,sum) >1 ) )
	  corry[k,3]<- sd(combine[,1],na.rm=TRUE)/sd(combine[,2],na.rm=TRUE)
	  decade<-array(rep(NA,10*2),dim=c(10,2)) #1, proxy, 2- jones
	  for (j in 1:2) {
		gtemp<-array(combine[,j],dim=c(10,10))
		decade[,j]<-apply(gtemp,2,mean,na.rm=TRUE)
		}
	  corry[k,4]<-cor(decade,use="pairwise.complete.obs")[1,2]# 0.73
	  corry[k,5]<-sum( ( apply(!is.na(decade),1,sum) >1 ) )
	  corry[k,6]<- sd(decade[,1],na.rm=TRUE)/sd(decade[,2],na.rm=TRUE)
	  corry[k,7]<-table4$cor[k]
	  corry[k,8]<-table4$cor.decadal[k] 
	  combine<-scale(cbind(jser[861:990,k+1],compare[(1860-1855):(1989-1855),k+1]))#this is correlation period 1881-1980 in Jones  
	   	# alternative to test: combine<-scale(ts.union (jser[,k],compare[,k]) )
	  corry[k,9]<-cor(combine,use="pairwise.complete.obs")[1,2]# 0.7372084 
  	decade<-array(rep(NA,13*2),dim=c(13,2)) #1, proxy, 2- jones
	  for (j in 1:2) {
		gtemp<-array(combine[,j],dim=c(10,13))
		decade[,j]<-apply(gtemp,2,mean,na.rm=TRUE)
		}
	  corry[k,10]<-cor(decade,use="pairwise.complete.obs")[1,2]# 0.73
	}
	names(corry)<-c("corr","count","sd.ratio","corr.decadal","decades","sd.decad.ratio","table4.cor","table4.decadal","cor2","cor.decadal.2")
	corry[,c(1,4,6,9,10)]<-round(corry[,c(1,4,6,9,10)],2)
	corry<-corry[,c(7,1,9,2:3,5,6,8,4,10)]
	corry

#                table4.cor corr cor2 count sd.ratio decades sd.decad.ratio table4.decadal corr.decadal cor.decadal.2
#Fenno                 0.79 0.74 0.70   100        1      10           0.92           0.80         0.74          0.68
#Urals                 0.83 0.61 0.55    65        1       9           0.95           0.92         0.00         -0.24
#Jasper                0.48 0.56 0.52    96        1      10           1.04           0.45         0.55          0.30
#Svalbard              0.08 0.27 0.29    80        1      10           0.47           0.38         0.73          0.74
#CEng                  0.84 0.80 0.75   100        1      10           0.97           0.80         0.92          0.86
#CEur                  0.90 0.94 0.93    99        1      10           1.06           0.83         0.91          0.88
#Kameda.melt           0.17 0.12 0.09   100        1      10           1.18          -0.28        -0.41         -0.32
#Jacoby.treeline       0.34 0.07 0.07    53        1       6           5.25           0.87         0.70          0.76
#Briffa.WUSA           0.60 0.35 0.35   100        1      10           0.93           0.79         0.74          0.60
#Crete.O18             0.30 0.42 0.43    86        1      10           0.56           0.49         0.15          0.47
#Tasmania.92           0.42 0.34 0.41   100        1      10           0.95           0.58         0.72          0.73
#Lenca                 0.36 0.14 0.11    87        1      10           0.37           0.55         0.28         -0.05
#Alerce                0.35 0.05 0.07    86        1      10           0.48           0.16         0.10          0.07
#Law.Dome              0.26 0.07 0.08    24        1       3           0.78           0.98         0.42          0.73
#GBR.5                 0.18 0.21 0.18    98        1      10           0.68           0.52         0.74          0.57
3Galapagos             0.39 0.13 0.15   100        1      10           1.48           0.16        -0.02         -0.05

	plot(corry$cor2,table4$cor,xlab="Calculated",ylab="Jones")
	abline(0,1)


#############################3
## C. NOW CALCULATE t-STATISTICS INSTEAD
###############################

  # FIRST BENCHMARK CORRELATIONS WITH REGRESSION CALCULATION IN ORDER TO DEMONSTRATE t-STATISTIC
	#the square root of the r.squared is the correlation

	jser<-ts(jser[,2:18],start=1000)
	compare<-ts(compare[,2:18],start=1856)
	k<-9
	  	combine<-ts.union(jser[,k],compare[,k])
		dates<-tsp(combine)
		z<-data.frame(scale(combine))
		names(z)<-c("jones","CRU")
		fm<-lm(jones~CRU,data=z[881:980,])
		summary(fm)$r.squared # 0.1477863
		cor(z[881:980,],use="pairwise.complete.obs")[1,2] # 0.3844298
		sqrt(summary(fm)$r.squared) #  0.3844298
			#the correlation can be extracted from linear regression, but regression also gives diagnostics

		index<-881:980
		m0<-apply(combine[index,],2,mean)
		sd0<-apply(combine[index,],2,sd)	
		z<-data.frame(scale(combine,center=m0,scale=sd0))
		apply(z[index,],2,mean) #0 0
		apply(z[index,],2,sd) #1 1
		names(z)<-c("jones","CRU")
		fm<-lm(jones~CRU,data=z[881:980,])
		summary(fm)$r.squared # 0.1477863
		cor(z[881:980,],use="pairwise.complete.obs")[1,2] # 0.3844298
		sqrt(summary(fm)$r.squared) #  0.3844298
		coef(fm)[2] #0.3844298 
		summary(fm)$coefficients[2,]
		#  Estimate   Std. Error      t value     Pr(>|t|) 
		# 3.844298e-01 9.325266e-02 4.122454e+00 7.853363e-05 

     
    ##NOW DO THIS STATISTICALLY
     #do this both with OLS and Newey West standard errors
	library(lmtest)
	index<-861:990   #do this from 1860 to 1989 #max available
	stat<-data.frame(array(rep(NA,17*8),dim=c(17,8)))
	row.names(stat)<-table4[,2]	
	names(stat)[1:7]<-c("coef.OLS","se.OLS","t.OLS","Pr.OLS","cor.fm","cor","dw")
	stat.decade<-stat

	for (k in c(1:13,15:17,14)) {
		combine<-ts.union(jser[,k],compare[,k])
		dates<-tsp(combine)
		m0<-apply(combine[index,],2,mean,na.rm=TRUE)
		sd0<-apply(combine[index,],2,sd,na.rm=TRUE)	
		combine<-scale(combine[index,])#,center=m0,scale=sd0)
		z<-data.frame(combine)
		names(z)<-c("jones","CRU")
		fm<-lm(jones~CRU,data=z)

		stat[k,1:4]<-summary(fm)$coefficients[2,]
		stat[k,5]<-sqrt(summary(fm)$r.squared)
		stat[k,6]<-cor(combine,use=use0)[1,2]
		stat[k,7]<-dwtest(fm)$statistic
	  	for (j in 1:2) { #make decadal averages 
			gtemp<-array(z[,j],dim=c(10,13)) #881, 1980
			decade[,j]<-apply(gtemp,2,mean,na.rm=TRUE)
			}
		decade<-scale(decade)
	 	decade<-data.frame(decade)
		names(decade)<-c("jones","CRU")

		fm.dec<-lm(jones~CRU,data=decade)
		stat.decade[k,1:4]<-summary(fm.dec)$coefficients[2,]
		stat.decade[k,5]<-sqrt(summary(fm.dec)$r.squared)
		stat.decade[k,7]<-dwtest(fm.dec)$statistic
	  	stat.decade[k,6]<-cor(decade,use="pairwise.complete.obs")[1,2]#  0.6464455 for k=9
		}
	stat[,1:7]<-round(stat[,1:7],2)
	stat<-cbind(name0[2:18],stat,table4[,4:6])
	write.table(stat,file="c:/climate/data/jones98/jones98.stat.txt",sep="\t")


################3
    coef.OLS se.OLS t.OLS Pr.OLS cor.fm  cor   dw X8  cor count sd.ratio
NFS     0.70   0.06 10.78   0.00   0.70 0.70 1.48 NA 0.79   100     0.65
NUR     0.46   0.08  5.62   0.00   0.55 0.55 1.56 NA 0.83   100     0.94
JAS     0.47   0.08  6.14   0.00   0.52 0.52 1.55 NA 0.48    96     0.66
SVA     0.26   0.09  2.79   0.01   0.29 0.29 0.77 NA 0.08    82       NA
ENG     0.75   0.06 12.96   0.00   0.75 0.75 1.94 NA 0.84   100     1.49
EUR     0.93   0.03 28.59   0.00   0.93 0.93 1.83 NA 0.90    99     1.39
SGR     0.09   0.09  0.98   0.33   0.09 0.09 0.39 NA 0.17   100     0.39
NTR     0.04   0.08  0.52   0.61   0.07 0.07 0.70 NA 0.34    75     0.34
WUS     0.33   0.08  4.01   0.00   0.35 0.35 2.01 NA 0.60   100     0.57
CRT     0.47   0.11  4.36   0.00   0.43 0.43 1.38 NA 0.30    99     1.09
TAS     0.41   0.08  5.10   0.00   0.41 0.41 1.57 NA 0.42   100     0.70
LEN     0.11   0.10  1.02   0.31   0.11 0.11 1.49 NA 0.36    82     0.85
ALE     0.07   0.11  0.64   0.53   0.07 0.07 1.56 NA 0.35    82     0.90
LAW     0.08   0.19  0.43   0.67   0.08 0.08 1.97 NA 0.26    25       NA
GBR     0.18   0.10  1.81   0.07   0.18 0.18 1.77 NA 0.18    92     1.14
GAL     0.15   0.10  1.55   0.12   0.15 0.15 1.28 NA 0.39    92     0.72
NCL     0.41   0.09  4.49   0.00   0.39 0.39 0.68 NA 0.41    85     0.49
###################

	stat.decade[,1:7]<-round(stat.decade[,1:7],2)
	stat.decade<-cbind( name0[2:18],stat.decade,table4[,7:9])
	write.table(stat.decade,file="c:/climate/data/jones98/jones98.stat.decade.txt",sep="\t")

#########################
    coef.OLS se.OLS t.OLS Pr.OLS cor.fm   cor   dw X8 cor.decadal decades ad.decad.ratio
NFS     0.68   0.22  3.06   0.01   0.68  0.68 1.80 NA        0.80      10           0.72
NUR    -0.20   0.26 -0.75   0.47   0.24 -0.24 1.75 NA        0.92      10           1.05
JAS     0.24   0.25  0.96   0.36   0.30  0.30 0.92 NA        0.45      10           0.92
SVA     0.74   0.21  3.50   0.01   0.74  0.74 1.53 NA        0.38       9             NA
ENG     0.86   0.15  5.60   0.00   0.86  0.86 2.52 NA        0.80      10           1.70
EUR     0.86   0.15  5.94   0.00   0.88  0.88 0.93 NA        0.83      10           1.26
SGR    -0.23   0.22 -1.06   0.31   0.32 -0.32 1.61 NA       -0.28      10           0.65
NTR     0.46   0.19  2.36   0.08   0.76  0.76 1.22 NA        0.87       8           1.49
WUS     0.60   0.24  2.47   0.03   0.60  0.60 1.90 NA        0.79      10           0.41
CRT     0.41   0.26  1.58   0.15   0.47  0.47 1.93 NA        0.49      10           0.62
TAS     0.73   0.20  3.59   0.00   0.73  0.73 1.82 NA        0.58      10           0.67
LEN    -0.05   0.31 -0.16   0.88   0.05 -0.05 2.32 NA        0.55       8           0.91
ALE     0.07   0.30  0.23   0.82   0.07  0.07 2.66 NA        0.16       8           1.11
LAW     0.37   0.24  1.53   0.27   0.73  0.73 2.34 NA        0.98       3             NA
GBR     0.57   0.25  2.28   0.04   0.57  0.57 1.34 NA        0.52      10           0.88
GAL    -0.05   0.30 -0.16   0.88   0.05 -0.05 0.79 NA        0.16      10           1.08
NCL     0.62   0.24  2.61   0.02   0.62  0.62 0.74 NA        0.48       9           0.48
###########################33