#data at http://www-pcmdi.llnl.gov/projects/msu/index.php
 #variable names: https://esg.llnl.gov:8443/about/ipccTables.do
#   ta    Temperature               air_temperature
#   tas   Surface Air Temperature   air_temperature
#   tos   Sea Surface Temperature   sea_surface_temperature
# [1] "FOIA/Tam2/"
#  [2] "  2. Monthly-mean data are input and output"
#  [3] "  3. No masking. No grid transformation"
#  [4] "  4. Dataset type: CCCMA3.1       "
#  [5] "  5. Dataset processed: 20c3m_run1               "
#  [6] "  6. Reference period for climatological monthly means: 1979-1999"
#  [7] "  7. Years required for computing climatological monthly mean:  11  Threshold:   0.5"
#  [8] "  8. Fractional coverage required in order to compute valid spatial mean:   0.2"
#  [9] "  9. Period for computing time series statistics 1979-1999"
# [10] " 10. Total number of months processed: 1812"
# [11] " 11. Autocorrelation statistics computed from regression residuals"
# [12] " 12. No filtering of output data"
# [13] " 13. Dimensions of input dataset (xyt):      96     48   1812"
# [14] " 14. Data: /pcmdi/bsanter1/Model/CCCMA3.1/20c3m_run1/Xy/tam2_CCCMA3.1_20c3m_run1_mm_xy_wf_r0000_0000.nc                       "
# [15] " 15. MSTIME: Time in months since 1800. Units: Deg. C      "
# [16] ""
# [17] " INDEX  1: Globe"
# [18] " INDEX  2: Northern Hemisphere"
# [19] " INDEX  3: Southern Hemisphere"
# [20] " INDEX  4: NH high latitudes (60N-90N)"
# [21] " INDEX  5: NH mid latitudes  (30N-60N)"
# [22] " INDEX  6: NH low latitudes  (0-30N)"
# [23] " INDEX  7: SH high latitudes (60S-90S)"
# [24] " INDEX  8: SH mid latitudes  (30S-60S)"
# [25] " INDEX  9: SH low latitudes  (0-30S)"
# [26] " INDEX 10: Tropics (20N-20S)"
# [27] " "
# [28] "   NO  RTIME     MS1800      GLOBAL      NH        SH       NHHL      NHML      NHLL      SHHL      SHML      SHLL      TROP     "
# [29] "    1 1850.0000  600.0000   -1.2682   -1.3701   -1.1664   -1.5750   -1.2472   -1.4051   -0.9470   -1.0589   -1.3039   -1.3221"
# [30] "    2 1850.0834  601.0000   -1.0709   -1.0406   -1.1013   -0.8878   -0.7586   -1.2879   -0.6399   -0.9667   -1.3236   -1.3190"
# [31] "    3 1850.1666  602.0000   -1.1477   -1.2151   -1.0804   -1.1519   -1.1319   -1.2931   -0.3817   -0.9269   -1.3799   -1.4042"
# [32] "    4 1850.2500  603.0000   -1.0825   -1.0271   -1.1378    0.0087   -0.7936   -1.4757   -0.5385   -0.9249   -1.4542   -1.5296"
# [33] "    5 1850.3334  604.0000   -1.1736   -1.1790   -1.1682   -0.5957   -0.8946   -1.5436   -0.3000   -0.9368   -1.5702   -1.6197"
 
 
  ##DOWNLOAD DATA: both T2 and T2LT
	# For each of the two archives (T2 = tam2, T2LT = tam6): download it, read it as one
	# gunzipped text stream (the tar container is not unpacked), cut the stream into one
	# table per run at every "RTIME" column-header line, and store each table as a
	# monthly ts. Result: tam[["T2"]] and tam[["T2LT"]], each a list of ts matrices
	# named by the run path taken from the "Data:" line of that run's preamble.
	tam <- vector("list", 2)
	names(tam) <- c("T2", "T2LT")
	url <- "http://www-pcmdi.llnl.gov/projects/msu"
	name0 <- c("tam2.tar.gz", "tam6.tar.gz")
	for (k in seq_along(name0)) {
		download.file(file.path(url, name0[k]), "temp.gz", mode = "wb")
		handle <- gzfile("temp.gz")
		fred <- readLines(handle)
		close(handle)
		N <- length(fred)

		# One header line per run (49 in the original archives).
		index2 <- grep("RTIME", fred)
		stopifnot(length(index2) > 0)
		# Position of the header inside a run's text (line 28 in the original archives).
		# Every run is assumed to carry a preamble of this same length.
		hdr <- index2[1]
		# A table stops `hdr` lines before the next header; the last one runs to the end.
		# index2[-1] is empty (not NA) when there is only a single run.
		index1 <- c(index2[-1] - hdr, N)
		index <- data.frame(start = index2 + 1, end = index1)
		# Column names are parsed straight from the header line (no scratch file needed).
		name1 <- scan(text = fred[hdr], what = "")

		# Run identifier: the "Data:" path with its leading 33 characters removed and
		# trailing blanks trimmed.
		id <- fred[grep("Data:", fred)]
		id <- gsub(" +$", "", substr(id, 34, nchar(id)))
		# names<- would silently pad with NA if fewer ids than tables were found.
		stopifnot(length(id) == nrow(index))

		tam[[k]] <- vector("list", nrow(index))
		for (i in seq_len(nrow(index))) {
			run <- read.table(text = fred[index$start[i]:index$end[i]])
			names(run) <- name1
			# Column 2 (RTIME) is year + (month - 1)/12, e.g. 1850.0834 = Feb 1850.
			# Derive both parts so a series that does not begin in January is still
			# dated correctly (round(RTIME) with month 1 was only right for January).
			rtime <- run[1, 2]
			year <- floor(rtime)
			month <- round((rtime - year) * 12) + 1
			tam[[k]][[i]] <- ts(run, start = c(year, month), frequency = 12)
		}
		names(tam[[k]]) <- id
	}# k
	sapply(tam, length)
	save(tam, file = "d:/climate/data/models/santer/tam.tab")
	
##COLLATE INFO
	# Build the run inventory info.pcmdi (model, santerid, run, id) from the series
	# names stored on tam[[1]], write it to disk, and read it back.
	fred <- strsplit(names(tam[[1]]), "/")
	sapply(fred, length) #all 4
	info.pcmdi <- data.frame(model = sapply(fred, function(A) A[[1]]))

	# Second path component, split on "-" or "_"; its last piece is kept as santerid.
	test <- sapply(fred, function(A) A[[2]])
	test <- strsplit(gsub("-", "_", test), "_")
	n <- sapply(test, length)
	info.pcmdi$santerid <- sapply(test, function(A) A[[length(A)]])
	   #run id

	# Normalise the labels to "run<digit>", with manual overrides for entries
	# 46:49 and 2:6, then keep the digit in position 4 as the run number.
	x <- gsub("B06.", "run", gsub("b30.030", "run", info.pcmdi$santerid))
	x[46:49] <- paste0("run", 1:4)
	x[2:6] <- paste0("run", 1:5)
	x <- as.numeric(substr(gsub("h", "run", x), 4, 4))
	   #this links Santer series to KNMI surface
	info.pcmdi$run <- x

	info.pcmdi$id <- paste(info.pcmdi$model, info.pcmdi$run, sep = "_")

     #these runs were semi-manually matched to KNMI surface data, seeking correlations greater than 0.95 among models
     # the early part of CNRM3.0 (before 1965) was screwed up, but after deletion of the early portion it matched
     # all but two runs matched a KNMI 20CEN version: CCSM 3.0.03b, CCSM b30.030d
     #  CCSM 3.0.03b matched the corresponding portion of KNMI a1b run 2

	write.table(info.pcmdi, file = "d:/climate/data/models/santer/info.pcmdi.csv", sep = "\t", row.names = FALSE)
	info.pcmdi <- read.csv("d:/climate/data/models/santer/info.pcmdi.csv", sep = "\t", header = TRUE)
	     #this removes CCSM b30.030d (entry 5) from the inventory
	goodruns <- c(1:4, 6, 7, 8:49)
    
##COLLATE T2 and T2LT Time Series
	# Reload the parsed archives and the run inventory, then build one multi-column
	# monthly ts per product (T2, T2LT) holding the TROP column of every run.
	load("d:/climate/data/models/santer/tam.tab")
	info.pcmdi <- read.csv("d:/climate/data/models/santer/info.pcmdi.csv", sep = "\t", header = TRUE)

	# collate_trop: union the "TROP" column of every series in `series` on a common
	# time axis, blank out values below `missing.below`, and label columns with `ids`.
	#   series        list of ts matrices that each have a "TROP" column
	#   ids           one column label per series
	#   missing.below values below this threshold are set to NA
	# Returns a multi-column ts spanning the union of all input time ranges.
	collate_trop <- function(series, ids, missing.below = -900) {
		# lapply, not sapply: sapply collapses equal-length series into a plain
		# matrix, which loses the ts attributes and breaks per-series access.
		trop <- lapply(series, function(A) A[, "TROP"])
		out <- Reduce(ts.union, trop)
		out[out < missing.below] <- NA
		dimnames(out)[[2]] <- as.character(ids)
		out
	}

	T2 <- collate_trop(tam[["T2"]], info.pcmdi$id)
	dim(T2)
	  #[1] 2412   49
	tsp(T2)
	#T2=T2[,goodruns]
	T2 <- window(T2, end = 1999.99)

	T2LT <- collate_trop(tam[["T2LT"]], info.pcmdi$id)
	dim(T2LT)
	  #[1] 2412   49
	T2LT <- window(T2LT, end = 1999.99)

	
 ##READ INFO MODEL WHERE KNMI ALIAS IDENTIFIED MANUALLY
	# Builds T0, a multi-column ts assembled from the ensemble.trp series, with one
	# column per entry of info.pcmdi, and compares candidate CCSM3.0 columns against T2LT.
	# NOTE(review): info.model (with an `alias` column) is used below but is never
	# created in this part of the script -- it must already be in the workspace.
	# NOTE(review): the next line needs ensemble.trp to exist before the load();
	# in a fresh session it fails.
	ensemble.bak=ensemble.trp
	load("d:/climate/data/models/knmi/ensemble.trp.tab")
	sapply(ensemble.trp,dim)
	# Keep column 2 of the CCSM3.0 ensemble in bak1, drop it from the ensemble, and
	# overwrite the saved file with the trimmed version.
	# NOTE(review): this rewrites the file it was loaded from, so re-running these
	# lines acts on the already-trimmed ensemble -- looks like a one-off step.
	bak1=ensemble.trp[["CCSM3.0"]][,2]
	ensemble.trp[["CCSM3.0"]]=ensemble.trp[["CCSM3.0"]][,c(1,3:7)]
	save(ensemble.trp,file="d:/climate/data/models/knmi/ensemble.trp.tab")

	load("d:/climate/data/models/knmi/ensemble.trp.tab")
	# n: number of retained (goodruns) inventory rows per model.
	n=tapply(!is.na(info.pcmdi$model[goodruns]),info.pcmdi$model[goodruns],sum)
	n[2]=5 #pick up extra CCSM

	# For each of the 19 aliases take the first n[i] columns of that ensemble and
	# union them into T0, then trim to 1850-1999.
	T0=NULL;
	for(i in 1:19) T0=ts.union(T0, ensemble.trp[[paste(info.model$alias[i]) ]][,1:n[i] ] );
	T0=window(T0,start=1850,end=1999.99)
	dim(T0) #1800 49
	dimnames(T0)[[2]]=info.pcmdi$id 
 	dimnames(T0)[[2]][1:7]
   #[1] "CCCMA3.1_1" "CCSM3.0_1"  "CCSM3.0_2"  "CCSM3.0_3"  NA           "CCSM3.0_4"  "CNRM3.0_1" 

    #insert A1B run 2 in 2nd CCSM place, retain 20CEN 1,3,5
	load("d:/climate/data/models/knmi/ensemble.a1b.tab")

	# X: columns 2:6 of T2LT from 1900 on; Y / Z: the ensemble.trp and ensemble.a1b
	# series for alias 2. The correlation tables below were used to pick the match.
	X=window(T2LT[,2:6],start=1900,end=1999.99)
	i=2;alias=info.model$alias[2]
	Y=window(ensemble.trp[[paste(alias)]],start=1850,end=1999.99)
	cor(X,window(Y,start=1900))
#                  1        3        4        5        6        7
#CCSM3.0_1 0.968750 0.621506 0.660972 0.671652 0.614726 0.567725
#CCSM3.0_2 0.656334 0.636118 0.636382 0.658588 0.634927 0.665585
#CCSM3.0_3 0.606598 0.970822 0.632535 0.656004 0.627961 0.603712
#<NA>      0.655336 0.636064 0.682988 0.616731 0.614533 0.655226
#CCSM3.0_4 0.649414 0.639160 0.969340 0.623379 0.683175 0.640576

	Z=window(ensemble.a1b[[paste(alias)]],start=1850,end=1999.99)
	cor(X,window(Z,start=1900))
#
#                   1        2        3        4        5        6        7
#CCSM3.0_1 0.968871 0.652726 0.620704 0.658121 0.671177 0.609564 0.566692
#CCSM3.0_2 0.656777 0.970786 0.637115 0.637148 0.659230 0.635969 0.667121
#CCSM3.0_3 0.606573 0.630486 0.969996 0.630778 0.655615 0.624801 0.603691
#<NA>      0.656807 0.702604 0.637400 0.684162 0.618716 0.615527 0.656876
#CCSM3.0_4 0.649765 0.643373 0.639674 0.967482 0.623854 0.681927 0.641297

	# dummy: the four matched CCSM3.0 columns, in T2LT order (Y col 1, Z col 2, Y cols 2:3).
	dummy=ts.union(Y[,1],Z[,2],Y[,2:3])
	cor(X[,c(1:3,5)],window(dummy,start=1900))
#            Y[, 1]   Z[, 2]   Y[, 2]   Y[, 3]
#CCSM3.0_1 0.968750 0.652726 0.621506 0.660972
#CCSM3.0_2 0.656334 0.970786 0.636118 0.636382
#CCSM3.0_3 0.606598 0.630486 0.970822 0.632535
#<NA>      0.655336 0.702604 0.636064 0.682988
#CCSM3.0_4 0.649414 0.643373 0.639160 0.969340

	# Rebuild T0 as: column 1, the four columns of dummy, then columns 7:49.
	T0 <- ts.union(T0[, 1], dummy, T0[, 7:49])
	dim(T0) # 1800 48

	# Reduce the inventory and both collated series to the retained runs.
	info.pcmdi <- info.pcmdi[goodruns, ]
	T2 <- T2[, goodruns]
	T2LT <- T2LT[, goodruns]

    #insert HadGEM run 2
	alias <- info.model$alias[12]
	dummy <- window(ensemble.trp[[paste(alias)]], start = 1850, end = 1999.99)
	  #match to 2: column 2 of dummy replaces column 30 of T0
	T0 <- ts.union(T0[, 1:29], dummy[, 2], T0[, 31:48])
	dim(T0) # 1800 48

   #NA out bad T2 and T2LT (CNRM3.0_1 before 1965)
	T2LT[time(T2LT) < 1965, "CNRM3.0_1"] <- NA
	T2[time(T2) < 1965, "CNRM3.0_1"] <- NA

 ##DOUBLE CHECK MATCH
	# Label T0 with the inventory ids, correlate each T0 column with the matching
	# T2LT column, and save the cleaned series.
	# The inventory is already reduced to goodruns earlier in the script; applying
	# goodruns a second time would drop a valid row, append an all-NA row and
	# mislabel T0 and stat. Subset only if the inventory is still full length.
	if (nrow(info.pcmdi) > length(goodruns)) info.pcmdi <- info.pcmdi[goodruns, ]
	stopifnot(nrow(info.pcmdi) == ncol(T0), ncol(T0) == ncol(T2LT))
	row.names(info.pcmdi) <- seq_len(nrow(info.pcmdi))
	dimnames(T0)[[2]] <- as.character(info.pcmdi$id)

	# Column-by-column correlation, ignoring months missing in either series.
	use0 <- "pairwise.complete.obs"
	stat <- vapply(seq_len(ncol(T0)),
		function(i) cor(T0[, i], T2LT[, i], use = use0),
		numeric(1))
	names(stat) <- info.pcmdi$id
	range(stat)
	   #0.902213 0.984653

	   #problems were CCSM3.0, HadGEM1 (model 2) and CNRM

	SanterClean <- list(T2 = T2, T2LT = T2LT, T0 = T0)
	save(SanterClean, file = "d:/climate/data/models/santer/SanterClean.tab")


	  #uploaded to CA 
# [text truncated at start] ...ing groups used very similar changes in well-mixed
# greenhouse gases, the changes in other forcings were not prescribed as part of the
# experimental design. In practice, each group used different combinations of 20CEN
# forcings and often used different datasets for specifying individual forcings. End
# dates for the experiment varied between groups and ranged from 1999 to 2003.