##SCRAPING KNMI DATA ### ### knmi.info - information on KNMI moels scraped from webpage in early 2009 ### ### download_html - utility function to read webpages ### ### read.knmi.models - function to read KNMI model data. Requires ### field - "tas" is the field name for temperature. Only one tested so far ### model - use alias nomenclature from knmi.info. ### scenario- use nomenclature from knmi.info. "20c3m" and "sresa1b" are examples. Radio "SCenario Runs" for nomenclature. ### landtype - default is all. Can be "land" or "sea" ### lat - south to north. default c(-90,90) ### long - 0 to 360. default c(0,360) ### version - default is anomaly. "centigrade" gets Centigrade ### Info - requires knmi.info ### ### get.knmi - function to read observation data. Just beginning on this. knmi.info=read.csv("http://www.climateaudit.info/data/models/knmi/knmi.info.csv",sep="\t",header=TRUE) download_html =function(url) { download.file(url, "temp.html"); html_handle <- file("temp.html", "rt"); html_data <- readLines(html_handle); close(html_handle); unlink("temp.html"); return(html_data); } read.knmi.models=function(field, model,scenario,landtype="",lat=c(-90,90), long=c(0,360), suffix= "&standardunits=true",email=Email,version="anomaly", method="default",Info=knmi.info) { #construct cgi call prefix=paste("http://climexp.knmi.nl/get_index.cgi?email=",email,sep="") suffix=paste(suffix,"&lat1=",lat[1],"&lat2=",lat[2],"&lon1=",long[1],"&lon2=",long[2],sep="") nruns= Info[Info$scenario==scenario&Info$alias==model,"runs"]; #2 url= paste(prefix,"&field=",field,"_",model,"_",scenario,"&",suffix,sep="") if( !(landtype=="")) url=paste(url,"&masktype=",landtype,sep="");url #ping KNMI to retrieve data using above call my_info=download_html(url) #my_info[grep("raw data",my_info)] #option for anomaly data #paste the location of the raw data file (anomaly) if (version=="anomaly") { rawdata_call= my_info[grep("raw data",my_info)][3]; #y n=nchar(rawdata_call) #suffix2=paste(long[1],"-",long[2],"E_",lat[1],"-",lat[2],"N",sep="") if(nruns==1) nruncall="a" else nruncall="__++a" loc=paste("http://climexp.knmi.nl/",substr(rawdata_call,10,n-16) ,sep="") ; #loc Sys.sleep(2) #retrieve the data test=readLines(loc);test[1:5] #extract the model runs in time series form from the data: ncol=1 and ncol>1 index=grep("ensemble",test) # 1 1817 3633 5449 7265 K=length(index); if(K>0) { index=c(index,length(test)+1) writeLines(substr(test,1,24),"temp.dat") test=NULL for (i in 1:K) { working=read.table("temp.dat",skip=index[i]+1,nrow= (index[i+1]-2)-(index[i]+1) ) ; test=ts.union(test,ts(working[,2],start=c(floor(working[1,1]),round( 12* (working[1,1]%%1),0)+1),freq=12)) } dimnames(test)[[2]]=1:K read.knmi.models=test} else { writeLines(substr(test,1,24),"temp.dat") working=read.table("temp.dat",nrow= length(test)-2) ; read.knmi.models= ts(as.matrix(working[,2]),start=c(floor(working[1,1]),round( 12* (working[1,1]%%1),0)+1),freq=12) } if(method=="verbose") read.knmi.models=list(data=read.knmi.models,info=list(field=field,model=as.character(model),scenario=scenario,region=region,loc=loc) ) } #end anomaly version if (version=="raw") { rawcall=my_info[grep("filteryearseries",my_info)+7] n=nchar(rawcall) loc=file.path("http://climexp.knmi.nl/data", substr(rawcall,44,n-2) ) test=readLines(loc);#test #[1] "# data/itas_giss_model_e_r_20c3m_0-360E_-20-20N___00.dat" #"# data/itas_giss_model_e_r_20c3m_0-360E_-20-20N___01.dat" K=length(test) if(K<99) {test=substr(test,3,nchar(test)) working=NULL for (i in 1:K) { loc=file.path( "http://climexp.knmi.nl",test[i]) x= scan(loc,skip=5); x=t( array(x,dim=c(13,length(x)/13) ) ); x= ts( c(t(x[,2:13])),start=x[1,1],freq=12) working=ts.union(working,x ) } dimnames(working)[[2]]=paste(model,1:K,sep="_") } else { x= scan(loc,skip=5); x= t( array(x,dim=c(13,length(x)/13) ) ); working = ts( as.matrix( c(t(x[,2:13]))),start=x[1,1],freq=12) dimnames(working)[[2]]=paste(model,1,sep="_") } read.knmi.models=working } #end non-anomaly version read.knmi.models } anomaly=function(x,reference=c(1961,1990) ) { month= (1:length(x))%%12; month[month==0]=12 temp= time (x)>=reference[1] & time(x) < (reference[2]+1) if( sum(temp)==0) return(NA) anomaly=factor(month) levels(anomaly)=tapply(x[temp],month[temp],mean,na.rm=T) y=x-as.numeric(as.character(anomaly)) return(y) } numbertochar=function(x) {if(x<10) paste ("0",x,sep="") else paste(x) } ##fields: crutem3, hadcru #long=c(-158,-157);lat=c(21,22);field="crutem3_hadsst2" get.knmi=function(field,long=c(0,360),lat=c(-90,90),email=Email) { prefix= paste("http://climexp.knmi.nl/get_index.cgi?email=",email,sep="") fieldfix=paste("field",field,sep="=") suffix="standardunits=true" latfix= paste("lat1=",lat[1],"&lat2=",lat[2],"&lon1=",long[1],"&lon2=",long[2],sep="") # 0 to 360 longitude #logon logon=download_html( paste("http://climexp.knmi.nl/start.cgi?",email,sep="")) #ping data url=paste(prefix,fieldfix,suffix,latfix,sep="&") my_info=download_html(url) rawdata_call=my_info[grep("raw data",my_info)][3] # my_info[grep("raw data",my_info)][3]; #retrieve data #latcall= paste(long[1],"-",long[2],"E_",lat[1],"-",lat[2],"N",sep="") #loc=paste( paste("http://climexp.knmi.nl/data/i",field,sep=""), latcall,"na.txt",sep="_");loc n=nchar(rawdata_call) loc=paste("http://climexp.knmi.nl/",substr(rawdata_call,10,n-16) ,sep="") ; #loc Sys.sleep(2) #retrieve the data fred=readLines(loc) X=read.fwf(loc,widths=c(10,14)) X[grep("repeat",fred),2]=NA time0=range(X[,1],na.rm=T); time1=seq(time0[1],time0[length(time0)],1/12) working= rep(NA,length(time1)) x=ts(working,start=time0[1],freq=12) test=match(round(time(x),2),round(X[,1],2));temp=!is.na(test); sum(temp) x[temp]=X[test[temp],2] get.knmi=x #if(method="verbose") get.knmi=list(data=ts( test[,2],start=test[1,1],freq=12),loc=loc) get.knmi } #long=c(-158,-157);lat=c(21,22);field="crutem3_hadsst2" get.knmi.old=function(field,long=c(0,360),lat=c(-90,90),email=Email) { prefix= paste("http://climexp.knmi.nl/get_index.cgi?email=",email,sep="") fieldfix=paste("field",field,sep="=") suffix="standardunits=true" latfix= paste("lat1=",lat[1],"&lat2=",lat[2],"&lon1=",long[1],"&lon2=",long[2],sep="") # 0 to 360 longitude #logon logon=download_html( paste("http://climexp.knmi.nl/start.cgi?",email,sep="")) #ping data url=paste(prefix,fieldfix,suffix,latfix,sep="&") my_info=download_html(url) rawdata_call=my_info[grep("raw data",my_info)][1] # my_info[grep("raw data",my_info)][3]; rawdata_call=strsplit(rawdata_call,",")[[1]][2] #retrieve data #latcall= paste(long[1],"-",long[2],"E_",lat[1],"-",lat[2],"N",sep="") #loc=paste( paste("http://climexp.knmi.nl/data/i",field,sep=""), latcall,"na.txt",sep="_");loc n=nchar(rawdata_call) loc=paste("http://climexp.knmi.nl/",substr(rawdata_call,11,n-14) ,sep="") ; #loc Sys.sleep(2) #retrieve the data fred=readLines(loc) X=read.fwf(loc,widths=c(5,rep(14,12))) temp= (X< -900);sum(temp) X[temp]=NA target=range(X[,1],na.rm=T) target= cbind(target[1]:target[2],array(NA,dim=c(target[2]-target[1]+1,12) )) test=match(target[,1],X[,1]);temp=!is.na(test); sum(temp) target[temp,2:13]=as.matrix(X)[test[temp],2:13] x= ts( c(t(target[,2:13])),start=target[1,1],freq=12) temp=!is.na(x) get.knmi=round(window(x, end=max(time(x)[temp])),3) #if(method="verbose") get.knmi=list(data=ts( test[,2],start=test[1,1],freq=12),loc=loc) get.knmi }