diff --git a/R/03a_QM_DOY_utils.R b/R/03a_QM_DOY_utils.R new file mode 100755 index 0000000000000000000000000000000000000000..70cd7622755ae2429b0c33e4053d1415d0be3c3b --- /dev/null +++ b/R/03a_QM_DOY_utils.R @@ -0,0 +1,152 @@ +# A list of functions used to compute the quantile mapping on a 91-day moving window, +# using .01 to .99 quantiles for deriving the correction +# applying a frequency adjustment for precipitation +# functions derived from https://github.com/SvenKotlarski/qmCH2018 with small changes + +qm.cdf.doy <- function(series, yearbegin, yearend, doywindow, minq, maxq, incq, var, pr.wet){ + + d <- qm.doywindowgen(doywindow) + cdf_doy <- array(NA,dim=c(365,length(seq(minq,maxq,by=incq)))) + doy.series <- qm.doystring(yearbegin,yearend) + fwet_doy <- rep(NA,365) + cdfwet_doy <- array(NA,dim=c(365,101)) + + # *** set 29th February to 1st March *** + doy.series[doy.series > 500] <- 60 + + # *** check of input data: length OK? *** + if(length(series) != length(doy.series)){ + print("ERROR: data series and doy series not equally long") + cdf_doy <- NULL + }else{ + for(i in 1:365){ + # *** match: returns vector of length(doy.series): Position of matches in d$doy.matrix[i,] or NA if no match + # *** => all positions in doy.series that correspond to the window defined in d$doy.matrix[i,] are marked + cdf_doy[i,] <- quantile(series[which(is.na(match(doy.series,d$doy.matrix[i,])) == FALSE)], probs=seq(minq,maxq,by=incq),na.rm=TRUE) + + # *** possible (shorter) alternative *** + # cdf_doy[i,] <- quantile(series[which(!is.na(match(doy.series,d$doy.matrix[i,])))],probs=c(seq(minq,maxq,by=incq)),na.rm=TRUE) + + # *** for precipitation additionally compute wet day frequency and wet day distribution for each DOY *** + if (var == 'pr') { + # *** all values for current DOY including window *** + series.doywin <- series[which(is.na(match(doy.series,d$doy.matrix[i,])) == FALSE)] + # *** total number of days w/o NA *** + ndays <- length(series.doywin[!is.na(series.doywin)]) + # *** number of wet days w/o NA *** + ndays.wet <- length(which(series.doywin >= pr.wet)) + # *** wet day fraction for current DOY *** + fwet_doy[i] <- ndays.wet / ndays + # *** all values for current DOY including window but excluding dry days *** + series.doywin.wet <- series.doywin[series.doywin >= pr.wet] + # *** wet day CDF for current DOY including window *** + cdfwet_doy[i,] <- quantile(series.doywin.wet,probs=seq(0,1,0.01),na.rm=TRUE) + } + } + } + return(list(cdf.matrix = cdf_doy, f.wet = fwet_doy, cdf.wet = cdfwet_doy)) +} + + +qm.empqm.doy.fa <- function(series, doy.series, cdf.mod, cdf.obs, cdf.obs.wet, cor.fun, fwet.obs, fwet.mod, pr.wet, method=c("linear","bin"), var, minq, maxq, incq){ + + if(length(series) != length(doy.series)){ # *** a little check + print(paste("ERROR: length of time series (",length(series),") and DOY series (",length(doy.series),") do not match.", sep="")) + }else{ + series_q <- rep(NA,length(series)) + series_c <- rep(NA,length(series)) + for(i in 1:length(series)){ + doy <- doy.series[i] + if(doy == 999){ doy <- 60 } + if(is.na(series[i]) == "TRUE"){ + series_q[i] <- NA + series_c[i] <- NA + }else{ + # *** OPTION A: if dry day in model and if modelled wet day frequency <= observed wet day frequency apply frenquency adjustment *** + if ((series[i]==0) & (fwet.mod[doy] <= fwet.obs[doy])) { + # *** generate random number *** + rnum <- runif(1) + # *** determine fraction of dry days in model that needs to become wet in order to represent observed wet day frequency *** + probwet <- 1 - ((1-fwet.obs[doy])/(1-fwet.mod[doy])) + # *** if rnum <= probwet: fill with a precipitation day; else assign original value, i.e. zero precip *** + if (rnum <= probwet) { + # *** randomly draw from observed wet day distribution and assign that value *** + fun <- approxfun(seq(0,1,0.01),cdf.obs.wet[doy,]) + random.number <- runif(1) + series_c[i] <- fun(random.number) + }else{ + series_c[i] <- series[i] + } + # *** in any case, assign NA to series of modelled quantile numbers: means FA has been carried out! *** + series_q[i] <- NA + }else{ + # *** OPTION B: else, don't apply frenquency adjustment; apply regular correction function *** + # *** using quantile-"bins" *** + if(match.arg(method) == "bin"){ + # *** Note: this command implicitly selects the first (last) quantile considered for new extremes , i.e. *** + # *** for smaller (larger) values than those occuring in the calbration period (in case of zeros: first *** + # *** occurence). If minq=0.01 and maxq=0.99 nex extremes are hence corrected according to the correction *** + # *** function for the 1st and 99th percentile. *** + ix <- which.min(abs(cdf.mod[doy,]-series[i])) + # *** Compute quantile index *** + series_q[i] <- minq + (ix-1)*incq + # *** Compute corrected value *** + series_c[i] <- series[i]-cor.fun[doy,ix] + } + + # *** using linear interpolation between quantiles *** + if(match.arg(method) == "linear"){ + # *** number of interpolated sampling points *** + npoints <- 1000 + cor <- approx(cdf.mod[doy,],n=npoints,method="linear")$y + # *** Note: this command implicitly selects the first (last) quantile considered for new extremes , i.e. *** + # *** for smaller (larger) values than those occuring in the calbration period (in case of zeros: first *** + # *** occurence). If minq=0.01 and maxq=0.99 nex extremes are hence corrected according to the correction *** + # *** function for the 1st and 99th percentile. *** + ix <- which.min(abs(cor-series[i])) + # *** Compute quantile index *** + series_q[i] <- minq + (ix-1) * ((maxq-minq)/(npoints-1)) + # *** Compute correction function *** + cf <- approx(cor.fun[doy,],n=npoints,method="linear")$y + # *** Compute corrected value *** + series_c[i] <- series[i]-cf[ix] + } + } + + } + } + + # *** for huss (specific humidity), sfcWind (wind speed), hurs (relative humidity), rsds (global radiation): set negative values to 0 *** + #if ((var == 'huss') || (var == 'sfcWind') || (var == 'hurs') || (var == 'rsds')) series_c[which(series_c<0)] <- 0 + + return(list(qm.input.series = series, qm.corrected.series = series_c, quantile.index = series_q)) + } +} + + + +qm.doywindowgen <- function(window.width){ + + doys <- c(1:365) + ndays <- length(doys) + half.width <- (window.width/2) - 0.5 + + doy_ix <- array(data=NA,dim=c(ndays,window.width)) + temp.vec <- rep(1:ndays,3) + + for (day in doys) { doy_ix[day,] <- temp.vec[((ndays+day)-half.width):((ndays+day)+half.width)] } + + return(list(half.width = half.width,window.width = window.width, doy.matrix = doy_ix)) +} + +qm.doystring <- function(yearbegin,yearend){ + + doyreg <- c(1:365) + doyleap <- c(1:59,999,60:365) + doyscenario <- integer() + + for (countyear in yearbegin:yearend) { + if (leap_year(countyear)) doyscenario <- c(doyscenario,doyleap) else doyscenario <- c(doyscenario,doyreg) + } + return(doyscenario) +} diff --git a/R/03b_QM_DOY.R b/R/03b_QM_DOY.R new file mode 100755 index 0000000000000000000000000000000000000000..8f63e1d6e425ee53fc82e8790415357dc109a55a --- /dev/null +++ b/R/03b_QM_DOY.R @@ -0,0 +1,121 @@ +library(raster) +library(rasterVis) +library(rgdal) +library(ncdf4) +library(ncdf.tools) +library(ncdf4.helpers) +library(data.table) +library(magrittr) +#library(doParallel) +#library(foreach) +library(PCICt) + +# the code performes the QM correction on daily data by using a 91-day moving window +# the correction is performed for quantiles from 0.01 to 0.99 in order to better account for extremes +# a frequency adjustment is also performed for precipitation only +# NOTE: quite slow - optimization and parallelization is needed + +#registerDoParallel(parallel::detectCores() - 1) + +path_in_mod<-"/../CORDEX/" #already remapped and cropped files +variable<-"tasmin" +inventory<-readRDS(paste0(path_in_mod,"/",variable,"_inventory.RDS")) #or a list for the selected models +CORDEX.models<-unique(paste0(inventory$gcm,"+",inventory$institute_rcm)) # total number of used models +hist_years <- 1950:2005 # years in historical period +proj_years <- 2006:2100 # years in projections +cal_years <- 1981:2010 # years for the calibration +cor_years <- 1950:2100 # years to correct +wday.width<-91 # window centred on the DOY to correct +minq<-0.01; maxq<-0.99; incq<-0.01 # definition of quantiles min, max and step +FA <- TRUE # frequency adjustment for precipitation? +source("/../03a_QM_DOY_utils.R") # load the functions required for QM +# step 1 +# load daily observations for the variable under evaluation and for the calibration period +# observations are split by month and stored in separate folders for each year + +path_in_obs<-"/../OBS_DATA/" +list<-list.files(paste0(path_in_obs,variable,"/",cal.years[1]),pattern=".nc",full.names = T) +obs.stack<-stack(lapply(list,stack)) +for(i in cal.years[-1]){ + list<-list.files(paste0(path_in_obs,variable,"/",i),pattern=".nc",full.names = T) + suppressWarnings(tmp<-stack(lapply(list,stack))) + obs.stack<-stack(obs.stack,tmp) +} + +idx.TSTcell<-which(!is.na(getValues(obs.stack[[1]]))) # indices of cells which will be processed +scenario<-"rcp85" +# convert stack into matrix with only points of interest +# row=time col=points +obs.dat<-as.data.frame(obs.stack) +obs.dat<-obs.dat[idx.TSTcell,]; obs.dat<-t(data.matrix(obs.dat)) + +# step 2 +# load CORDEX for the specific simulation and run the QM correction +dates.ref<-seq(as.Date(paste0(min(cal_years),"/01/01")),as.Date(paste0(max(cal_years),"/12/31")),"day") +dates.hist.full<-seq(as.Date(paste0(min(hist_years),"/01/01")),as.Date(paste0(max(hist_years),"/12/31"),"day")) # CORDEX historical period +dates.proj.full<-seq(as.Date(paste0(min(proj_years),"/01/01")),as.Date(paste0(max(proj_years),"/01/01")),"day") # CORDEX scenario period +dates.full<-c(dates.hist.full,dates.proj.full) # full analysed period + +# loop over each gcm+rcm simulation --> this loop should be parallelized +for(i.mod in seq_along(CORDEX.models)){ + + i.gcm<-strsplit(CORDEX.models[i.mod],"+",fixed=T)[1] + i.rcm<-strsplit(CORDEX.models[i.mod],"+",fixed=T)[2] + + #load full data series for the gcm+rcm simulation + dat.mod<-load_CORDEX(gcm=i.gcm,rcm=i.rcm, + path_in_mod=path_in_mod, variable=variable, + cells=idx.TSTcell, + dates.historical=dates.hist.full, + dates.proections=dates.proj.full) + + # matrix for storing QM corrected simulations + # row=time, col=points + qm.mat<-matrix(ncol=length(idx.TSTcell),nrow=length(dates.full)) + + for(i.cell in 1:length(idx.TSTcell)){ + # load the series for the test point + series.mod.cal<-dat.mod[which(year(dates.full)%in%cal_years),i.cell] + series.obs.cal<-as.numeric(obs.dat.conv[which(year(dates.ref)%in%cal_years),i.cell]) + series.mod.all<-dat.mod[which(year(dates.full)%in%cor_years),i.cell] + + # set to zero pr correspomding to dry days + if ((variiable=='pr') & FA) { + series.mod.cal[series.mod.cal < pr.wet] <- 0 + series.obs.cal[series.obs.cal < pr.wet] <- 0 + series.mod.all[series.mod.all < pr.wet] <- 0 + } + + # extract the DOY index for the full model series + # the index for 29th Feb is set to 999 in all leap years + # 29th Feb is then handled as 1st Mar + doy.full <- qm.doystring(min(cor_years), max(cor_years)) + + # the CDF for each DOY for both model and observation data is computed + # the CDF for wet days and the wet day frequency for each DOY are also reported. They will be used for the frequency adjustment + c_mod <- qm.cdf.doy(series.mod.cal, min(cal_years), max(cal_years), wday.width, minq, maxq, incq, variable, pr.wet) + c_obs <- qm.cdf.doy(series.obs.cal, min(cal_years), max(cal_years), wday.width, minq, maxq, incq, variable, pr.wet) + + + # here the correction for each quantile and DOY (additive) is computed + if((length(c_mod$cdf.matrix[1,]) != length(c_obs$cdf.matrix[1,])) & (length(c_mod$cdf.matrix[,1]) != length(c_obs$cdf.matrix[,1]))){ + print("ERROR: doy-based CDF of model and observations do not have the same dimension") + }else{ + dif.cdf <- c_mod$cdf.matrix-c_obs$cdf.matrix + } + + if(variable == "pr"){ + # apply the quantile correction by adding an adjustment of precipitation frequency in model data + cc <- qm.empqm.doy.fa(series.mod.all, doy.full, c_mod$cdf.matrix, c_obs$cdf.matrix, c_obs$cdf.wet, + dif.cdf, c_obs$f.wet, c_mod$f.wet, pr.wet, method=qm.method, variable, minq, maxq, incq) + # check if negative corrected precipitation occur and set to 0 + cc$qm.corrected.series[cc$qm.corrected.series < pr.wet] <- 0 + } else { + cc <- qm.empqm.doy(series.mod.all.in, doy.full, c_mod$cdf.matrix, dif.cdf, + method=qm.method, variable, minq, maxq, incq) + } + + # store the corrected series in the output matrix + qm.mat[,i.cell]<-cc$qm.corrected.series + } # close loop on grid cell +} # close loop on models \ No newline at end of file