diff --git a/R/03a_QM_DOY_utils.R b/R/03a_QM_DOY_utils.R index 70cd7622755ae2429b0c33e4053d1415d0be3c3b..cfa9f23654743caedd3ca942c330515efbf2b6af 100755 --- a/R/03a_QM_DOY_utils.R +++ b/R/03a_QM_DOY_utils.R @@ -6,10 +6,10 @@ qm.cdf.doy <- function(series, yearbegin, yearend, doywindow, minq, maxq, incq, var, pr.wet){ d <- qm.doywindowgen(doywindow) - cdf_doy <- array(NA,dim=c(365,length(seq(minq,maxq,by=incq)))) - doy.series <- qm.doystring(yearbegin,yearend) - fwet_doy <- rep(NA,365) - cdfwet_doy <- array(NA,dim=c(365,101)) + cdf_doy <- array(NA, dim=c(365, length(seq(minq, maxq, by=incq)))) + doy.series <- qm.doystring(yearbegin, yearend) + fwet_doy <- rep(NA, 365) + cdfwet_doy <- array(NA, dim=c(365, 101)) # *** set 29th February to 1st March *** doy.series[doy.series > 500] <- 60 @@ -18,19 +18,21 @@ qm.cdf.doy <- function(series, yearbegin, yearend, doywindow, minq, maxq, incq, if(length(series) != length(doy.series)){ print("ERROR: data series and doy series not equally long") cdf_doy <- NULL - }else{ + } else { + # iterate over the days of the year for(i in 1:365){ - # *** match: returns vector of length(doy.series): Position of matches in d$doy.matrix[i,] or NA if no match - # *** => all positions in doy.series that correspond to the window defined in d$doy.matrix[i,] are marked - cdf_doy[i,] <- quantile(series[which(is.na(match(doy.series,d$doy.matrix[i,])) == FALSE)], probs=seq(minq,maxq,by=incq),na.rm=TRUE) + # all values for current DOY within moving window + series.doywin <- series[which(is.na(match(doy.series, d$doy.matrix[i,])) + == FALSE)] - # *** possible (shorter) alternative *** - # cdf_doy[i,] <- quantile(series[which(!is.na(match(doy.series,d$doy.matrix[i,])))],probs=c(seq(minq,maxq,by=incq)),na.rm=TRUE) + # empirical quantile function for current DOY + cdf_doy[i,] <- quantile(series.doywin, probs=seq(minq, maxq, by=incq), + na.rm=TRUE) - # *** for precipitation additionally compute wet day frequency and wet day distribution for each DOY *** + + # for precipitation additionally compute wet day frequency and wet day + # distribution for each DOY if (var == 'pr') { - # *** all values for current DOY including window *** - series.doywin <- series[which(is.na(match(doy.series,d$doy.matrix[i,])) == FALSE)] # *** total number of days w/o NA *** ndays <- length(series.doywin[!is.na(series.doywin)]) # *** number of wet days w/o NA *** diff --git a/R/03b_QM_DOY.R b/R/03b_QM_DOY.R index 8f63e1d6e425ee53fc82e8790415357dc109a55a..8e81db517be6aa4c3f38b7f244b6e17dba38d384 100755 --- a/R/03b_QM_DOY.R +++ b/R/03b_QM_DOY.R @@ -1,86 +1,97 @@ +# imports library(raster) library(rasterVis) library(rgdal) library(ncdf4) -library(ncdf.tools) library(ncdf4.helpers) library(data.table) library(magrittr) -#library(doParallel) -#library(foreach) library(PCICt) -# the code performes the QM correction on daily data by using a 91-day moving window -# the correction is performed for quantiles from 0.01 to 0.99 in order to better account for extremes -# a frequency adjustment is also performed for precipitation only -# NOTE: quite slow - optimization and parallelization is needed +# load local modules +source('./config.R') +source('./02_load_CORDEX_function.R') +source("./03a_QM_DOY_utils.R") -#registerDoParallel(parallel::detectCores() - 1) +# TODO: parallelization of different models +# registerDoParallel(parallel::detectCores() - 1) -path_in_mod<-"/../CORDEX/" #already remapped and cropped files -variable<-"tasmin" -inventory<-readRDS(paste0(path_in_mod,"/",variable,"_inventory.RDS")) #or a list for the selected models -CORDEX.models<-unique(paste0(inventory$gcm,"+",inventory$institute_rcm)) # total number of used models -hist_years <- 1950:2005 # years in historical period -proj_years <- 2006:2100 # years in projections -cal_years <- 1981:2010 # years for the calibration -cor_years <- 1950:2100 # years to correct -wday.width<-91 # window centred on the DOY to correct -minq<-0.01; maxq<-0.99; incq<-0.01 # definition of quantiles min, max and step -FA <- TRUE # frequency adjustment for precipitation? -source("/../03a_QM_DOY_utils.R") # load the functions required for QM -# step 1 -# load daily observations for the variable under evaluation and for the calibration period -# observations are split by month and stored in separate folders for each year +# ------------------------------------------------------------------------------ +# Step 1: load daily observations for the calibration period ------------------- +# ------------------------------------------------------------------------------ -path_in_obs<-"/../OBS_DATA/" -list<-list.files(paste0(path_in_obs,variable,"/",cal.years[1]),pattern=".nc",full.names = T) -obs.stack<-stack(lapply(list,stack)) -for(i in cal.years[-1]){ - list<-list.files(paste0(path_in_obs,variable,"/",i),pattern=".nc",full.names = T) - suppressWarnings(tmp<-stack(lapply(list,stack))) - obs.stack<-stack(obs.stack,tmp) +# search observations in defined path: observations are split by month and +# stored in separate folders for each year +obs <- list.files(paste0(obs_path, '/', variable, '/', cal_years[1]), + pattern=".nc", full.names = T) + +# create raster stack +obs.stack <- stack(lapply(obs, stack)) + +# read all years of the calibration period +for(i in cal_years[-1]){ + list <- list.files(paste0(obs_path, '/', variable, "/", i), + pattern=".nc", full.names = T) + tmp <- stack(lapply(list, stack)) + obs.stack <- stack(obs.stack, tmp) } -idx.TSTcell<-which(!is.na(getValues(obs.stack[[1]]))) # indices of cells which will be processed -scenario<-"rcp85" -# convert stack into matrix with only points of interest -# row=time col=points -obs.dat<-as.data.frame(obs.stack) -obs.dat<-obs.dat[idx.TSTcell,]; obs.dat<-t(data.matrix(obs.dat)) +# indices of cells which will be processed +idx.TSTcell <- which(!is.na(getValues(obs.stack[[1]]))) + +# convert stack into matrix with only points of interest: (time, points) +obs.dat <- as.data.frame(obs.stack) +obs.dat <- obs.dat[idx.TSTcell,] +obs.dat<-t(data.matrix(obs.dat)) + +# ------------------------------------------------------------------------------ +# Step 2: define daily time periods -------------------------------------------- +# ------------------------------------------------------------------------------ + +# calibration period +dates.ref <- seq(as.Date(paste0(min(cal_years), "/01/01")), + as.Date(paste0(max(cal_years), "/12/31")), + "day") -# step 2 -# load CORDEX for the specific simulation and run the QM correction -dates.ref<-seq(as.Date(paste0(min(cal_years),"/01/01")),as.Date(paste0(max(cal_years),"/12/31")),"day") -dates.hist.full<-seq(as.Date(paste0(min(hist_years),"/01/01")),as.Date(paste0(max(hist_years),"/12/31"),"day")) # CORDEX historical period -dates.proj.full<-seq(as.Date(paste0(min(proj_years),"/01/01")),as.Date(paste0(max(proj_years),"/01/01")),"day") # CORDEX scenario period -dates.full<-c(dates.hist.full,dates.proj.full) # full analysed period +# historical period +dates.hist.full<-seq(as.Date(paste0(min(hist_years), "/01/01")), + as.Date(paste0(max(hist_years), "/12/31")), + "day") -# loop over each gcm+rcm simulation --> this loop should be parallelized +# projected period +dates.proj.full<-seq(as.Date(paste0(min(proj_years), "/01/01")), + as.Date(paste0(max(proj_years), "/01/01")), + "day") + +# full period +dates.full<- c(dates.hist.full, dates.proj.full) + +# loop over each gcm+rcm simulation for(i.mod in seq_along(CORDEX.models)){ - i.gcm<-strsplit(CORDEX.models[i.mod],"+",fixed=T)[1] - i.rcm<-strsplit(CORDEX.models[i.mod],"+",fixed=T)[2] + i.gcm<-strsplit(CORDEX.models[i.mod], "+", fixed=T)[1] + i.rcm<-strsplit(CORDEX.models[i.mod], "+", fixed=T)[2] - #load full data series for the gcm+rcm simulation + # load full data series for the gcm + rcm simulation dat.mod<-load_CORDEX(gcm=i.gcm,rcm=i.rcm, - path_in_mod=path_in_mod, variable=variable, + path_in_mod=mod_path, variable=variable, cells=idx.TSTcell, dates.historical=dates.hist.full, dates.proections=dates.proj.full) # matrix for storing QM corrected simulations # row=time, col=points - qm.mat<-matrix(ncol=length(idx.TSTcell),nrow=length(dates.full)) + qm.mat<-matrix(ncol=length(idx.TSTcell), nrow=length(dates.full)) for(i.cell in 1:length(idx.TSTcell)){ # load the series for the test point - series.mod.cal<-dat.mod[which(year(dates.full)%in%cal_years),i.cell] - series.obs.cal<-as.numeric(obs.dat.conv[which(year(dates.ref)%in%cal_years),i.cell]) - series.mod.all<-dat.mod[which(year(dates.full)%in%cor_years),i.cell] + series.mod.cal <- dat.mod[which(year(dates.full) %in% cal_years), i.cell] + series.obs.cal <- as.numeric(obs.dat.conv[which(year(dates.ref) + %in% cal_years), i.cell]) + series.mod.all <- dat.mod[which(year(dates.full) %in% cor_years), i.cell] - # set to zero pr correspomding to dry days - if ((variiable=='pr') & FA) { + # set to zero pr corresponding to dry days + if ((variable=='pr') & FA) { series.mod.cal[series.mod.cal < pr.wet] <- 0 series.obs.cal[series.obs.cal < pr.wet] <- 0 series.mod.all[series.mod.all < pr.wet] <- 0 @@ -91,26 +102,34 @@ for(i.mod in seq_along(CORDEX.models)){ # 29th Feb is then handled as 1st Mar doy.full <- qm.doystring(min(cor_years), max(cor_years)) - # the CDF for each DOY for both model and observation data is computed - # the CDF for wet days and the wet day frequency for each DOY are also reported. They will be used for the frequency adjustment - c_mod <- qm.cdf.doy(series.mod.cal, min(cal_years), max(cal_years), wday.width, minq, maxq, incq, variable, pr.wet) - c_obs <- qm.cdf.doy(series.obs.cal, min(cal_years), max(cal_years), wday.width, minq, maxq, incq, variable, pr.wet) - + # the quantile function for each DOY for both model and observation data + c_mod <- qm.cdf.doy(series.mod.cal, min(cal_years), max(cal_years), + wday.width, minq, maxq, incq, variable, pr.wet) + c_obs <- qm.cdf.doy(series.obs.cal, min(cal_years), max(cal_years), + wday.width, minq, maxq, incq, variable, pr.wet) # here the correction for each quantile and DOY (additive) is computed - if((length(c_mod$cdf.matrix[1,]) != length(c_obs$cdf.matrix[1,])) & (length(c_mod$cdf.matrix[,1]) != length(c_obs$cdf.matrix[,1]))){ + if((length(c_mod$cdf.matrix[1,]) != length(c_obs$cdf.matrix[1,])) & + (length(c_mod$cdf.matrix[,1]) != length(c_obs$cdf.matrix[,1]))){ print("ERROR: doy-based CDF of model and observations do not have the same dimension") - }else{ - dif.cdf <- c_mod$cdf.matrix-c_obs$cdf.matrix + } else { + # correction factor for each DOY and quantile + dif.cdf <- c_mod$cdf.matrix - c_obs$cdf.matrix } if(variable == "pr"){ - # apply the quantile correction by adding an adjustment of precipitation frequency in model data - cc <- qm.empqm.doy.fa(series.mod.all, doy.full, c_mod$cdf.matrix, c_obs$cdf.matrix, c_obs$cdf.wet, - dif.cdf, c_obs$f.wet, c_mod$f.wet, pr.wet, method=qm.method, variable, minq, maxq, incq) + # apply the quantile correction by adding an adjustment of precipitation + # frequency in model data + cc <- qm.empqm.doy.fa(series.mod.all, doy.full, c_mod$cdf.matrix, + c_obs$cdf.matrix, c_obs$cdf.wet, dif.cdf, + c_obs$f.wet, c_mod$f.wet, pr.wet, + method=qm.method, variable, minq, maxq, incq) + # check if negative corrected precipitation occur and set to 0 cc$qm.corrected.series[cc$qm.corrected.series < pr.wet] <- 0 + } else { + # quantile mapping cc <- qm.empqm.doy(series.mod.all.in, doy.full, c_mod$cdf.matrix, dif.cdf, method=qm.method, variable, minq, maxq, incq) }