Code refactoring.

64e3e90d · Frisinghelli Daniel · 01ee4d0c · 64e3e90d · 64e3e90d
Commit 64e3e90d authored 3 years ago by Frisinghelli Daniel
--- a/R/03a_QM_DOY_utils.R
+++ b/R/03a_QM_DOY_utils.R
@@ -6,10 +6,10 @@
 qm.cdf.doy <- function(series, yearbegin, yearend, doywindow, minq, maxq, incq, var, pr.wet){
  
  d		<- qm.doywindowgen(doywindow)
-  cdf_doy		<- array(NA,dim=c(365,length(seq(minq,maxq,by=incq))))
-  doy.series	<- qm.doystring(yearbegin,yearend)
-  fwet_doy	<- rep(NA,365)
-  cdfwet_doy	<- array(NA,dim=c(365,101))
+  cdf_doy		<- array(NA, dim=c(365, length(seq(minq, maxq, by=incq))))
+  doy.series	<- qm.doystring(yearbegin, yearend)
+  fwet_doy	<- rep(NA, 365)
+  cdfwet_doy	<- array(NA, dim=c(365, 101))
  
  # *** set 29th February to 1st March ***
  doy.series[doy.series > 500] <- 60
@@ -18,19 +18,21 @@ qm.cdf.doy <- function(series, yearbegin, yearend, doywindow, minq, maxq, incq,
  if(length(series) != length(doy.series)){
    print("ERROR: data series and doy series not equally long")
    cdf_doy <- NULL
-  }else{
+  } else {
+    # iterate over the days of the year
    for(i in 1:365){
-      #			*** match: returns vector of length(doy.series): Position of matches in d$doy.matrix[i,] or NA if no match
-      #			*** => all positions in doy.series that correspond to the window defined in d$doy.matrix[i,] are marked
-      cdf_doy[i,] <- quantile(series[which(is.na(match(doy.series,d$doy.matrix[i,])) == FALSE)], probs=seq(minq,maxq,by=incq),na.rm=TRUE)
+      # all values for current DOY within moving window
+      series.doywin <- series[which(is.na(match(doy.series, d$doy.matrix[i,]))
+                                    == FALSE)]
      
-      #			*** possible (shorter) alternative ***
-      #			cdf_doy[i,] <- quantile(series[which(!is.na(match(doy.series,d$doy.matrix[i,])))],probs=c(seq(minq,maxq,by=incq)),na.rm=TRUE)
+      # empirical quantile function for current DOY      
+      cdf_doy[i,] <- quantile(series.doywin, probs=seq(minq, maxq, by=incq),
+                              na.rm=TRUE)
      
-      #			*** for precipitation additionally compute wet day frequency and wet day distribution for each DOY ***
+    
+      #	for precipitation additionally compute wet day frequency and wet day
+      # distribution for each DOY
      if (var == 'pr') {
-        # *** all values for current DOY including window ***
-        series.doywin <- series[which(is.na(match(doy.series,d$doy.matrix[i,])) == FALSE)]
        # *** total number of days w/o NA ***
        ndays <- length(series.doywin[!is.na(series.doywin)])
        # *** number of wet days w/o NA ***

--- a/R/03b_QM_DOY.R
+++ b/R/03b_QM_DOY.R
+# imports
 library(raster)
 library(rasterVis)
 library(rgdal)
 library(ncdf4)
-library(ncdf.tools)
 library(ncdf4.helpers)
 library(data.table)
 library(magrittr)
-#library(doParallel)
-#library(foreach)
 library(PCICt)

-# the code performes the QM correction on daily data by using a 91-day moving window
-# the correction is performed for quantiles from 0.01 to 0.99 in order to better account for extremes
-# a frequency adjustment is also performed for precipitation only
-# NOTE: quite slow - optimization and parallelization is needed
+# load local modules
+source('./config.R')
+source('./02_load_CORDEX_function.R')
+source("./03a_QM_DOY_utils.R")

-#registerDoParallel(parallel::detectCores() - 1)
+# TODO: parallelization of different models
+# registerDoParallel(parallel::detectCores() - 1)

-path_in_mod<-"/../CORDEX/" #already remapped and cropped files 
-variable<-"tasmin"
-inventory<-readRDS(paste0(path_in_mod,"/",variable,"_inventory.RDS")) #or a list for the selected models
-CORDEX.models<-unique(paste0(inventory$gcm,"+",inventory$institute_rcm)) # total number of used models
-hist_years <- 1950:2005 # years in historical period
-proj_years <- 2006:2100 # years in projections
-cal_years  <- 1981:2010 # years for the calibration
-cor_years  <- 1950:2100 # years to correct
-wday.width<-91 # window centred on the DOY to correct
-minq<-0.01; maxq<-0.99; incq<-0.01 # definition of quantiles min, max and step
-FA <- TRUE # frequency adjustment for precipitation?
-source("/../03a_QM_DOY_utils.R") # load the functions required for QM
-# step 1
-# load daily observations for the variable under evaluation and for the calibration period 
-# observations are split by month and stored in separate folders for each year
+# ------------------------------------------------------------------------------
+# Step 1: load daily observations for the calibration period -------------------
+# ------------------------------------------------------------------------------

-path_in_obs<-"/../OBS_DATA/"
-list<-list.files(paste0(path_in_obs,variable,"/",cal.years[1]),pattern=".nc",full.names = T)
-obs.stack<-stack(lapply(list,stack))
-for(i in cal.years[-1]){
-  list<-list.files(paste0(path_in_obs,variable,"/",i),pattern=".nc",full.names = T)
-  suppressWarnings(tmp<-stack(lapply(list,stack)))
-  obs.stack<-stack(obs.stack,tmp)
+# search observations in defined path: observations are split by month and
+# stored in separate folders for each year
+obs <- list.files(paste0(obs_path, '/', variable, '/', cal_years[1]),
+                         pattern=".nc", full.names = T)
+
+# create raster stack
+obs.stack <- stack(lapply(obs, stack))
+
+# read all years of the calibration period
+for(i in cal_years[-1]){
+  list <- list.files(paste0(obs_path, '/', variable, "/", i),
+                     pattern=".nc", full.names = T)
+  tmp <- stack(lapply(list, stack))
+  obs.stack <- stack(obs.stack, tmp)
 }

-idx.TSTcell<-which(!is.na(getValues(obs.stack[[1]]))) # indices of cells which will be processed
-scenario<-"rcp85"
-# convert stack into matrix with only points of interest 
-# row=time col=points
-obs.dat<-as.data.frame(obs.stack)
-obs.dat<-obs.dat[idx.TSTcell,]; obs.dat<-t(data.matrix(obs.dat))
+# indices of cells which will be processed
+idx.TSTcell <- which(!is.na(getValues(obs.stack[[1]])))
+
+# convert stack into matrix with only points of interest: (time, points)
+obs.dat <- as.data.frame(obs.stack)
+obs.dat <- obs.dat[idx.TSTcell,]
+obs.dat<-t(data.matrix(obs.dat))
+
+# ------------------------------------------------------------------------------
+# Step 2: define daily time periods --------------------------------------------
+# ------------------------------------------------------------------------------
+
+# calibration period
+dates.ref <- seq(as.Date(paste0(min(cal_years), "/01/01")),
+                 as.Date(paste0(max(cal_years), "/12/31")),
+                 "day")

-# step 2 
-# load CORDEX for the specific simulation and run the QM correction
-dates.ref<-seq(as.Date(paste0(min(cal_years),"/01/01")),as.Date(paste0(max(cal_years),"/12/31")),"day")
-dates.hist.full<-seq(as.Date(paste0(min(hist_years),"/01/01")),as.Date(paste0(max(hist_years),"/12/31"),"day")) # CORDEX historical period
-dates.proj.full<-seq(as.Date(paste0(min(proj_years),"/01/01")),as.Date(paste0(max(proj_years),"/01/01")),"day") # CORDEX scenario period
-dates.full<-c(dates.hist.full,dates.proj.full) # full analysed period
+# historical period
+dates.hist.full<-seq(as.Date(paste0(min(hist_years), "/01/01")),
+                     as.Date(paste0(max(hist_years), "/12/31")),
+                             "day")

-# loop over each gcm+rcm simulation --> this loop should be parallelized
+# projected period
+dates.proj.full<-seq(as.Date(paste0(min(proj_years), "/01/01")),
+                     as.Date(paste0(max(proj_years), "/01/01")),
+                     "day")
+
+# full period
+dates.full<- c(dates.hist.full, dates.proj.full)
+
+# loop over each gcm+rcm simulation
 for(i.mod in seq_along(CORDEX.models)){ 
  
-  i.gcm<-strsplit(CORDEX.models[i.mod],"+",fixed=T)[1]
-  i.rcm<-strsplit(CORDEX.models[i.mod],"+",fixed=T)[2]
+  i.gcm<-strsplit(CORDEX.models[i.mod], "+", fixed=T)[1]
+  i.rcm<-strsplit(CORDEX.models[i.mod], "+", fixed=T)[2]
  
-  #load full data series for the gcm+rcm simulation
+  # load full data series for the gcm + rcm simulation
  dat.mod<-load_CORDEX(gcm=i.gcm,rcm=i.rcm,
-                       path_in_mod=path_in_mod, variable=variable,
+                       path_in_mod=mod_path, variable=variable,
                       cells=idx.TSTcell,
                       dates.historical=dates.hist.full,
                       dates.proections=dates.proj.full)
  
  # matrix for storing QM corrected simulations
  # row=time, col=points
-  qm.mat<-matrix(ncol=length(idx.TSTcell),nrow=length(dates.full))
+  qm.mat<-matrix(ncol=length(idx.TSTcell), nrow=length(dates.full))
  
  for(i.cell in 1:length(idx.TSTcell)){
    # load the series for the test point
-    series.mod.cal<-dat.mod[which(year(dates.full)%in%cal_years),i.cell]
-    series.obs.cal<-as.numeric(obs.dat.conv[which(year(dates.ref)%in%cal_years),i.cell])
-    series.mod.all<-dat.mod[which(year(dates.full)%in%cor_years),i.cell]
+    series.mod.cal <- dat.mod[which(year(dates.full) %in% cal_years), i.cell]
+    series.obs.cal <- as.numeric(obs.dat.conv[which(year(dates.ref)
+                                                    %in% cal_years), i.cell])
+    series.mod.all <- dat.mod[which(year(dates.full) %in% cor_years), i.cell]
    
-    # set to zero pr correspomding to dry days
-    if ((variiable=='pr') & FA) {
+    # set to zero pr corresponding to dry days
+    if ((variable=='pr') & FA) {
      series.mod.cal[series.mod.cal < pr.wet] <- 0
      series.obs.cal[series.obs.cal < pr.wet] <- 0
      series.mod.all[series.mod.all < pr.wet] <- 0
@@ -91,26 +102,34 @@ for(i.mod in seq_along(CORDEX.models)){
    # 29th Feb is then handled as 1st Mar
    doy.full <- qm.doystring(min(cor_years), max(cor_years))
    
-    # the CDF for each DOY for both model and observation data is computed
-    # the CDF for wet days and the wet day frequency for each DOY are also reported. They will be used for the frequency adjustment
-    c_mod <- qm.cdf.doy(series.mod.cal, min(cal_years), max(cal_years), wday.width, minq, maxq, incq, variable, pr.wet)
-    c_obs <- qm.cdf.doy(series.obs.cal, min(cal_years), max(cal_years), wday.width, minq, maxq, incq, variable, pr.wet)
-    
+    # the quantile function for each DOY for both model and observation data
+    c_mod <- qm.cdf.doy(series.mod.cal, min(cal_years), max(cal_years),
+                        wday.width, minq, maxq, incq, variable, pr.wet)
+    c_obs <- qm.cdf.doy(series.obs.cal, min(cal_years), max(cal_years),
+                        wday.width, minq, maxq, incq, variable, pr.wet)
    
    # here the correction for each quantile and DOY (additive) is computed
-    if((length(c_mod$cdf.matrix[1,]) != length(c_obs$cdf.matrix[1,])) & (length(c_mod$cdf.matrix[,1]) != length(c_obs$cdf.matrix[,1]))){
+    if((length(c_mod$cdf.matrix[1,]) != length(c_obs$cdf.matrix[1,])) &
+       (length(c_mod$cdf.matrix[,1]) != length(c_obs$cdf.matrix[,1]))){
      print("ERROR: doy-based CDF of model and observations do not have the same dimension")
-    }else{
-      dif.cdf	<- c_mod$cdf.matrix-c_obs$cdf.matrix
+    } else {
+      # correction factor for each DOY and quantile
+      dif.cdf	<- c_mod$cdf.matrix - c_obs$cdf.matrix
    }
    
    if(variable == "pr"){
-      # apply the quantile correction by adding an adjustment of precipitation frequency in model data
-      cc <- qm.empqm.doy.fa(series.mod.all, doy.full, c_mod$cdf.matrix, c_obs$cdf.matrix, c_obs$cdf.wet,
-                            dif.cdf, c_obs$f.wet, c_mod$f.wet, pr.wet, method=qm.method, variable, minq, maxq, incq)
+      # apply the quantile correction by adding an adjustment of precipitation
+      # frequency in model data
+      cc <- qm.empqm.doy.fa(series.mod.all, doy.full, c_mod$cdf.matrix,
+                            c_obs$cdf.matrix, c_obs$cdf.wet, dif.cdf,
+                            c_obs$f.wet, c_mod$f.wet, pr.wet,
+                            method=qm.method, variable, minq, maxq, incq)
+      
      # check if negative corrected precipitation occur and set to 0
      cc$qm.corrected.series[cc$qm.corrected.series < pr.wet] <- 0
+      
    } else {
+      # quantile mapping
      cc <- qm.empqm.doy(series.mod.all.in, doy.full, c_mod$cdf.matrix, dif.cdf, 
                         method=qm.method, variable, minq, maxq, incq)
    }