From 72350c6f94a0be5f1534f48415187d85e672e769 Mon Sep 17 00:00:00 2001
From: dfrisinghelli <daniel.frisinghelli@gmail.com>
Date: Thu, 20 May 2021 15:35:04 +0200
Subject: [PATCH] R-codes: starting point for operational implementation.

---
 R/00_get_inventory.R        |  74 +++++++++++++++++++++++++
 R/01_remap_cdo.R            |  42 ++++++++++++++
 R/02_load_CORDEX_function.R | 103 +++++++++++++++++++++++++++++++++++
 R/03_QM_calibration.R       | 115 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 334 insertions(+)
 create mode 100644 R/00_get_inventory.R
 create mode 100644 R/01_remap_cdo.R
 create mode 100644 R/02_load_CORDEX_function.R
 create mode 100644 R/03_QM_calibration.R

diff --git a/R/00_get_inventory.R b/R/00_get_inventory.R
new file mode 100644
index 0000000..e56de79
--- /dev/null
+++ b/R/00_get_inventory.R
@@ -0,0 +1,74 @@
+#' Get inventory from path containing EURO-CORDEX .nc files
+#'
+#' Returns a data.table with information obtained by splitting the netcdf file
+#' names into their components (GCM, RCM, variable, experiment, ...) and
+#' aggregating over years.
+#'
+#' @param path Path that will be searched recursively for .nc files.
+#' @param add_files Boolean, if \code{TRUE}, will add a column containing lists
+#'   of associated files with their full paths (useful e.g. for further
+#'   processing).
+#'
+#' @return A data.table with the inventory information.
+#'
+#' @seealso \code{\link{compare_variables_in_inventory}} for further comparing the
+#'   results, and \code{\link{check_inventory}} for performing some checks.
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#'
+#' path <- "/mnt/CEPH_BASEDATA/METEO/SCENARIO"
+#' dat <- get_inventory(path)
+#' print(dat)
+#'
+#' # the same, but with files (does not print nicely)
+#' dat_file <- get_inventory(path, add_files = TRUE)
+#' print(dat_file)
+#' }
+get_inventory <- function(path,
+                          add_files = FALSE){
+
+  all_files_fullpath <- fs::dir_ls(path,
+                                   regexp = "[.]nc$",
+                                   recurse = TRUE)
+
+  all_files_base <- fs::path_file(all_files_fullpath)
+
+  # create info data
+  data.table(fn = fs::path_ext_remove(all_files_base)) %>%
+    .[, tstrsplit(fn, "_")] -> dat_info
+
+  # add years column (V9), if it does not exist, as e.g. for OROG
+  if(! "V9" %in% names(dat_info)){
+    dat_info[, V9 := NA_character_]
+  }
+
+  setnames(dat_info,
+           old = paste0("V", 1:9),
+           new = c("variable", "domain", "gcm", "experiment", "ensemble",
+                   "institute_rcm", "downscale_realisation", "timefreq", "years"))
+
+  dat_info[, file_fullpath := all_files_fullpath]
+
+
+  # get unique models
+  dat_info_summary <- dat_info[,
+                               .(nn_files = .N,
+                                 list_files = list(file_fullpath),
+                                 years_total = paste0(.SD[1, years %>% strsplit("-") %>% .[[1]] %>% .[1]],
+                                                      "-",
+                                                      .SD[.N, years %>% strsplit("-") %>% .[[1]] %>% .[2]])),
+                               keyby = .(variable, domain, gcm, institute_rcm, experiment,
+                                         ensemble, downscale_realisation, timefreq)]
+
+  # remove files if not requested
+  if(!add_files) dat_info_summary[, list_files := NULL]
+
+
+  return(dat_info_summary)
+}
+
+
+
diff --git a/R/01_remap_cdo.R b/R/01_remap_cdo.R
new file mode 100644
index 0000000..ea709a8
--- /dev/null
+++ b/R/01_remap_cdo.R
@@ -0,0 +1,42 @@
+library(raster)
+library(data.table)
+library(magrittr)
+library(fs)
+library(doParallel)
+library(foreach)
+registerDoParallel(parallel::detectCores() - 1)
+
+path_in<-"/../"
+path_out<-"/../"
+variable<-"tasmin"; remapmode<-"remapbil"
+target<-paste0(path_in,"grid_1km_laea")
+
+inventory<-readRDS(paste0(path_in,"CORDEX/",variable,"_inventory.RDS"))
+
+#-------------- remap the original CORDEX 0.11° data to the target grid ---------------------------
+# all projections are remapped from rotated-pole lat/lon to a common regular 1 km grid centred on Trentino-South Tyrol
+# bilinear remapping for temperature variables - remapbil
+# first-order conservative remapping for pr - remapcon
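+# sketch: derive the remap mode from the variable instead of hardcoding it above,
+# so that pr uses first-order conservative remapping
+remapmode<-if(variable=="pr") "remapcon" else "remapbil"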
+
+foreach(i = 1:nrow(inventory),
+        .inorder=F,
+        .packages=c("data.table","magrittr","fs") # fs provides path() and path_file() in the workers
+  ) %dopar%{
+    
+    files_glob <- inventory$list_files[i][[1]]
+    for(j in seq_along(files_glob)){
+      
+    file_in<- files_glob[j]
+    file_out<-path(path_out, 
+                   paste0(variable,"_day"), 
+                   path_file(file_in))
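+    # sketch: skip files already remapped in a previous run, so the script can be
+    # resumed after an interruption (assumes an existing output file is complete)
+    if(file_exists(file_out)) next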
+    cdo_call<-paste0("cdo ",remapmode,",",target," ",file_in," ",file_out)
+    system(cdo_call)
+    }
+  }
diff --git a/R/02_load_CORDEX_function.R b/R/02_load_CORDEX_function.R
new file mode 100644
index 0000000..d39d011
--- /dev/null
+++ b/R/02_load_CORDEX_function.R
@@ -0,0 +1,103 @@
+library(ncdf4)          # nc_open(), ncvar_get(), nc_close()
+library(ncdf4.helpers)  # nc.get.time.series()
+library(PCICt)          # calendar-aware time axis returned by nc.get.time.series()
+library(data.table)     # year()
+
+# --- the function loads the CORDEX data of the selected gcm+rcm simulation for the dates of interest
+# --- it interpolates to the standard calendar in case this condition is not fulfilled by the model
+# --- data for the historical and projection periods are returned as one matrix
+# --- required input:
+# --- gcm = GCM simulation
+# --- rcm = RCM simulation
+# --- path_in_mod = path to the CORDEX data
+# --- variable = tasmin, tasmax or pr
+# --- cells = indices of the grid cells to be processed
+# --- dates.historical = daily dates of the historical period
+# --- dates.projections = daily dates of the projection period
+# --- scenario = experiment used to select the projection files (e.g. rcp85)
+
+load_CORDEX<-function(gcm, rcm, path_in_mod, variable, cells,
+                      dates.historical, dates.projections, scenario){
+    # available files for the selected simulation:
+    # both the GCM and the RCM name must match
+    n.files<-list.files(paste0(path_in_mod,variable,"/"),pattern=gcm)
+    n.files<-n.files[grepl(rcm,n.files)]
+    idx.hist<-grep("historical",n.files)
+    idx.proj<-grep(scenario,n.files)
+    
+    # final data array with standard calendar
+    mat.hist.full<-matrix(nrow=length(dates.historical),ncol=length(cells))
+    mat.proj.full<-matrix(nrow=length(dates.projections),ncol=length(cells))
+    
+    # used for loading only
+    dates.hist<-numeric(); dates.proj<-numeric() 
+    mat.hist<-matrix(nrow=0,ncol=length(cells)); 
+    mat.proj<-matrix(nrow=0,ncol=length(cells))
+    
+    # load all files for historical period
+    for(i.file.hist in seq_along(idx.hist)){
+      r<-nc_open(paste0(path_in_mod,variable,"/",n.files[idx.hist[i.file.hist]]))
+      values<-(ncvar_get(r,variable)); tmp<-as.vector(values); 
+      # load data in a tmp matrix with row=time and col=grid points
+      mat.tmp<-t(matrix(tmp,ncol=length(values[1,1,]),nrow=length(values[1,,1])*length(values[,1,1])))
+      # select only cells of interest
+      mat.tmp<-mat.tmp[,cells]; 
+      # append to the full matrix
+      mat.hist<-rbind(mat.hist,mat.tmp)
+      
+      # get time and append to the total vector of dates
+      times<-nc.get.time.series(r)
+      dates.hist<-append(dates.hist,times,length(dates.hist))
+      nc_close(r)
+    }
+    # truncate hours
+    dates.hist<-trunc(dates.hist,"day")
+    # remove data outside the considered historical period
+    idx.rm<-which(year(dates.hist)<min(year(dates.historical)) | year(dates.hist)>max(year(dates.historical)))
+    if(length(idx.rm)>0){ dates.hist<-dates.hist[-idx.rm]; mat.hist<-mat.hist[-idx.rm,]}
+    
+    # check if time series is of expected length
+    if(length(dates.historical)==length(dates.hist)) mat.hist.full<-mat.hist
+    # otherwise the model uses a non-standard calendar (e.g. 360- or 365-day) and interpolation is needed
+    if(length(dates.historical)!=length(dates.hist)){
+      # indices are resampled at regular (rounded) intervals to reach the expected series length
+      seq_pcict<-seq(1,length(dates.hist),length.out=length(dates.historical))
+      seq_pcict<-round(seq_pcict)
+      for(k in 1:length(cells)){
+        mat.hist.full[,k]<-mat.hist[seq_pcict,k]
+      }
+    }
+    # start the loading for projections
+    for(i.file.proj in seq_along(idx.proj)){
+      r<-nc_open(paste0(path_in_mod,variable,"/",n.files[idx.proj[i.file.proj]]))
+      values<-(ncvar_get(r,variable)); tmp<-as.vector(values); 
+      mat.tmp<-t(matrix(tmp,ncol=length(values[1,1,]),nrow=length(values[1,,1])*length(values[,1,1])))
+      mat.tmp<-mat.tmp[,cells]; mat.proj<-rbind(mat.proj,mat.tmp)
+      times<-nc.get.time.series(r)
+      dates.proj<-append(dates.proj,times,length(dates.proj))
+      nc_close(r)
+    }
+    dates.proj<-trunc(dates.proj,"day")
+    idx.rm<-which(year(dates.proj)<min(year(dates.projections)) | year(dates.proj)>max(year(dates.projections)))
+    if(length(idx.rm)>0){ dates.proj<-dates.proj[-idx.rm]; mat.proj<-mat.proj[-idx.rm,]}
+    if(length(dates.projections)==length(dates.proj)) mat.proj.full<-mat.proj
+    if(length(dates.projections)!=length(dates.proj)){
+      seq_pcict<-seq(1,length(dates.proj),length.out=length(dates.projections))
+      seq_pcict<-round(seq_pcict)
+      for(k in 1:length(cells)){
+        mat.proj.full[,k]<-mat.proj[seq_pcict,k]
+      }
+    }
+  
+  # stack the historical and projection data and return the combined matrix
+  mod.dat<-rbind(mat.hist.full,mat.proj.full)
+  return(mod.dat)
+}
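+
+# usage sketch with hypothetical model names (the actual values come from the inventory):
+# dat.mod <- load_CORDEX(gcm = "ICHEC-EC-EARTH", rcm = "CLMcom-CCLM4-8-17",
+#                        path_in_mod = "/../CORDEX/", variable = "tasmin",
+#                        cells = idx.TSTcell,
+#                        dates.historical = dates.hist.full,
+#                        dates.projections = dates.proj.full,
+#                        scenario = "rcp85")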
diff --git a/R/03_QM_calibration.R b/R/03_QM_calibration.R
new file mode 100644
index 0000000..ea01ae8
--- /dev/null
+++ b/R/03_QM_calibration.R
@@ -0,0 +1,115 @@
+library(raster)
+library(rasterVis)
+library(rgdal)
+library(ncdf4)
+library(ncdf.tools)
+library(ncdf4.helpers)
+library(data.table)
+library(magrittr)
+#library(doParallel)
+#library(foreach)
+library(eurocordexr)
+library(PCICt)
+library(qmap) # fitQmapQUANT() / doQmapQUANT()
+
+# load_CORDEX() is defined in this repository (path relative to the repository root)
+source("R/02_load_CORDEX_function.R")
+
+#registerDoParallel(parallel::detectCores() - 1)
+
+# testing bias-correction methods for tas
+BA.method<-"QM" # start with Empirical Quantile Mapping
+# the QM implementation needs to be improved by following https://github.com/SvenKotlarski/qmCH2018
+path_in_mod<-"/../CORDEX/" # already remapped and cropped files
+path_out<-"/../" # output path for the bias-adjusted files
+variable<-"tasmin"
+scenario<-"rcp85" # emission scenario to process
+inventory<-readRDS(paste0(path_in_mod,"/",variable,"_inventory.RDS")) # or a list of the selected models
+CORDEX.models<-unique(paste0(inventory$gcm,"+",inventory$institute_rcm)) # identifiers of all gcm+rcm combinations
+hist_years <- 1950:2005 # years in historical period
+proj_years <- 2006:2100 # years in projections
+cal_years  <- 1981:2010 # years for the calibration
+
+# step 1
+# load daily observations for the variable under evaluation and for the reference period 
+# observations are split by month and stored in separate folders for each year
+ref.years<-cal_years # reference period of the observations (same as the calibration years)
+path_in_obs<-"/../OBS_DATA/"
+fls<-list.files(paste0(path_in_obs,variable,"/",ref.years[1]),pattern="\\.nc$",full.names=TRUE)
+obs.stack<-stack(lapply(fls,stack))
+for(i in ref.years[-1]){
+  fls<-list.files(paste0(path_in_obs,variable,"/",i),pattern="\\.nc$",full.names=TRUE)
+  suppressWarnings(tmp<-stack(lapply(fls,stack)))
+  obs.stack<-stack(obs.stack,tmp)
+}
+
+idx.TSTcell<-which(!is.na(getValues(obs.stack[[1]]))) # indices of the cells which will be processed
+# convert stack into matrix with only points of interest 
+# row=time col=points
+obs.dat<-as.data.frame(obs.stack)
+obs.dat<-obs.dat[idx.TSTcell,]; obs.dat<-t(data.matrix(obs.dat))
+
+# step 2 
+# load CORDEX for the specific simulation and run the QM correction
+dates.ref<-seq(as.Date(paste0(min(cal_years),"/01/01")),as.Date(paste0(max(cal_years),"/12/31")),"day")
+dates.hist.full<-seq(as.Date(paste0(min(hist_years),"/01/01")),as.Date(paste0(max(hist_years),"/12/31")),"day") # CORDEX historical period
+dates.proj.full<-seq(as.Date(paste0(min(proj_years),"/01/01")),as.Date(paste0(max(proj_years),"/12/31")),"day") # CORDEX scenario period
+dates.full<-c(dates.hist.full,dates.proj.full) # full analysed period
+
+# loop over each gcm+rcm simulation --> this loop should be parallelized
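+# sketch of the intended parallelisation (doParallel/foreach are commented out above);
+# the .packages list is an assumption about what the loop body needs:
+# foreach(i.mod=seq_along(CORDEX.models), .packages=c("raster","ncdf4","ncdf4.helpers","data.table","PCICt","qmap")) %dopar% { ... }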
+  for(i.mod in seq_along(CORDEX.models)){ 
+    
+    i.gcm<-strsplit(CORDEX.models[i.mod],"+",fixed=TRUE)[[1]][1]
+    i.rcm<-strsplit(CORDEX.models[i.mod],"+",fixed=TRUE)[[1]][2]
+    
+    #load full data series for the gcm+rcm simulation
+    dat.mod<-load_CORDEX(gcm=i.gcm,rcm=i.rcm,
+                         path_in_mod=path_in_mod,variable=variable,
+                         cells=idx.TSTcell,
+                         dates.historical=dates.hist.full,
+                         dates.projections=dates.proj.full,
+                         scenario=scenario)
+
+    # matrix for storing QM corrected simulations
+    # row=time, col=points
+    mod.qm<-matrix(ncol=length(idx.TSTcell),nrow=length(dates.full))
+    
+    
+    # the loop currently runs over months, but it should run over calendar days,
+    # selecting a 91-day window centred on each day to extract the correction:
+    # see the implementation in https://github.com/SvenKotlarski/qmCH2018
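+    # sketch of a helper for that 91-day window (currently unused; the name
+    # doy_window and the 365-day wrap-around are assumptions):
+    doy_window<-function(dates,doy,half=45){
+      d<-data.table::yday(dates)
+      which(pmin(abs(d-doy),365-abs(d-doy))<=half)
+    }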
+    for (i.month in 1:12){ 
+      
+      # extract observations in the considered month
+      obs<-obs.dat[which(month(dates.ref)==i.month),]
+      dates.obs<-dates.ref[which(month(dates.ref)==i.month)]
+      # extract model data over the calibration period for the monthly window
+      mod.hist<-dat.mod[which(year(dates.full)%in%cal_years & month(dates.full)==i.month),]
+      
+      for(i.cell in seq_along(idx.TSTcell)){
+        # fit the quantiles and derive the transfer function over the calibration period
+        # (wet.day = TRUE for precipitation only)
+        # this code should be modified to make the quantiles configurable:
+        # the function currently considers all centiles from 0 to 1; using only
+        # 0.01 to 0.99 with a constant correction outside this range would be
+        # more robust when dealing with extremes --> QM with custom quantiles here:
+        # https://github.com/SvenKotlarski/qmCH2018
+        qm<-fitQmapQUANT(obs=as.numeric(obs[,i.cell]),mod=mod.hist[,i.cell],wet.day=FALSE)
+        # apply the quantile mapping to the model simulation over the entire spanned period, e.g. 1950-2100
+        mod.qm[which(month(dates.full)==i.month),i.cell]<-doQmapQUANT(dat.mod[which(month(dates.full)==i.month),i.cell],qm)
+      }
+    }
+    # save as .rda (R data file) for now, but the output has to be exported as netCDF
+    name.out<-paste0(inventory[which(inventory$gcm==i.gcm & inventory$institute_rcm==i.rcm &
+                                       inventory$experiment==scenario),-c(9,10)],collapse="_")
+    save(mod.qm,file=paste0(path_out,variable,"/",BA.method,"/BiasAdjust_",name.out,".rda"),compress=TRUE)
+  }
\ No newline at end of file
-- 
GitLab