From 72350c6f94a0be5f1534f48415187d85e672e769 Mon Sep 17 00:00:00 2001
From: dfrisinghelli <daniel.frisinghelli@gmail.com>
Date: Thu, 20 May 2021 15:35:04 +0200
Subject: [PATCH] R-codes: starting point for operational implementation.

---
 R/00_get_inventory.R        |  74 ++++++++++++++++++++++++++
 R/01_remap_cdo.R            |  36 +++++++++++++
 R/02_load_CORDEX_function.R |  84 ++++++++++++++++++++++++++++++
 R/03_QM_calibration.R       | 100 ++++++++++++++++++++++++++++++++++++
 4 files changed, 294 insertions(+)
 create mode 100644 R/00_get_inventory.R
 create mode 100644 R/01_remap_cdo.R
 create mode 100644 R/02_load_CORDEX_function.R
 create mode 100644 R/03_QM_calibration.R

diff --git a/R/00_get_inventory.R b/R/00_get_inventory.R
new file mode 100644
index 0000000..e56de79
--- /dev/null
+++ b/R/00_get_inventory.R
@@ -0,0 +1,74 @@
+#' Get inventory from a path containing EURO-CORDEX .nc files
+#'
+#' Returns a data.table obtained by splitting the NetCDF file names into
+#' their components (GCM, RCM, variable, experiment, ...) and aggregating
+#' over years.
+#'
+#' @param path Path that will be searched recursively for .nc files.
+#' @param add_files Boolean; if \code{TRUE}, adds a column containing lists
+#'   of associated files with their full paths (useful e.g. for further
+#'   processing).
+#'
+#' @return A data.table with the inventory information.
+#'
+#' @seealso \code{\link{compare_variables_in_inventory}} for further comparing
+#'   the results, and \code{\link{check_inventory}} for performing some checks.
+#'
+#' @import data.table
+#' @importFrom magrittr %>%
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#'
+#' path <- "/mnt/CEPH_BASEDATA/METEO/SCENARIO"
+#' dat <- get_inventory(path)
+#' print(dat)
+#'
+#' # the same, but with files (does not print nicely)
+#' dat_file <- get_inventory(path, add_files = TRUE)
+#' print(dat_file)
+#' }
+get_inventory <- function(path,
+                          add_files = FALSE){
+
+  all_files_fullpath <- fs::dir_ls(path,
+                                   regexp = "[.]nc$",
+                                   recurse = TRUE)
+
+  all_files_base <- fs::path_file(all_files_fullpath)
+
+  # split the file names (without extension) into their "_"-separated components
+  data.table(fn = fs::path_ext_remove(all_files_base)) %>%
+    .[, tstrsplit(fn, "_")] -> dat_info
+
+  # add years column (V9), if it does not exist, as e.g. for OROG
+  if(!"V9" %in% names(dat_info)){
+    dat_info[, V9 := NA_character_]
+  }
+
+  setnames(dat_info,
+           old = paste0("V", 1:9),
+           new = c("variable", "domain", "gcm", "experiment", "ensemble",
+                   "institute_rcm", "downscale_realisation", "timefreq", "years"))
+
+  dat_info[, file_fullpath := all_files_fullpath]
+
+  # summarise to unique models, aggregating the range of covered years
+  dat_info_summary <- dat_info[,
+    .(nn_files = .N,
+      list_files = list(file_fullpath),
+      years_total = paste0(.SD[1, years %>% strsplit("-") %>% .[[1]] %>% .[1]],
+                           "-",
+                           .SD[.N, years %>% strsplit("-") %>% .[[1]] %>% .[2]])),
+    keyby = .(variable, domain, gcm, institute_rcm, experiment,
+              ensemble, downscale_realisation, timefreq)]
+
+  # remove files if not requested
+  if(!add_files) dat_info_summary[, list_files := NULL]
+
+  return(dat_info_summary)
+}
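+
+# A minimal usage sketch (an assumption, not part of the function above): this
+# is presumably how the per-variable inventories read by R/01_remap_cdo.R as
+# "<variable>_inventory.RDS" would be produced; paths are placeholders.
+# dat <- get_inventory("/../CORDEX", add_files = TRUE)
+# dat_tasmin <- dat[variable == "tasmin"]
+# saveRDS(dat_tasmin, "/../CORDEX/tasmin_inventory.RDS")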
"V9" %in% names(dat_info)){ + dat_info[, V9 := NA_character_] + } + + setnames(dat_info, + old = paste0("V", 1:9), + new = c("variable", "domain", "gcm", "experiment","ensemble", + "institute_rcm", "downscale_realisation", "timefreq", "years")) + + dat_info[, file_fullpath := all_files_fullpath] + + + # get unique models + dat_info_summary <- dat_info[, + .(nn_files = .N, + list_files = list(file_fullpath), + years_total = paste0(.SD[1, years %>% strsplit("-") %>% .[[1]] %>% .[1]], + "-", + .SD[.N, years %>% strsplit("-") %>% .[[1]] %>% .[2]])), + keyby = .(variable, domain, gcm, institute_rcm, experiment, + ensemble, downscale_realisation, timefreq)] + + # remove files if not requested + if(!add_files) dat_info_summary[, list_files := NULL] + + + return(dat_info_summary) +} + + + diff --git a/R/01_remap_cdo.R b/R/01_remap_cdo.R new file mode 100644 index 0000000..ea709a8 --- /dev/null +++ b/R/01_remap_cdo.R @@ -0,0 +1,36 @@ +library(raster) +library(data.table) +library(magrittr) +library(fs) +library(doParallel) +library(foreach) +registerDoParallel(parallel::detectCores() - 1) + +path_in<-"/../" +path_out<-"/../" +variable<-"tasmin"; remapmode<-"remapbil" +target<-paste0(path_in,"grid_1km_laea") + +inventory<-readRDS(paste0(path_in,"CORDEX/",variable,"_inventory.RDS")) + +#-------------- remap original CORDEX 0.11° to the target grid --------------------------- +# All projections are remapped from rotated pole lat/lon to a common regular grid of 1 km centred on Trentino South Tyrol +# bilinear mapping - remapbil +# first-order conservative mapping for pr - remapcon + +foreach(i = 1:nrow(inventory), + .inorder=F, + .packages=c("data.table","magrittr") + ) %dopar%{ + + files_glob <- inventory$list_files[i][[1]] + for(j in seq_along(files_glob)){ + + file_in<- files_glob[j] + file_out<-path(path_out, + paste0(variable,"_day"), + path_file(file_in)) + cdo_call<-paste0("cdo ",remapmode,",",target," ",file_in," ",file_out) + system(cdo_call) + } + } diff --git a/R/02_load_CORDEX_function.R b/R/02_load_CORDEX_function.R new file mode 100644 index 0000000..d39d011 --- /dev/null +++ b/R/02_load_CORDEX_function.R @@ -0,0 +1,84 @@ + +# --- the function load the CORDEX data for selected gcm+rcm simulation for the dates of interest +# --- it performs the interpolation to standard calendar in case this condition is not fullfilled by the model +# --- data for historical and projections are returned as one matrix +# --- input required: +# --- gcm = GCM simulation +# --- rcm = RCM simulation +# --- path_in_mod = path to CORDEX data +# --- variable = tasmin, tasmax or pr +# --- cells = indices of cells in the grid to be processed +# --- dates.historical = daily dates of historical period +# --- dates.projections = daily dates over projection + +load_CORDEX<-function(gcm, rcm, path_in_mod, variable, cells, dates.historical, dates.projections ){ + # available simulations for the model i.mod + n.files<-list.files(paste0(path_in_mod,variable,"/"),pattern=paste0(c(gcm,rcm),collapse="|")) + idx.hist<-grep("historical",n.files);idx.proj<-grep(scenario,n.files) + + # final data array with standard calendar + mat.hist.full<-matrix(nrow=length(dates.historical),ncol=length(cells)) + mat.proj.full<-matrix(nrow=length(dates.projections),ncol=length(cells)) + + # used for loading only + dates.hist<-numeric(); dates.proj<-numeric() + mat.hist<-matrix(nrow=0,ncol=length(cells)); + mat.proj<-matrix(nrow=0,ncol=length(cells)) + + # load all files for historical period + for(i.file.hist in seq_along(idx.hist)){ + 
diff --git a/R/02_load_CORDEX_function.R b/R/02_load_CORDEX_function.R
new file mode 100644
index 0000000..d39d011
--- /dev/null
+++ b/R/02_load_CORDEX_function.R
@@ -0,0 +1,84 @@
+
+# --- the function loads the CORDEX data of a selected gcm+rcm simulation for the dates of interest
+# --- it interpolates to the standard calendar in case this condition is not fulfilled by the model
+# --- data for historical and projections are returned as one matrix
+# --- requires ncdf4 (nc_open, ncvar_get, nc_close), ncdf4.helpers (nc.get.time.series)
+# --- and data.table (year, month), which are loaded by the calling script
+# --- input required:
+# --- gcm = GCM simulation
+# --- rcm = RCM simulation
+# --- path_in_mod = path to CORDEX data
+# --- variable = tasmin, tasmax or pr
+# --- cells = indices of cells in the grid to be processed
+# --- dates.historical = daily dates of historical period
+# --- dates.projections = daily dates over projection
+# --- scenario = scenario string used to match the projection files (e.g. "rcp85")
+
+load_CORDEX<-function(gcm, rcm, path_in_mod, variable, cells,
+                      dates.historical, dates.projections, scenario){
+  # available simulations for the model
+  n.files<-list.files(paste0(path_in_mod,variable,"/"),pattern=paste0(c(gcm,rcm),collapse="|"))
+  idx.hist<-grep("historical",n.files); idx.proj<-grep(scenario,n.files)
+
+  # final data arrays with standard calendar
+  mat.hist.full<-matrix(nrow=length(dates.historical),ncol=length(cells))
+  mat.proj.full<-matrix(nrow=length(dates.projections),ncol=length(cells))
+
+  # used for loading only
+  dates.hist<-numeric(); dates.proj<-numeric()
+  mat.hist<-matrix(nrow=0,ncol=length(cells))
+  mat.proj<-matrix(nrow=0,ncol=length(cells))
+
+  # load all files for the historical period
+  for(i.file.hist in seq_along(idx.hist)){
+    r<-nc_open(paste0(path_in_mod,variable,"/",n.files[idx.hist[i.file.hist]]))
+    values<-ncvar_get(r,variable); tmp<-as.vector(values)
+    # reshape the (x, y, time) array into a matrix with row=time and col=grid points
+    mat.tmp<-t(matrix(tmp,ncol=dim(values)[3],nrow=prod(dim(values)[1:2])))
+    # select only cells of interest
+    mat.tmp<-mat.tmp[,cells]
+    # append to the full matrix
+    mat.hist<-rbind(mat.hist,mat.tmp)
+
+    # get time and append to the total vector of dates
+    times<-nc.get.time.series(r)
+    dates.hist<-append(dates.hist,times,length(dates.hist))
+    nc_close(r)
+  }
+  # truncate hours
+  dates.hist<-trunc(dates.hist,"day")
+  # remove data outside the considered historical period
+  rm<-which(year(dates.hist)<min(year(dates.historical)) | year(dates.hist)>max(year(dates.historical)))
+  if(length(rm)>0){ dates.hist<-dates.hist[-rm]; mat.hist<-mat.hist[-rm,] }
+
+  # if the time series has the expected length, the model uses a standard calendar
+  if(length(dates.historical)==length(dates.hist)){
+    mat.hist.full<-mat.hist
+  } else {
+    # otherwise (e.g. 360-day or no-leap calendar) interpolate to the standard
+    # calendar: indices are resampled at (approximately) regular spacing so that
+    # the series reaches the expected length
+    seq_pcict<-round(seq(1,length(dates.hist),length.out=length(dates.historical)))
+    for(k in 1:length(cells)){
+      mat.hist.full[,k]<-mat.hist[seq_pcict,k]
+    }
+  }
+  # load all files for the projection period
+  for(i.file.proj in seq_along(idx.proj)){
+    r<-nc_open(paste0(path_in_mod,variable,"/",n.files[idx.proj[i.file.proj]]))
+    values<-ncvar_get(r,variable); tmp<-as.vector(values)
+    mat.tmp<-t(matrix(tmp,ncol=dim(values)[3],nrow=prod(dim(values)[1:2])))
+    mat.tmp<-mat.tmp[,cells]; mat.proj<-rbind(mat.proj,mat.tmp)
+    times<-nc.get.time.series(r)
+    dates.proj<-append(dates.proj,times,length(dates.proj))
+    nc_close(r)
+  }
+  dates.proj<-trunc(dates.proj,"day")
+  rm<-which(year(dates.proj)<min(year(dates.projections)) | year(dates.proj)>max(year(dates.projections)))
+  if(length(rm)>0){ dates.proj<-dates.proj[-rm]; mat.proj<-mat.proj[-rm,] }
+  if(length(dates.projections)==length(dates.proj)){
+    mat.proj.full<-mat.proj
+  } else {
+    seq_pcict<-round(seq(1,length(dates.proj),length.out=length(dates.projections)))
+    for(k in 1:length(cells)){
+      mat.proj.full[,k]<-mat.proj[seq_pcict,k]
+    }
+  }
+
+  # stack historical and projection data and return the model matrix (row=time, col=cells)
+  mod.dat<-rbind(mat.hist.full,mat.proj.full)
+  return(mod.dat)
+}
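+
+# Illustration of the calendar interpolation above (a sketch, not part of the
+# pipeline): mapping one 360-day model year onto a 365-day standard year simply
+# repeats five of the indices, since the rounded step is smaller than one day.
+# idx <- round(seq(1, 360, length.out = 365))
+# length(idx)          # 365
+# sum(duplicated(idx)) # 5 days are used twice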
diff --git a/R/03_QM_calibration.R b/R/03_QM_calibration.R
new file mode 100644
index 0000000..ea01ae8
--- /dev/null
+++ b/R/03_QM_calibration.R
@@ -0,0 +1,100 @@
+library(raster)
+library(rasterVis)
+library(rgdal)
+library(ncdf4)
+library(ncdf.tools)
+library(ncdf4.helpers)
+library(data.table)
+library(magrittr)
+library(qmap)       # fitQmapQUANT / doQmapQUANT
+#library(doParallel)
+#library(foreach)
+library(eurocordexr)
+library(PCICt)
+
+#registerDoParallel(parallel::detectCores() - 1)
+
+# testing bias-correction methods for tas
+BA.method<-"QM" # start with Empirical Quantile Mapping
+# the QM implementation needs to be improved by following https://github.com/SvenKotlarski/qmCH2018
+path_in_mod<-"/../CORDEX/" # already remapped and cropped files
+path_out<-"/../"           # output path for the bias-adjusted simulations
+variable<-"tasmin"
+scenario<-"rcp85"
+inventory<-readRDS(paste0(path_in_mod,"/",variable,"_inventory.RDS")) # or a list for the selected models
+CORDEX.models<-unique(paste0(inventory$gcm,"+",inventory$institute_rcm)) # total number of used models
+hist_years <- 1950:2005 # years in historical period
+proj_years <- 2006:2100 # years in projections
+cal_years <- 1981:2010  # years for the calibration
+
+# step 1
+# load daily observations for the variable under evaluation and for the reference period
+# observations are split by month and stored in separate folders for each year
+ref.years<-cal_years # observations cover the calibration period
+path_in_obs<-"/../OBS_DATA/"
+nc_files<-list.files(paste0(path_in_obs,variable,"/",ref.years[1]),pattern=".nc",full.names=T)
+obs.stack<-stack(lapply(nc_files,stack))
+for(i in ref.years[-1]){
+  nc_files<-list.files(paste0(path_in_obs,variable,"/",i),pattern=".nc",full.names=T)
+  suppressWarnings(tmp<-stack(lapply(nc_files,stack)))
+  obs.stack<-stack(obs.stack,tmp)
+}
+
+idx.TSTcell<-which(!is.na(getValues(obs.stack[[1]]))) # indices of cells which will be processed
+# convert stack into matrix with only points of interest
+# row=time col=points
+obs.dat<-as.data.frame(obs.stack)
+obs.dat<-obs.dat[idx.TSTcell,]; obs.dat<-t(data.matrix(obs.dat))
+
+# step 2
+# load CORDEX for the specific simulation and run the QM correction
+dates.ref<-seq(as.Date(paste0(min(cal_years),"/01/01")),as.Date(paste0(max(cal_years),"/12/31")),"day")
+dates.hist.full<-seq(as.Date(paste0(min(hist_years),"/01/01")),as.Date(paste0(max(hist_years),"/12/31")),"day") # CORDEX historical period
+dates.proj.full<-seq(as.Date(paste0(min(proj_years),"/01/01")),as.Date(paste0(max(proj_years),"/12/31")),"day") # CORDEX scenario period
+dates.full<-c(dates.hist.full,dates.proj.full) # full analysed period
+
+# loop over each gcm+rcm simulation --> this loop should be parallelized
+for(i.mod in seq_along(CORDEX.models)){
+
+  i.gcm<-strsplit(CORDEX.models[i.mod],"+",fixed=T)[[1]][1]
+  i.rcm<-strsplit(CORDEX.models[i.mod],"+",fixed=T)[[1]][2]
+
+  # load full data series for the gcm+rcm simulation
+  dat.mod<-load_CORDEX(gcm=i.gcm,rcm=i.rcm,
+                       path_in_mod=path_in_mod,variable=variable,
+                       cells=idx.TSTcell,
+                       dates.historical=dates.hist.full,
+                       dates.projections=dates.proj.full,
+                       scenario=scenario)
+
+  # matrix for storing QM corrected simulations
+  # row=time, col=points
+  mod.qm<-matrix(ncol=length(idx.TSTcell),nrow=length(dates.full))
+
+  # the loop is currently on months, but it should run over calendar days,
+  # selecting a 91-day window centered on each day to extract the correction:
+  # see implementation here: https://github.com/SvenKotlarski/qmCH2018
+  for(i.month in 1:12){
+
+    # extract observations in the considered month
+    obs<-obs.dat[which(month(dates.ref)==i.month),]
+    dates.obs<-dates.ref[which(month(dates.ref)==i.month)]
+    # extract model data over the calibration period for the monthly window
+    mod.hist<-dat.mod[which(year(dates.full)%in%cal_years & month(dates.full)==i.month),]
+
+    for(i.cell in seq_along(idx.TSTcell)){
+      # fit quantiles and extract the transfer function over the calibration period
+      # wet.day=TRUE for precipitation only
+      # this code should be modified in order to change the quantiles used:
+      # currently the function considers all centiles from 0 to 1; it would be
+      # better to use 0.01 to 0.99 only, with a constant correction outside this
+      # range, which is more robust when dealing with extremes --> qm with
+      # custom quantiles here: https://github.com/SvenKotlarski/qmCH2018
+      qm<-fitQmapQUANT(obs=as.numeric(obs[,i.cell]),mod=mod.hist[,i.cell],wet.day=F)
+      # apply quantile mapping to the model simulation over the entire spanned period, e.g. 1950-2100
+      mod.qm[which(month(dates.full)==i.month),i.cell]<-doQmapQUANT(dat.mod[which(month(dates.full)==i.month),i.cell],qm)
+    }
+  }
+  # for now save as .rda (R data file); eventually this has to be exported as NetCDF
+  name.out<-paste0(inventory[which(inventory$gcm==i.gcm & inventory$institute_rcm==i.rcm &
+                                     inventory$experiment==scenario),-c(9,10)],collapse="_")
+  save(mod.qm,file=paste0(path_out,variable,"/",BA.method,"/BiasAdjust_",name.out,".rda"),compress=T)
+}
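+
+# A minimal sketch of the NetCDF export mentioned above, using ncdf4 (already
+# loaded). The output layout is an assumption, not part of the pipeline yet:
+# mod.qm is written as a simple (cell, time) variable, with units assumed "K"
+# for temperature; mapping the cells back onto the 1 km grid with proper
+# coordinates still needs to be added.
+# dim_time<-ncdim_def("time","days since 1950-01-01",as.numeric(dates.full-as.Date("1950-01-01")))
+# dim_cell<-ncdim_def("cell","index",idx.TSTcell)
+# var_def<-ncvar_def(variable,"K",list(dim_cell,dim_time),missval=-999.)
+# nc_out<-nc_create(paste0(path_out,variable,"/",BA.method,"/BiasAdjust_",name.out,".nc"),var_def)
+# ncvar_put(nc_out,var_def,t(mod.qm)) # transpose: mod.qm is time x cells
+# nc_close(nc_out)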
--
GitLab