Skip to content
Snippets Groups Projects
Commit 7e281443 authored by Frisinghelli Daniel
Browse files

Adaptive preprocessing based on parameters of interest.

parent 9111e224
No related branches found
No related tags found
No related merge requests found
......@@ -9,7 +9,9 @@ import logging
import pathlib
# locals
from climax.core.constants import CORDEX_VARIABLES, CDO_RESAMPLING_MODES
from climax.core.constants import (CORDEX_VARIABLES, CORDEX_EXPERIMENTS,
EUROCORDEX_GCMS, EUROCORDEX_RCMS,
CDO_RESAMPLING_MODES)
# epilogue to display at the end of each parser
EPILOGUE = 'Author: Daniel Frisinghelli, daniel.frisinghelli@gmail.com'
......@@ -34,11 +36,6 @@ def preprocess_parser():
parser.add_argument('grid', type=pathlib.Path,
help='Path to the target grid file.')
# positional argument: name of the variable of interest
parser.add_argument('variable', type=str,
help='Name of the variable of interest.',
choices=CORDEX_VARIABLES)
# positional argument: path to search for Cordex NetCDF files
parser.add_argument('source', type=pathlib.Path,
help='Path to search for Cordex NetCDF files.')
......@@ -52,18 +49,40 @@ def preprocess_parser():
# default values
default = '(default: %(default)s)'
# optional argument: name of the variable of interest
# NOTE: the short and the long flag have to be passed as two separate
# strings. Adjacent string literals without a comma are concatenated by
# Python, which would register a single flag such as '-v--variable' and
# leave the expected attribute (e.g. ``args.variable``) undefined.
parser.add_argument('-v', '--variable', type=str,
                    help='Name of the variable of interest.',
                    choices=CORDEX_VARIABLES, default=None, metavar='')
# optional argument: name of the global climate model
parser.add_argument('-g', '--gcm', type=str,
                    help=('Name of the global climate model {}.'
                          .format(default)),
                    choices=EUROCORDEX_GCMS, default=None, metavar='')
# optional argument: name of the Cordex scenario
parser.add_argument('-s', '--scenario', type=str,
                    help=('Name of the Cordex scenario {}.'
                          .format(default)),
                    choices=CORDEX_EXPERIMENTS, default=None, metavar='')
# optional argument: name of the regional climate model
parser.add_argument('-r', '--rcm', type=str,
                    help=('Name of the regional climate model {}.'
                          .format(default)),
                    choices=EUROCORDEX_RCMS, default=None, metavar='')
# optional argument: ensemble identifier
parser.add_argument('-e', '--ensemble', type=str,
                    help=('Ensemble identifier {}.'.format(default)),
                    default=None, metavar='')
# optional argument: resampling mode
parser.add_argument('-m', '--mode', type=str,
help='Resampling mode {}.'.format(default),
default='bilinear', choices=CDO_RESAMPLING_MODES,
metavar='')
# optional argument: file pattern to search for in source directory
parser.add_argument('-p', '--pattern', type=str,
help=('(Regex) file pattern to search for in the '
'source directory {}.'.format(default)),
default='(.*).nc$', metavar='')
# optional argument: whether to overwrite files
parser.add_argument('-o', '--overwrite', type=bool,
help='Overwrite existing files {}.'.format(default),
......
......@@ -4,6 +4,7 @@
# -*- coding: utf-8 -*-
# builtins
import re
import sys
import logging
from joblib import Parallel, delayed
......@@ -13,6 +14,7 @@ from logging.config import dictConfig
from pysegcnn.core.logging import log_conf
from climax.core.utils import get_inventory, reproject_cdo
from climax.core.cli import preprocess_parser
from climax.core.constants import EUROCORDEX_DOMAIN
# module level logger
LOGGER = logging.getLogger(__name__)
......@@ -51,15 +53,29 @@ if __name__ == '__main__':
LOGGER.info('mkdir {}'.format(args.target))
args.target.mkdir(parents=True, exist_ok=True)
# check ensemble identifier: the command line default is None, in which case
# there is nothing to validate and the identifier is left unspecified
# NOTE(review): each index in the pattern matches exactly one digit, so
# identifiers with multi-digit indices are rejected -- confirm this is intended
ensemble_pattern = 'r[0-9]i[0-9]p[0-9]'
ensemble = None
if args.ensemble is not None:
    ensemble = re.search(ensemble_pattern, args.ensemble)
    if ensemble is None:
        # invalid user input: report it as an error and exit with a
        # non-zero status so that calling scripts can detect the failure
        LOGGER.error('Ensemble identifier "{}" invalid, form "{}" required.'
                     .format(args.ensemble, ensemble_pattern))
        sys.exit(1)
    # keep only the part of the input matching the required form
    ensemble = ensemble[0]
# construct file pattern to match from input parameters: a parameter that
# was not specified (None) is replaced by a wildcard instead of being dropped.
# Dropping it would make its neighbours adjacent in the pattern, which can
# not match a file name that still contains the omitted field.
# NOTE(review): presumes file names are structured as
# <variable>_<domain>_<gcm>_<scenario>_<ensemble>_<rcm>_<...>.nc -- confirm
wildcard = '(.*)'
pattern = '_'.join(
    [param if param is not None else wildcard for param in [
        args.variable, EUROCORDEX_DOMAIN, args.gcm, args.scenario,
        ensemble, args.rcm]] + [wildcard + '.nc$'])
# get all the files matching the defined pattern in the source
# directory
source = get_inventory(args.source, args.pattern, return_df=False)
source = get_inventory(args.source, pattern, return_df=False)
# extract files of the specified variable: the variable is the first
# '_'-separated field of the file name. When no variable was requested
# (args.variable is None), every matching file is kept instead of none.
source = [file for file in source.keys() if args.variable is None or
          file.stem.split('_')[0] == args.variable]
LOGGER.info('Files matching "{}" for variable "{}":'.format(
args.pattern, args.variable))
pattern, args.variable))
LOGGER.info(('\n ' + (len(__name__) + 1) * ' ').join(
['{}'.format(file) for file in source]))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment