Adaptive preprocessing based on parameters of interest.

7e281443 · Frisinghelli Daniel · 9111e224 · 7e281443 · 7e281443
Commit 7e281443 authored 3 years ago by Frisinghelli Daniel
--- a/climax/core/cli.py
+++ b/climax/core/cli.py
@@ -9,7 +9,9 @@ import logging
 import pathlib

 # locals
-from climax.core.constants import CORDEX_VARIABLES, CDO_RESAMPLING_MODES
+from climax.core.constants import (CORDEX_VARIABLES, CORDEX_EXPERIMENTS,
+                                   EUROCORDEX_GCMS,  EUROCORDEX_RCMS,
+                                   CDO_RESAMPLING_MODES)

 # epilogue to display at the end of each parser
 EPILOGUE = 'Author: Daniel Frisinghelli, daniel.frisinghelli@gmail.com'
@@ -34,11 +36,6 @@ def preprocess_parser():
    parser.add_argument('grid', type=pathlib.Path,
                        help='Path to the target grid file.')

-    # positional argument: name of the variable of interest
-    parser.add_argument('variable', type=str,
-                        help='Name of the variable of interest.',
-                        choices=CORDEX_VARIABLES)
-
    # positional argument: path to search for Cordex NetCDF files
    parser.add_argument('source', type=pathlib.Path,
                        help='Path to search for Cordex NetCDF files.')
@@ -52,18 +49,40 @@ def preprocess_parser():
    # default values
    default = '(default: %(default)s)'

+    # optional argument: name of the variable of interest
+    parser.add_argument('-v', '--variable', type=str,
+                        help='Name of the variable of interest.',
+                        choices=CORDEX_VARIABLES, default=None, metavar='')
+
+    # optional argument: name of the global climate model
+    parser.add_argument('-g', '--gcm', type=str,
+                        help=('Name of the global climate model {}.'
+                              .format(default)),
+                        choices=EUROCORDEX_GCMS, default=None, metavar='')
+
+    # optional argument: name of the Cordex scenario
+    parser.add_argument('-s', '--scenario', type=str,
+                        help=('Name of the Cordex scenario {}.'
+                              .format(default)),
+                        choices=CORDEX_EXPERIMENTS, default=None, metavar='')
+
+    # optional argument: name of the regional climate model
+    parser.add_argument('-r', '--rcm', type=str,
+                        help=('Name of the regional climate model {}.'
+                              .format(default)),
+                        choices=EUROCORDEX_RCMS, default=None, metavar='')
+
+    # optional argument: ensemble member identifier, e.g. r1i1p1
+    parser.add_argument('-e', '--ensemble', type=str,
+                        help=('Ensemble identifier {}.'.format(default)),
+                        default=None, metavar='')
+
+
    # optional argument: resampling mode
    parser.add_argument('-m', '--mode', type=str,
                        help='Resampling mode {}.'.format(default),
                        default='bilinear', choices=CDO_RESAMPLING_MODES,
                        metavar='')

-    # optional argument: file pattern to search for in source directory
-    parser.add_argument('-p', '--pattern', type=str,
-                        help=('(Regex) file pattern to search for in the '
-                              'source directory {}.'.format(default)),
-                        default='(.*).nc$', metavar='')
-
    # optional argument: whether to overwrite files
    parser.add_argument('-o', '--overwrite', type=bool,
                        help='Overwrite existing files {}.'.format(default),

--- a/climax/main/preprocess.py
+++ b/climax/main/preprocess.py
@@ -4,6 +4,7 @@
 # -*- coding: utf-8 -*-

 # builtins
+import re
 import sys
 import logging
 from joblib import Parallel, delayed
@@ -13,6 +14,7 @@ from logging.config import dictConfig
 from pysegcnn.core.logging import log_conf
 from climax.core.utils import get_inventory, reproject_cdo
 from climax.core.cli import preprocess_parser
+from climax.core.constants import EUROCORDEX_DOMAIN

 # module level logger
 LOGGER = logging.getLogger(__name__)
@@ -51,15 +53,29 @@ if __name__ == '__main__':
            LOGGER.info('mkdir {}'.format(args.target))
            args.target.mkdir(parents=True, exist_ok=True)

+        # check ensemble identifier: optional, None skips the validation
+        ensemble_pattern = 'r[0-9]i[0-9]p[0-9]'
+        ensemble = (re.search(ensemble_pattern, args.ensemble)
+                    if args.ensemble is not None else None)
+        if args.ensemble is not None and ensemble is None:
+            LOGGER.info('Ensemble identifier "{}" invalid, form "{}" required.'
+                        .format(args.ensemble, ensemble_pattern))
+            sys.exit()
+        ensemble = ensemble[0] if ensemble is not None else None
+
+        # construct file pattern: unspecified parameters match any value
+        pattern = '(.*).nc$'
+        pattern = '_'.join(['(.*)' if param is None else param for param in [
+            args.variable, EUROCORDEX_DOMAIN, args.gcm, args.scenario,
+            ensemble, args.rcm, pattern]])
+
        # get all the files matching the defined pattern in the source
        # directory
-        source = get_inventory(args.source, args.pattern, return_df=False)
+        source = get_inventory(args.source, pattern, return_df=False)

        # extract files of the specified variable
-        source = [file for file in source.keys() if
-                  file.stem.split('_')[0] == args.variable]
        LOGGER.info('Files matching "{}" for variable "{}":'.format(
-            args.pattern, args.variable))
+            pattern, args.variable))
        LOGGER.info(('\n ' + (len(__name__) + 1) * ' ').join(
                    ['{}'.format(file) for file in source]))