# !/usr/bin/env python
# -*- coding: utf-8 -*-

# builtins
import pathlib
import logging

# externals
import pandas as pd
import cdo

# locals
from pysegcnn.core.utils import search_files
from climax.core.constants import CORDEX_PARAMETERS, CDO_RESAMPLING_MODES

# module level logger
LOGGER = logging.getLogger(__name__)

# resampling mode -> name of the cdo operator that implements it
_CDO_REMAP_OPERATORS = {'bilinear': 'remapbil', 'con': 'remapcon'}


def get_inventory(path, pattern='(.*).nc$', return_df=False):
    """Collect the files in ``path`` and the parameters encoded in their names.

    Parameters
    ----------
    path : `str` or `pathlib.Path`
        Directory handed to ``search_files``.
    pattern : `str`, optional
        Pattern handed to ``search_files``. The default is meant to select
        NetCDF files. NOTE(review): the dot in the default is not escaped,
        hence it matches any character in front of ``nc``.
    return_df : `bool`, optional
        Whether to return a `pandas.DataFrame` instead of a dictionary. The
        default is `False`.

    Returns
    -------
    inventory : `dict` or `pandas.DataFrame`
        If ``return_df=False``, a dictionary mapping each file to the list of
        underscore-separated parts of its stem. If ``return_df=True``, the
        same data as a DataFrame indexed by file, with ``CORDEX_PARAMETERS``
        as columns.

    """
    # find all files in path matching the pattern
    files = search_files(path, pattern)

    # create dictionary: (filename: [simulation parameters])
    inventory = {file: file.stem.split('_') for file in files}

    # create a DataFrame: materialize the dictionary views as lists, since
    # not every pandas version accepts views as data or index
    if return_df:
        inventory = pd.DataFrame(data=list(inventory.values()),
                                 index=list(inventory.keys()),
                                 columns=CORDEX_PARAMETERS)

    return inventory


def reproject_cdo(grid, src_ds, trg_ds, mode='bilinear', overwrite=False):
    """Remap ``src_ds`` to ``grid`` using cdo and write the result to ``trg_ds``.

    Parameters
    ----------
    grid : `str` or `pathlib.Path`
        Target grid, passed as argument to the cdo remapping operator.
    src_ds : `str` or `pathlib.Path`
        Dataset to remap.
    trg_ds : `str` or `pathlib.Path`
        Path of the remapped dataset.
    mode : `str`, optional
        Resampling mode: ``'bilinear'`` runs ``remapbil``, ``'con'`` runs
        ``remapcon``. The default is ``'bilinear'``.
    overwrite : `bool`, optional
        Whether to remap again if ``trg_ds`` already exists. The default is
        `False`.

    Returns
    -------
    trg_ds : `str` or `pathlib.Path`
        The path passed as ``trg_ds``, unchanged.

    Raises
    ------
    ValueError
        Raised if ``mode`` is not in ``CDO_RESAMPLING_MODES`` or if no cdo
        operator is implemented for it.

    """
    # check if target dataset exists
    if pathlib.Path(trg_ds).exists() and not overwrite:
        LOGGER.info('%s already exists. Aborting ...', trg_ds)
        return trg_ds

    # check if mode is supported: a mode without an operator is rejected as
    # well, otherwise the function would return the path of a dataset that
    # was never written
    if mode not in CDO_RESAMPLING_MODES or mode not in _CDO_REMAP_OPERATORS:
        raise ValueError('Resampling mode "{}" not supported.'.format(mode))

    # instanciate the cdo only when a remapping is actually required
    remap = getattr(cdo.Cdo(), _CDO_REMAP_OPERATORS[mode])

    # the cdo bindings expect the files as keywords "input" and "output"
    LOGGER.info('Reproject: %s', trg_ds)
    remap(str(grid), input=str(src_ds), output=str(trg_ds))

    return trg_ds