"""Reproject and resample Cordex data to target grid."""

# !/usr/bin/env python
# -*- coding: utf-8 -*-

# builtins
import sys
import logging
from logging.config import dictConfig

# externals
from joblib import Parallel, delayed

# locals
from pysegcnn.core.logging import log_conf
from climax.core.utils import get_inventory, reproject_cdo
from climax.core.cli import preprocess_parser

# module level logger
LOGGER = logging.getLogger(__name__)


if __name__ == '__main__':

    # configure logging
    dictConfig(log_conf())

    # define command line argument parser
    parser = preprocess_parser()

    # parse command line arguments: when called without any argument, print
    # the usage and terminate normally
    argv = sys.argv[1:]
    if not argv:
        parser.print_help()
        sys.exit()
    args = parser.parse_args(argv)

    # The checks below are guard clauses: each failure is logged as an error
    # and terminates the script with a non-zero exit status, so that a
    # calling shell or scheduler can detect that nothing was processed.

    # check whether the source directory exists
    if not args.source.exists():
        LOGGER.error('{} does not exist.'.format(str(args.source)))
        sys.exit(1)

    # check whether the target grid file exists
    if not args.grid.exists():
        LOGGER.error('{} does not exist.'.format(args.grid))
        sys.exit(1)

    # check that the target directory is different from the source directory
    if args.target == args.source:
        LOGGER.error('Source and target paths cannot be equal.')
        sys.exit(1)

    # create target directory
    if not args.target.exists():
        LOGGER.info('mkdir {}'.format(args.target))
        args.target.mkdir(parents=True, exist_ok=True)

    # get all the files matching the defined pattern in the source directory
    inventory = get_inventory(args.source, args.pattern, return_df=False)

    # extract files of the specified variable: the variable name is expected
    # to be the leading part of the file name
    source = [file for file in inventory
              if file.stem.startswith(args.variable)]

    # list the selected files, this is the only output of a dry run
    LOGGER.info('Files matching "{}" for variable "{}":'.format(
        args.pattern, args.variable))
    for file in source:
        LOGGER.info('    {}'.format(file))

    # check whether to only print which files would be processed
    if not args.dry_run:
        # generate target filenames: same file name, but located in the
        # target directory
        target = [args.target.joinpath(file.name) for file in source]

        # run reprojection in parallel: one job per source file, using all
        # available cores (n_jobs=-1)
        Parallel(n_jobs=-1, verbose=51)(
            delayed(reproject_cdo)(args.grid, src, trg, args.mode,
                                   args.overwrite)
            for src, trg in zip(source, target))