diff --git a/climax/main/preprocess.py b/climax/main/preprocess.py index 20e60c5508786eed94dc84c1dbd0df46bb4b225e..8ae3457a50cf8b0fd4779f6be7f1a708e4d8b898 100644 --- a/climax/main/preprocess.py +++ b/climax/main/preprocess.py @@ -12,9 +12,10 @@ from logging.config import dictConfig # locals from pysegcnn.core.logging import log_conf +from pysegcnn.core.trainer import LogConfig from climax.core.utils import get_inventory, reproject_cdo from climax.core.cli import preprocess_parser -from climax.core.constants import EUROCORDEX_DOMAIN +from climax.core.constants import EUROCORDEX_DOMAIN, CORDEX_PARAMETERS # module level logger LOGGER = logging.getLogger(__name__) @@ -84,17 +85,45 @@ if __name__ == '__main__': # directory source = sorted(get_inventory(args.source, pattern, return_df=False)) - # extract files of the specified variable - LOGGER.info('Files matching "{}" for variable "{}":'.format( + # log list of input files + LogConfig.init_log('Files matching "{}" for variable "{}":'.format( pattern, args.variable)) LOGGER.info(('\n ' + (len(__name__) + 1) * ' ').join( ['{}'.format(file) for file in source])) + # generate target filenames + # output path: target/var/scenario + if (args.variable is not None) and (args.scenario is not None): + # if variable and scenarios are specified, directly build + # output path + output_path = args.target.joinpath(args.variable).joinpath( + args.scenario) + + # list of output files + target = [output_path.joinpath(file.name) for file in source] + else: + # if variable and scenarios are not specified, infer output + # path from file names + target = [] + for file in source: + # parts: dictionary of file name components + parts = {k: v for k, v in zip(CORDEX_PARAMETERS, + file.stem.split('_'))} + + # construct output path for current file + output_path = args.target.joinpath( + parts['Variable']).joinpath(parts['Scenario']) + + # output file name + target.append(output_path.joinpath(file.name)) + + # log list of output files + LogConfig.init_log('Output file names') + LOGGER.info(('\n ' + (len(__name__) + 1) * ' ').join( + ['{}'.format(file) for file in target])) + # check whether to only print which files would be processed if not args.dry_run: - # generate target filenames - target = [args.target.joinpath(file.name) for file in source] - # run reprojection in parallel target = Parallel(n_jobs=-1, verbose=51)( delayed(reproject_cdo)(args.grid, src, trg, args.mode,