diff --git a/climax/main/preprocess.py b/climax/main/preprocess.py index dab0ef237570f8c6a43c15d9f6a43d01b692c910..f6b537db302185db91368e48e9d7f38b2d184474 100644 --- a/climax/main/preprocess.py +++ b/climax/main/preprocess.py @@ -148,6 +148,7 @@ if __name__ == '__main__': # check whether to aggregate the netcdf files of a simulation covering # differnt time periods into a single file if args.aggregate: + LogConfig.init_log('Aggregating time periods of simulations.') # list of unique simulations simulations = np.unique([file.stem.rpartition('_')[0] for file in target]) @@ -159,23 +160,32 @@ if __name__ == '__main__': group = [file for file in target if file.name.startswith(sim)] group = sorted(group) - # read multiple netcdf files using xarray and dask - ds = xr.open_mfdataset(group) - # create filename for netcdf covering the entire time period of # the current simulation y_min, _ = _parse_cordex_time_span(group[0]) # first year _, y_max = _parse_cordex_time_span(group[-1]) # last year filename = '_'.join([sim, '-'.join([y_min, y_max])]) - filename = file.parent.joinpath(filename) + filename = group[0].parent.joinpath(filename) + + # log simulation name, time span and files + LOGGER.info('Aggregating simulation: {}, Time span: {}' + .format(sim, '-'.join([y_min, y_max]))) + LOGGER.info(('\n ' + (len(__name__) + 1) * ' ').join( + ['{}'.format(file) for file in group])) + + # read multiple netcdf files using xarray and dask + ds = xr.open_mfdataset(group) # save aggregated netcdf file + LOGGER.info('Saving aggregated NetCDF: {}'.format(filename)) ds.to_netcdf(filename) # remove single netcdf files from disk if args.remove: + LOGGER.info('Removing individual NetCDF files ...') for file in group: file.unlink() + LOGGER.info('rm {}'.format(file)) else: LOGGER.info('{} does not exist.'.format(str(args.source)))