From 4fdfa76b1a342ab659a2a54e644a994867aa03ba Mon Sep 17 00:00:00 2001
From: "Daniel.Frisinghelli" <daniel.frisinghelli@eurac.edu>
Date: Fri, 21 May 2021 15:04:14 +0200
Subject: [PATCH] Improved logging for aggregation.

---
 climax/main/preprocess.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/climax/main/preprocess.py b/climax/main/preprocess.py
index dab0ef2..f6b537d 100644
--- a/climax/main/preprocess.py
+++ b/climax/main/preprocess.py
@@ -148,6 +148,7 @@ if __name__ == '__main__':
         # check whether to aggregate the netcdf files of a simulation covering
         # differnt time periods into a single file
         if args.aggregate:
+            LogConfig.init_log('Aggregating time periods of simulations.')
             # list of unique simulations
             simulations = np.unique([file.stem.rpartition('_')[0] for file in
                                      target])
@@ -159,23 +160,32 @@ if __name__ == '__main__':
                 group = [file for file in target if file.name.startswith(sim)]
                 group = sorted(group)
 
-                # read multiple netcdf files using xarray and dask
-                ds = xr.open_mfdataset(group)
-
                 # create filename for netcdf covering the entire time period of
                 # the current simulation
                 y_min, _ = _parse_cordex_time_span(group[0])  # first year
                 _, y_max = _parse_cordex_time_span(group[-1])  # last year
                 filename = '_'.join([sim, '-'.join([y_min, y_max])])
-                filename = file.parent.joinpath(filename)
+                filename = group[0].parent.joinpath(filename)
+
+                # log simulation name, time span and files
+                LOGGER.info('Aggregating simulation: {}, Time span: {}'
+                            .format(sim, '-'.join([y_min, y_max])))
+                LOGGER.info(('\n ' + (len(__name__) + 1) * ' ').join(
+                    ['{}'.format(file) for file in group]))
+
+                # read multiple netcdf files using xarray and dask
+                ds = xr.open_mfdataset(group)
 
                 # save aggregated netcdf file
+                LOGGER.info('Saving aggregated NetCDF: {}'.format(filename))
                 ds.to_netcdf(filename)
 
                 # remove single netcdf files from disk
                 if args.remove:
+                    LOGGER.info('Removing individual NetCDF files ...')
                     for file in group:
                         file.unlink()
+                        LOGGER.info('rm {}'.format(file))
 
     else:
         LOGGER.info('{} does not exist.'.format(str(args.source)))
-- 
GitLab