Skip to content
Snippets Groups Projects
Commit 90cfbf56 authored by Frisinghelli Daniel's avatar Frisinghelli Daniel
Browse files

Reprojection on daily averaged files.

parent 23100a2b
No related branches found
No related tags found
No related merge requests found
......@@ -81,24 +81,38 @@ if __name__ == '__main__':
continue
# create filenames for reprojected and resampled files
target = [args.target.joinpath(f.name) for f in source]
target = [args.target.joinpath(var, f.name) for f in source]
dlyavg = [args.target.joinpath(var, f.name.replace('.nc', '_d.nc'))
for f in source]
# aggregate hourly data to daily data: resample in case of missing
# days
LOGGER.info('Computing daily averages ...')
for src, tmp in zip(source, dlyavg):
ds = xr.open_dataset(source)
# compute daily averages
ds = ds.resample(time='D').mean(dim='time')
# save intermediate file for resampling and reprojection to
# target grid
ds.to_netcdf(tmp, engine='h5netcdf')
# reproject and resample to target grid in parallel
target = Parallel(n_jobs=-1, verbose=51)(
delayed(reproject_cdo)(args.grid, src, trg, args.mode,
delayed(reproject_cdo)(args.grid, tmp, trg, args.mode,
args.overwrite)
for src, trg in zip(source, target))
for tmp, trg in zip(dlyavg, target))
# remove temporary daily averages
for avg in dlyavg:
avg.unlink()
            # aggregate files for different years into a single file using
            # xarray and dask
            LOGGER.info('Reading ERA5 data ...')
            ds = xr.open_mfdataset(target, parallel=True).compute()
            # aggregate hourly data to daily data: resample in case of missing
            # days
            # NOTE(review): daily averaging already happens before the
            # reprojection step in this commit, so this second resample may be
            # redundant (or a leftover pre-commit line in the diff) -- confirm
            # against the full post-commit file.
            LOGGER.info('Computing daily averages ...')
            ds = ds.resample(time='D').mean(dim='time')
            # set NetCDF file compression for each variable
            # NOTE(review): ``var`` here shadows the outer loop variable used
            # earlier to build the target paths -- harmless after that loop
            # ends, but worth renaming.
            for _, var in ds.data_vars.items():
                var.encoding['zlib'] = True
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment