diff --git a/climax/main/download_ERA5.py b/climax/main/download_ERA5.py
index 82a2bfd28a0800be9adeb5e9deeb88a5a1535074..2dfd77bb111f54a8eeb086810d10e6bc7655afb6 100644
--- a/climax/main/download_ERA5.py
+++ b/climax/main/download_ERA5.py
@@ -5,18 +5,15 @@
 # builtins
 import os
-import pathlib
-import logging
-from logging.config import dictConfig
 from joblib import Parallel, delayed
 
 # externals
 import cdsapi
 import numpy as np
-import xarray as xr
 
 # locals
-from pysegcnn.core.logging import log_conf
+from climax.core.constants import ERA5_VARIABLES
+from climax.main.config import ERA5_PATH
 
 # ERA-5 product
 product = 'reanalysis-era5-pressure-levels'
 
@@ -25,14 +22,10 @@ product_type = 'reanalysis'
 # pressure levels
 pressure_levels = ['850', '500']
 
-# variables
-variables = ['geopotential', 'temperature', 'u_component_of_wind',
-             'v_component_of_wind', 'specific_humidity']
-
 # time period
 years = [str(y) for y in np.arange(1981, 2011)]
 month = [str(m) for m in np.arange(1, 13)]
-days = [str(d) for d in np.arange(1, 31)]
+days = [str(d) for d in np.arange(1, 32)]
 time = ["{:02d}:00".format(t) for t in np.arange(0,24)]
 
 # area of interest (Alps): North, West, South, East
@@ -49,25 +42,16 @@ CONFIG = {
     'area': area
 }
 
-# output path
-target = pathlib.Path('/mnt/CEPH_PROJECTS/FACT_CLIMAX/REANALYSIS/ERA5/')
-
-# module level Logger
-LOGGER = logging.getLogger(__name__)
-
 if __name__ == '__main__':
 
-    # initialize logging
-    dictConfig(log_conf())
-
     # initialize client
     c = cdsapi.Client()
 
     # download data for the different variables
-    for var in variables:
+    for var in ERA5_VARIABLES:
 
         # create output directory
-        output = target.joinpath(var)
+        output = ERA5_PATH.joinpath('Downloads', var)
         if not output.exists():
             output.mkdir(parents=True, exist_ok=True)
 
@@ -81,25 +65,3 @@ if __name__ == '__main__':
             delayed(c.retrieve)(
                 product, {**CONFIG, **{'variable': var, 'year': year}}, file)
             for file, year in zip(files, years) if not file.exists())
-
-        # TODO: Move to preprocess_ERA5.py
-
-        # aggregate files for different years into a single file using xarray
-        # and dask
-        ds = xr.open_mfdataset(files, parallel=True).compute()
-        filename = output.joinpath('_'.join('ERA5', var, years[0], years[-1]))
-
-        # set NetCDF file compression for each variable
-        for _, var in ds.data_vars.items():
-            var.encoding['zlib'] = True
-            var.encoding['complevel'] = 5
-
-        # save aggregated netcdf file
-        LOGGER.info('Compressing NetCDF: {}'.format(filename))
-        ds.to_netcdf(filename, engine='h5netcdf')
-
-        # remove single netcdf files from disk
-        # LOGGER.info('Removing individual NetCDF files ...')
-        # for file in files:
-        #     file.unlink()
-        #     LOGGER.info('rm {}'.format(file))