Skip to content
Snippets Groups Projects
Commit 40ee4efa authored by Frisinghelli Daniel's avatar Frisinghelli Daniel
Browse files

Include 31st day of the month.

parent 91dfb48f
No related branches found
No related tags found
No related merge requests found
@@ -5,18 +5,15 @@
# builtins
import os
import pathlib
import logging
from logging.config import dictConfig
from joblib import Parallel, delayed
# externals
import cdsapi
import numpy as np
import xarray as xr
# locals
from pysegcnn.core.logging import log_conf
from climax.core.constants import ERA5_VARIABLES
from climax.main.config import ERA5_PATH
# ERA-5 product
# CDS dataset identifier for the ERA5 reanalysis on pressure levels.
product = 'reanalysis-era5-pressure-levels'
@@ -25,14 +22,10 @@ product_type = 'reanalysis'
# pressure levels (hPa) requested from the CDS API
pressure_levels = ['850', '500']

# variables
variables = ['geopotential', 'temperature', 'u_component_of_wind',
             'v_component_of_wind', 'specific_humidity']

# time period: climatological reference period 1981-2010
# (np.arange upper bounds are exclusive throughout)
years = [str(y) for y in np.arange(1981, 2011)]
month = [str(m) for m in np.arange(1, 13)]
# upper bound 32 yields days 1..31, so the 31st of the month is included
# (BUGFIX: removed the superseded duplicate assignment with arange(1, 31),
# which silently dropped the 31st and was immediately overwritten)
days = [str(d) for d in np.arange(1, 32)]
# hourly time steps '00:00' .. '23:00'
time = ["{:02d}:00".format(t) for t in np.arange(0, 24)]

# area of interest (Alps): North, West, South, East
@@ -49,25 +42,16 @@ CONFIG = {
'area': area
}
# output path
# NOTE(review): this hard-coded path looks superseded by ERA5_PATH from
# climax.main.config, which the download loop below uses instead — confirm
# before reusing `target`.
target = pathlib.Path('/mnt/CEPH_PROJECTS/FACT_CLIMAX/REANALYSIS/ERA5/')
# module level Logger
LOGGER = logging.getLogger(__name__)
# Script entry point: configure logging, create a CDS API client, and
# download each ERA5 variable into its own output directory.
if __name__ == '__main__':
# initialize logging
dictConfig(log_conf())
# initialize client
c = cdsapi.Client()
# download data for the different variables
# NOTE(review): the next two lines are the pre-/post-commit variants of the
# same loop header from the diff view; the current code iterates
# ERA5_VARIABLES (imported from climax.core.constants).
for var in variables:
for var in ERA5_VARIABLES:
# create output directory
# NOTE(review): likewise pre-/post-commit variants of one line — the
# current code writes under ERA5_PATH / 'Downloads' / <variable>.
output = target.joinpath(var)
output = ERA5_PATH.joinpath('Downloads', var)
if not output.exists():
output.mkdir(parents=True, exist_ok=True)
@@ -81,25 +65,3 @@ if __name__ == '__main__':
delayed(c.retrieve)(
product, {**CONFIG, **{'variable': var, 'year': year}}, file)
for file, year in zip(files, years) if not file.exists())
# TODO: Move to preprocess_ERA5.py
# Aggregate the per-year files into a single compressed NetCDF file using
# xarray and dask.
ds = xr.open_mfdataset(files, parallel=True).compute()

# name of the aggregated file: ERA5_<variable>_<first year>_<last year>
# BUGFIX: str.join takes a single iterable argument; passing the parts as
# separate positional arguments raises TypeError at runtime.
filename = output.joinpath('_'.join(['ERA5', var, years[0], years[-1]]))

# set NetCDF file compression for each variable
# BUGFIX: use a dedicated loop name so the enclosing download loop's
# ``var`` (the current ERA5 variable name) is not clobbered.
for _, data_array in ds.data_vars.items():
    data_array.encoding['zlib'] = True
    data_array.encoding['complevel'] = 5

# save aggregated netcdf file
LOGGER.info('Compressing NetCDF: {}'.format(filename))
ds.to_netcdf(filename, engine='h5netcdf')

# remove single netcdf files from disk
# LOGGER.info('Removing individual NetCDF files ...')
# for file in files:
#     file.unlink()
#     LOGGER.info('rm {}'.format(file))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment