diff --git a/climax/main/download_ERA5.py b/climax/main/download_ERA5.py
index c00278ae3364ea89ba63ac98cfe6a342a47434cc..b8885798221eb15f6be6a58a0c554a752cb5c77f 100644
--- a/climax/main/download_ERA5.py
+++ b/climax/main/download_ERA5.py
@@ -35,9 +35,7 @@ area = [52, 2, 40, 20]
 # ERA5 download configuration dictionary
 CONFIG = {
     'product_type': product_type,
-    'variable': variables,
     'pressure_level': pressure_levels,
-    'year': years,
     'month': month,
     'day': days,
     'time': time,
@@ -55,15 +53,17 @@ if __name__ == '__main__':
     c = cdsapi.Client()
 
     # download data for the different variables
-
-    # sequential implementation
-    # for var in variables:
-    #     c.retrieve(product, {**CONFIG, **{'variable': var}}, str(
-    #         target.joinpath('_'.join(['ERA5', var, years[0], years[-1]])
-    #                         + '.nc')))
-
-    # parallel implementation
-    Parallel(n_jobs=min(len(variables), os.cpu_count()), verbose=51)(
-        delayed(c.retrieve)(product, {**CONFIG, **{'variable': var}}, str(
-            target.joinpath('_'.join(['ERA5', var, years[0], years[-1]])
-                            + '.nc'))) for var in variables)
+    for var in variables:
+        # create one output directory per variable (no-op if it exists)
+        output = target.joinpath(var)
+        output.mkdir(parents=True, exist_ok=True)
+
+        # split the download to the different years: CDS API cannot handle
+        # requests over the whole time period
+        # NOTE: os.cpu_count() may return None, hence the fallback to 1
+        Parallel(n_jobs=min(len(years), os.cpu_count() or 1), verbose=51)(
+            delayed(c.retrieve)(
+                product,
+                {**CONFIG, **{'variable': var, 'year': year}},
+                output.joinpath('_'.join(['ERA5', var, year]) + '.nc'))
+            for year in years)