Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
Climax
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
earth_observation_public
Climax
Commits
a323f6d1
Commit
a323f6d1
authored
3 years ago
by
Frisinghelli Daniel
Browse files
Options
Downloads
Patches
Plain Diff
Merge files for different years to single file.
parent
57530544
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
climax/main/download_ERA5.py
+38
-4
38 additions, 4 deletions
climax/main/download_ERA5.py
with
38 additions
and
4 deletions
climax/main/download_ERA5.py
+
38
−
4
View file @
a323f6d1
...
...
@@ -6,11 +6,17 @@
# builtins
import
os
import
pathlib
import
logging
from
logging.config
import
dictConfig
from
joblib
import
Parallel
,
delayed
# externals
import
cdsapi
import
numpy
as
np
import
xarray
as
xr
# locals
from
pysegcnn.core.logging
import
log_conf
# ERA-5 product
product
=
'
reanalysis-era5-pressure-levels
'
...
...
@@ -46,9 +52,15 @@ CONFIG = {
# output path
target
=
pathlib
.
Path
(
'
/mnt/CEPH_PROJECTS/FACT_CLIMAX/REANALYSIS/ERA5/
'
)
# module level Logger
LOGGER
=
logging
.
getLogger
(
__name__
)
if
__name__
==
'
__main__
'
:
# initialize logging
dictConfig
(
log_conf
())
# initialize client
c
=
cdsapi
.
Client
()
...
...
@@ -59,11 +71,33 @@ if __name__ == '__main__':
# make sure the output directory exists: ``exist_ok=True`` already makes
# the call a no-op for an existing directory, hence no prior existence check
output.mkdir(parents=True, exist_ok=True)

# create output files: one NetCDF file per year, named
# ERA5_<variable>_<year>.nc
files = [output.joinpath(f'ERA5_{var}_{year}.nc') for year in years]
# split the download to the different years: CDS API cannot handle
# requests over the whole time period
# years whose output file already exists on disk are not requested again
pending = [(file, year) for file, year in zip(files, years)
           if not file.exists()]

# nothing to download if all files are already on disk: joblib rejects
# n_jobs=0, hence the guard
if pending:
    # os.cpu_count() may return None: fall back to a single worker
    n_jobs = min(len(pending), os.cpu_count() or 1)

    # one request per missing year, each overriding the variable and the
    # year of the common request configuration
    Parallel(n_jobs=n_jobs, verbose=51)(
        delayed(c.retrieve)(
            product, {**CONFIG, 'variable': var, 'year': year}, file)
        for file, year in pending)
# aggregate files for different years into a single file using xarray
# and dask
ds = xr.open_mfdataset(files, parallel=True).compute()

# name of the aggregated file: ERA5_<variable>_<first year>_<last year>.nc
# NOTE: str.join accepts exactly one iterable, hence the list; the suffix
#       matches the naming of the files for the individual years
filename = output.joinpath(
    '_'.join(['ERA5', var, years[0], years[-1]]) + '.nc')

# set NetCDF file compression for each variable
# NOTE: the loop variable is deliberately not called ``var``, since that
#       name holds the name of the variable being downloaded
for data_var in ds.data_vars.values():
    data_var.encoding['zlib'] = True
    data_var.encoding['complevel'] = 5

# save aggregated netcdf file
LOGGER.info('Compressing NetCDF: {}'.format(filename))
ds.to_netcdf(filename, engine='h5netcdf')
# remove single netcdf files from disk
# LOGGER.info('Removing individual NetCDF files ...')
# for file in files:
# file.unlink()
# LOGGER.info('rm {}'.format(file))
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment