Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
Climax
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
earth_observation_public
Climax
Commits
46be79b3
Commit
46be79b3
authored
3 years ago
by
Frisinghelli Daniel
Browse files
Options
Downloads
Patches
Plain Diff
Example of generalized linear regression in sklearn.
parent
3645b5e5
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
Notebooks/glm.ipynb
+0
-10
0 additions, 10 deletions
Notebooks/glm.ipynb
with
0 additions
and
10 deletions
Notebooks/glm.ipynb
+
0
−
10
View file @
46be79b3
...
...
@@ -184,16 +184,6 @@
"VALID_PERIOD = slice('1991-01-01', '2010-01-01')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cefcb0d1-e00e-4025-a872-b6da7e991876",
"metadata": {},
"outputs": [],
"source": [
"VALID_PERIOD.stop"
]
},
{
"cell_type": "code",
"execution_count": null,
...
...
%% Cell type:markdown id:0ade2427-0020-414a-957b-4ed71281b61f tags:
## Generalized linear regression
%% Cell type:code id:cfd6644a-9af1-4b71-8dac-1e423786e0a8 tags:
```
python
# builtins
import
pathlib
# externals
import
numpy
as
np
import
pandas
as
pd
import
xarray
as
xr
from
sklearn.linear_model
import
TweedieRegressor
# locals
from
climax.core.dataset
import
ERA5Dataset
from
climax.core.constants
import
ERA5_VARIABLES
from
climax.core.utils
import
search_files
```
%% Cell type:code id:646406dd-8c0d-4cdc-b7c3-b0b55b2ec4bd tags:
```
python
# path to ERA5 reanalysis data
ERA5_PATH
=
pathlib
.
Path
(
'
/mnt/CEPH_PROJECTS/FACT_CLIMAX/REANALYSIS/ERA5
'
)
```
%% Cell type:code id:18d222d5-fe39-4858-af39-04a14171cad6 tags:
```
python
# list of valid predictor variable names
ERA5_VARIABLES
```
%% Cell type:code id:1cdca239-c1c9-41c6-bce5-7929ff3b19de tags:
```
python
# define the predictor variables you want to use
ERA5_PREDICTORS
=
[
'
geopotential
'
,
'
temperature
'
,
'
mean_sea_level_pressure
'
]
# use geopotential, temperature and pressure
# you can change this list as you wish, e.g.:
# ERA5_PREDICTORS = ['geopotential', 'temperature'] # use only geopotential and temperature
# ERA5_PREDICTORS = ERA5_VARIABLES # use all ERA5 variables as predictors
# this checks if the variable names are correct
assert
all
([
p
in
ERA5_VARIABLES
for
p
in
ERA5_PREDICTORS
])
```
%% Cell type:markdown id:2fd49c6d-a7dc-49cb-87ba-c1f539ff2127 tags:
### Use the climax package to load ERA5 predictors
%% Cell type:code id:72e4c65c-add4-47a8-aae0-e4ae3301301a tags:
```
python
# define which pressure levels you want to use: currently only 500 and 850 are available
PLEVELS
=
[
500
,
850
]
```
%% Cell type:code id:35979cb6-0a18-41bd-9eea-081b187be5ff tags:
```
python
# create the xarray.Dataset of the specified predictor variables
predictors
=
ERA5Dataset
(
ERA5_PATH
,
ERA5_PREDICTORS
,
plevels
=
PLEVELS
)
predictors
=
predictors
.
merge
()
```
%% Cell type:code id:155c0954-cb8a-40d1-99e9-6c46281ae899 tags:
```
python
# check out the xarray.Dataset: you will see all the variables you specified
predictors
```
%% Cell type:markdown id:727fc0f2-c8ef-480d-a273-a07d3dff0141 tags:
### Load target data: observations
%% Cell type:code id:629d12b0-e4d1-4cc8-8c02-bd62bbb67cdc tags:
```
python
# path to observation data
OBS_PATH
=
pathlib
.
Path
(
'
/mnt/CEPH_PROJECTS/FACT_CLIMAX/OBSERVATION
'
)
```
%% Cell type:code id:c589cdea-09ed-415e-8613-a5ec311fdb2b tags:
```
python
# define the predictand, i.e. tasmax, tasmin or pr
PREDICTAND
=
'
tasmax
'
```
%% Cell type:code id:c8301c21-c32b-4123-a405-bde4b4d80f48 tags:
```
python
# load the observation data
predictand
=
xr
.
open_dataset
(
search_files
(
OBS_PATH
.
joinpath
(
PREDICTAND
),
'
.nc$
'
).
pop
())
```
%% Cell type:code id:4760558e-b1ed-4b16-9609-cf301aa39f55 tags:
```
python
# check out the xarray.Dataset: you will see a single variable
predictand
```
%% Cell type:markdown id:756e22e2-0b53-4d8f-877c-b9aee2659e17 tags:
### Prepare training data
%% Cell type:code id:8aec082a-1d6a-4932-8600-efc48f886cb3 tags:
```
python
# define the training period and the validation period
TRAIN_PERIOD
=
slice
(
'
1981-01-01
'
,
'
1991-01-01
'
)
VALID_PERIOD
=
slice
(
'
1991-01-01
'
,
'
2010-01-01
'
)
```
%% Cell type:code id:cefcb0d1-e00e-4025-a872-b6da7e991876 tags:
```
python
VALID_PERIOD
.
stop
```
%% Cell type:code id:0812673a-5a9d-48bf-ba87-3f96cb26ff49 tags:
```
python
# select training and validation data: predictors
predictors_train
=
predictors
.
sel
(
time
=
TRAIN_PERIOD
)
predictors_valid
=
predictors
.
sel
(
time
=
VALID_PERIOD
)
```
%% Cell type:code id:c326326c-5621-4ed4-af2b-a942bc678e71 tags:
```
python
# select training and validation data: predictand
predictand_train
=
predictand
.
sel
(
time
=
TRAIN_PERIOD
)
predictand_valid
=
predictand
.
sel
(
time
=
VALID_PERIOD
)
```
%% Cell type:markdown id:9a410594-714e-4124-b155-d2985ed5b6cb tags:
### Train the generalized linear regression model
%% Cell type:code id:57da5a56-bcd7-44eb-9304-62697d779f7f tags:
```
python
# instanciate the GLM
model
=
TweedieRegressor
(
power
=
0
if
PREDICTAND
in
[
'
tasmax
'
,
'
tasmin
'
]
else
2
)
model
# power = 0: Normal distribution (tasmax, tasmin)
# power = 1: Poisson distribution
# power = (1, 2): Compound Poisson Gamma distribution
# power = 2: Gamma distribution (pr)
# power = 3: Inverse Gaussian
```
%% Cell type:code id:802169db-8a22-42ad-8d4f-e29f676a49e9 tags:
```
python
# function to normalize predictors to [0, 1]
def
normalize
(
predictors
):
predictors
-=
predictors
.
min
(
axis
=
1
,
keepdims
=
True
)
predictors
/=
predictors
.
max
(
axis
=
1
,
keepdims
=
True
)
return
predictors
```
%% Cell type:code id:5382f429-d8e7-4636-8f47-922ae20a91cc tags:
```
python
# iterate over the grid points
prediction
=
np
.
ones
(
shape
=
(
len
(
predictors_valid
.
time
),
len
(
predictors_valid
.
y
),
len
(
predictors_valid
.
x
)))
*
np
.
nan
for
i
,
_
in
enumerate
(
predictors_train
.
x
):
for
j
,
_
in
enumerate
(
predictors_train
.
y
):
# current grid point: xarray.Dataset, dimensions=(time)
point_predictors
=
predictors_train
.
isel
(
x
=
i
,
y
=
j
)
point_predictand
=
predictand_train
.
isel
(
x
=
i
,
y
=
j
)
# convert xarray.Dataset to numpy.array: shape=(time, predictors)
point_predictors
=
point_predictors
.
to_array
().
values
.
swapaxes
(
0
,
1
)
point_predictand
=
point_predictand
.
to_array
().
values
.
squeeze
()
# check if the grid point is valid
if
np
.
isnan
(
point_predictors
).
any
()
or
np
.
isnan
(
point_predictand
).
any
():
# move on to next grid point
continue
# normalize each predictor variable to [0, 1]
point_predictors
=
normalize
(
point_predictors
)
# instanciate the model for the current grid point
model
=
TweedieRegressor
(
power
=
0
if
PREDICTAND
in
[
'
tasmax
'
,
'
tasmin
'
]
else
2
)
# train model on training data
model
.
fit
(
point_predictors
,
point_predictand
)
print
(
'
Processing grid point: ({:d}, {:d}), score: {:.2f}
'
.
format
(
j
,
i
,
model
.
score
(
point_predictors
,
point_predictand
)))
# prepare predictors of validation period
point_validation
=
predictors_valid
.
isel
(
x
=
i
,
y
=
j
).
to_array
().
values
.
swapaxes
(
0
,
1
)
point_validation
=
normalize
(
point_validation
)
# predict validation period
pred
=
model
.
predict
(
point_validation
)
# store predictions for current grid point
prediction
[:,
j
,
i
]
=
pred
# store predictions in xarray.Dataset
predictions
=
xr
.
DataArray
(
data
=
prediction
,
dims
=
[
'
time
'
,
'
y
'
,
'
x
'
],
coords
=
dict
(
time
=
pd
.
date_range
(
VALID_PERIOD
.
start
,
VALID_PERIOD
.
stop
,
freq
=
'
D
'
),
lat
=
predictand_valid
.
y
,
lon
=
predictand_valid
.
x
))
predictions
=
predictions
.
to_dataset
(
name
=
PREDICTAND
)
```
%% Cell type:markdown id:595ee11d-570d-42ed-8c40-226d724aa9fb tags:
### Save predictions as NetCDF
%% Cell type:code id:fb708c40-c0e3-4c62-bc4e-9d0b6bfc52d4 tags:
```
python
# specify the output path, filename: PREDICTAND.nc
OUTPUT_PATH
=
pathlib
.
Path
(
'
~/{}
'
.
format
(
PREDICTAND
+
'
.nc
'
))
# save to NetCDF
predictions
.
to_netcdf
(
OUTPUT_PATH
,
engine
=
'
h5netcdf
'
)
```
%% Cell type:code id:fc9aba1a-fde1-48c6-85e7-02cb5ebdcfa7 tags:
```
python
# Enjoy and have fun!
```
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment