Skip to content
Snippets Groups Projects
Commit 45cd5f33 authored by Frisinghelli Daniel
Browse files

Change output paths for hyperparameter tuning.

parent c67b38e9
No related branches found
No related tags found
No related merge requests found
...@@ -23,6 +23,9 @@ LRSCHEDULER=(None torch.optim.lr_scheduler.CyclicLR) ...@@ -23,6 +23,9 @@ LRSCHEDULER=(None torch.optim.lr_scheduler.CyclicLR)
# weight decay values to test # weight decay values to test
LAMBDA=(0 0.000001 0.00001 0.0001 0.001 0.01 1) LAMBDA=(0 0.000001 0.00001 0.0001 0.001 0.01 1)
# change flag for sensitivity analysis in configuration
sed -i "s/SENSITIVITY\s*=.*/SENSITIVITY\=True/" ./climax/main/config.py
# iterate over predictands # iterate over predictands
for predictand in ${PREDICTAND[@]}; do for predictand in ${PREDICTAND[@]}; do
......
...@@ -103,6 +103,9 @@ assert PREDICTAND in PREDICTANDS ...@@ -103,6 +103,9 @@ assert PREDICTAND in PREDICTANDS
# threshold defining the minimum amount of precipitation (mm) for a wet day # threshold defining the minimum amount of precipitation (mm) for a wet day
WET_DAY_THRESHOLD = 1 WET_DAY_THRESHOLD = 1
# whether training a hyperparameter sensitivity analysis
SENSITIVITY = False
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# Model training configuration ------------------------------------------------ # Model training configuration ------------------------------------------------
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
......
...@@ -24,7 +24,8 @@ from climax.core.utils import split_date_range ...@@ -24,7 +24,8 @@ from climax.core.utils import split_date_range
from climax.main.config import (ERA5_PREDICTORS, ERA5_PLEVELS, PREDICTAND, NET, from climax.main.config import (ERA5_PREDICTORS, ERA5_PLEVELS, PREDICTAND, NET,
VALID_PERIOD, BATCH_SIZE, NORM, DOY, NYEARS, VALID_PERIOD, BATCH_SIZE, NORM, DOY, NYEARS,
DEM, DEM_FEATURES, LOSS, ANOMALIES, OPTIM, DEM, DEM_FEATURES, LOSS, ANOMALIES, OPTIM,
OPTIM_PARAMS, CHUNKS, LR_SCHEDULER, OVERWRITE) OPTIM_PARAMS, CHUNKS, LR_SCHEDULER, OVERWRITE,
SENSITIVITY)
from climax.main.io import ERA5_PATH, DEM_PATH, MODEL_PATH, TARGET_PATH from climax.main.io import ERA5_PATH, DEM_PATH, MODEL_PATH, TARGET_PATH
# module level logger # module level logger
...@@ -44,11 +45,16 @@ if __name__ == '__main__': ...@@ -44,11 +45,16 @@ if __name__ == '__main__':
lr_scheduler=LR_SCHEDULER) lr_scheduler=LR_SCHEDULER)
# path to model state # path to model state
state_file = MODEL_PATH.joinpath(PREDICTAND, state_file) if SENSITIVITY:
# models trained for hyperparameter optimization
state_file = MODEL_PATH.joinpath(PREDICTAND, 'sensitivity', state_file)
target = TARGET_PATH.joinpath(PREDICTAND, 'sensitivity')
else:
state_file = MODEL_PATH.joinpath(PREDICTAND, state_file)
target = TARGET_PATH.joinpath(PREDICTAND)
# check if target dataset already exists # check if target dataset already exists
target = TARGET_PATH.joinpath(PREDICTAND, state_file.name.replace( target = target.joinpath(state_file.name.replace(state_file.suffix, '.nc'))
state_file.suffix, '.nc'))
if target.exists() and not OVERWRITE: if target.exists() and not OVERWRITE:
LogConfig.init_log('{} already exists.'.format(target)) LogConfig.init_log('{} already exists.'.format(target))
sys.exit() sys.exit()
......
"""Dynamical climate downscaling using deep convolutional neural networks."""
# !/usr/bin/env python
# -*- coding: utf-8 -*-
# builtins
import sys
import time
import logging
from datetime import timedelta
from logging.config import dictConfig
# externals
import xarray as xr
# locals
from pysegcnn.core.trainer import LogConfig
from pysegcnn.core.models import Network
from pysegcnn.core.logging import log_conf
from pysegcnn.core.utils import search_files
from climax.core.dataset import ERA5Dataset
from climax.core.predict import predict_ERA5
from climax.main.config import (ERA5_PREDICTORS, ERA5_PLEVELS, PREDICTAND, NET,
VALID_PERIOD, BATCH_SIZE, NORM, DOY, DEM,
DEM_FEATURES, LOSS, ANOMALIES)
from climax.main.io import ERA5_PATH, DEM_PATH, MODEL_PATH, TARGET_PATH
# module level logger
LOGGER = logging.getLogger(__name__)
if __name__ == '__main__':
    # Script entry point: predict the validation period season by season
    # with the pretrained seasonal models and store the merged predictions
    # as a single NetCDF file.

    # initialize timing
    start_time = time.monotonic()

    # initialize logging
    dictConfig(log_conf())

    # predict reference period
    LogConfig.init_log('Predicting reference period: {}'.format(
        ' - '.join([str(VALID_PERIOD[0]), str(VALID_PERIOD[-1])])))

    # initialize ERA5 predictor dataset
    LogConfig.init_log('Initializing ERA5 predictors.')
    Era5 = ERA5Dataset(ERA5_PATH.joinpath('ERA5'), ERA5_PREDICTORS,
                       plevels=ERA5_PLEVELS)
    Era5_ds = Era5.merge(chunks=-1)

    # whether to use digital elevation model
    if DEM:
        # digital elevation model: Copernicus EU-Dem v1.1
        dem = search_files(DEM_PATH, '^eu_dem_v11_stt.nc$').pop()

        # read elevation and compute slope and aspect
        dem = ERA5Dataset.dem_features(
            dem, {'y': Era5_ds.y, 'x': Era5_ds.x},
            add_coord={'time': Era5_ds.time})

        # check whether to use slope and aspect
        if not DEM_FEATURES:
            dem = dem.drop_vars(['slope', 'aspect'])

        # add dem to set of predictor variables
        Era5_ds = xr.merge([Era5_ds, dem])

    # subset ERA-5 dataset to reference period
    Era5_ds = Era5_ds.sel(time=VALID_PERIOD)

    # group reference period by season: season label -> time indices
    season_indices = Era5_ds.groupby('time.season').groups

    # iterate over the seasons
    trg_ds = []
    for k, v in season_indices.items():
        LogConfig.init_log('Predicting season: {}'.format(k))

        # initialize network filename
        state_file = ERA5Dataset.state_file(
            NET, PREDICTAND, ERA5_PREDICTORS, ERA5_PLEVELS, dem=DEM,
            dem_features=DEM_FEATURES, doy=DOY, loss=LOSS, season=k,
            anomalies=ANOMALIES)

        # path to model state: predictand/season/model.pt
        # NOTE: the path is joined exactly once; joining the result onto
        # MODEL_PATH/PREDICTAND a second time is a no-op for an absolute
        # MODEL_PATH and produces a wrong, nested path for a relative one
        state_file = MODEL_PATH.joinpath(PREDICTAND, k, state_file)

        # a pretrained model is required for each season
        if not state_file.exists():
            LOGGER.info('{} does not exist.'.format(state_file))
            sys.exit()

        # load pretrained network
        net, _ = Network.load_pretrained_model(state_file, NET)

        # get predictors for current season
        predictors = Era5_ds.isel(time=v)

        # predict current season
        predictand = predict_ERA5(net, predictors, PREDICTAND, LOSS,
                                  normalize=NORM, batch_size=BATCH_SIZE,
                                  doy=DOY, anomalies=ANOMALIES)

        # append prediction to target dataset
        trg_ds.append(predictand)

    # merge predictions for all seasons
    LOGGER.info('Merging seasons ...')
    trg_ds = xr.concat(trg_ds, dim='time')

    # sort predictions chronologically
    trg_ds = trg_ds.sortby(trg_ds.time)

    # save model predictions as NetCDF file: the filename is derived from
    # the model of the last season processed, whose season label is replaced
    # by the generic tag 'season'
    target = TARGET_PATH.joinpath(PREDICTAND, net.state_file.name.replace(
        net.state_file.suffix, '.nc').replace(k, 'season'))
    if not target.parent.exists():
        target.parent.mkdir(parents=True, exist_ok=True)
    LOGGER.info('Saving network predictions: {}.'.format(target))
    trg_ds.to_netcdf(target, engine='h5netcdf')

    # log execution time of script
    LogConfig.init_log('Execution time of script {}: {}'
                       .format(__file__, timedelta(seconds=time.monotonic() -
                                                   start_time)))
...@@ -26,7 +26,7 @@ from climax.main.config import (ERA5_PLEVELS, ERA5_PREDICTORS, PREDICTAND, ...@@ -26,7 +26,7 @@ from climax.main.config import (ERA5_PLEVELS, ERA5_PREDICTORS, PREDICTAND,
NORM, TRAIN_CONFIG, NET, LOSS, FILTERS, NORM, TRAIN_CONFIG, NET, LOSS, FILTERS,
OVERWRITE, DEM, DEM_FEATURES, STRATIFY, OVERWRITE, DEM, DEM_FEATURES, STRATIFY,
WET_DAY_THRESHOLD, VALID_SIZE, ANOMALIES, WET_DAY_THRESHOLD, VALID_SIZE, ANOMALIES,
OPTIM_PARAMS, LR_SCHEDULER, OPTIM_PARAMS, LR_SCHEDULER, SENSITIVITY,
LR_SCHEDULER_PARAMS, CHUNKS) LR_SCHEDULER_PARAMS, CHUNKS)
from climax.main.io import ERA5_PATH, OBS_PATH, DEM_PATH, MODEL_PATH from climax.main.io import ERA5_PATH, OBS_PATH, DEM_PATH, MODEL_PATH
...@@ -47,7 +47,11 @@ if __name__ == '__main__': ...@@ -47,7 +47,11 @@ if __name__ == '__main__':
lr_scheduler=LR_SCHEDULER) lr_scheduler=LR_SCHEDULER)
# path to model state # path to model state
state_file = MODEL_PATH.joinpath(PREDICTAND, state_file) if SENSITIVITY:
# models trained for hyperparameter optimization
state_file = MODEL_PATH.joinpath(PREDICTAND, 'sensitivity', state_file)
else:
state_file = MODEL_PATH.joinpath(PREDICTAND, state_file)
# initialize logging # initialize logging
log_file = MODEL_PATH.joinpath(PREDICTAND, log_file = MODEL_PATH.joinpath(PREDICTAND,
......
"""Dynamical climate downscaling using deep convolutional neural networks."""
# !/usr/bin/env python
# -*- coding: utf-8 -*-
# builtins
import sys
import time
import logging
from datetime import timedelta
from logging.config import dictConfig
# externals
import torch
import xarray as xr
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
# locals
from pysegcnn.core.utils import search_files
from pysegcnn.core.trainer import NetworkTrainer, LogConfig
from pysegcnn.core.models import Network
from pysegcnn.core.logging import log_conf
from climax.core.dataset import ERA5Dataset, NetCDFDataset
from climax.core.loss import MSELoss, L1Loss
from climax.main.config import (ERA5_PLEVELS, ERA5_PREDICTORS, PREDICTAND,
CALIB_PERIOD, DOY, SHUFFLE, BATCH_SIZE, LR,
LAMBDA, NORM, TRAIN_CONFIG, NET, LOSS, FILTERS,
OVERWRITE, DEM, DEM_FEATURES, STRATIFY,
WET_DAY_THRESHOLD, VALID_SIZE, ANOMALIES)
from climax.main.io import ERA5_PATH, OBS_PATH, DEM_PATH, MODEL_PATH
# module level logger
LOGGER = logging.getLogger(__name__)
if __name__ == '__main__':
    # Script entry point: split the calibration period into training and
    # validation sets and train one model per season.

    # initialize timing
    start_time = time.monotonic()

    # initialize logging
    dictConfig(log_conf())

    # initialize downscaling
    LogConfig.init_log('Initializing downscaling for period: {}'.format(
        ' - '.join([str(CALIB_PERIOD[0]), str(CALIB_PERIOD[-1])])))

    # initialize ERA5 predictor dataset
    LogConfig.init_log('Initializing ERA5 predictors.')
    Era5 = ERA5Dataset(ERA5_PATH.joinpath('ERA5'), ERA5_PREDICTORS,
                       plevels=ERA5_PLEVELS)
    Era5_ds = Era5.merge(chunks=-1)

    # initialize OBS predictand dataset
    LogConfig.init_log('Initializing observations for predictand: {}'
                       .format(PREDICTAND))

    # check whether to jointly train tasmin and tasmax
    if PREDICTAND == 'tas':
        # read both tasmax and tasmin
        tasmax = xr.open_dataset(
            search_files(OBS_PATH.joinpath('tasmax'), '.nc$').pop())
        tasmin = xr.open_dataset(
            search_files(OBS_PATH.joinpath('tasmin'), '.nc$').pop())
        Obs_ds = xr.merge([tasmax, tasmin])
    else:
        # read in-situ gridded observations
        Obs_ds = search_files(OBS_PATH.joinpath(PREDICTAND), '.nc$').pop()
        Obs_ds = xr.open_dataset(Obs_ds)

    # whether to use digital elevation model
    if DEM:
        # digital elevation model: Copernicus EU-Dem v1.1
        dem = search_files(DEM_PATH, '^eu_dem_v11_stt.nc$').pop()

        # read elevation and compute slope and aspect
        dem = ERA5Dataset.dem_features(
            dem, {'y': Era5_ds.y, 'x': Era5_ds.x},
            add_coord={'time': Era5_ds.time})

        # check whether to use slope and aspect
        if not DEM_FEATURES:
            dem = dem.drop_vars(['slope', 'aspect'])

        # add dem to set of predictor variables
        Era5_ds = xr.merge([Era5_ds, dem])

    # split calibration period into training and validation period
    if PREDICTAND == 'pr' and STRATIFY:
        # stratify training and validation dataset by number of
        # observed wet days for precipitation
        wet_days = (Obs_ds.sel(time=CALIB_PERIOD).mean(dim=('y', 'x'))
                    >= WET_DAY_THRESHOLD).to_array().values.squeeze()
        train, valid = train_test_split(
            CALIB_PERIOD, stratify=wet_days, test_size=VALID_SIZE)

        # sort chronologically
        train, valid = sorted(train), sorted(valid)
    else:
        train, valid = train_test_split(CALIB_PERIOD, shuffle=False,
                                        test_size=VALID_SIZE)

    # initialize training and validation data
    LogConfig.init_log('Initializing training data grouped by season.')

    # training and validation dataset
    Era5_train, Obs_train = Era5_ds.sel(time=train), Obs_ds.sel(time=train)
    Era5_valid, Obs_valid = Era5_ds.sel(time=valid), Obs_ds.sel(time=valid)

    # group predictors and predictand by season: season label -> time indices
    season_indices_train = Era5_train.groupby('time.season').groups
    season_indices_valid = Era5_valid.groupby('time.season').groups

    # group training and validation set by season
    Era_season_train = {k: Era5_train.isel(time=v) for k, v in
                        season_indices_train.items()}
    Obs_season_train = {k: Obs_train.isel(time=v) for k, v in
                        season_indices_train.items()}
    Era_season_valid = {k: Era5_valid.isel(time=v) for k, v in
                        season_indices_valid.items()}
    Obs_season_valid = {k: Obs_valid.isel(time=v) for k, v in
                        season_indices_valid.items()}

    # iterate over the seasons
    for k in Era_season_train:
        LogConfig.init_log('Initializing training for season: {}.'.format(k))

        # get training and validation set for current season
        Era5_train, Obs_train = Era_season_train[k], Obs_season_train[k]
        Era5_valid, Obs_valid = Era_season_valid[k], Obs_season_valid[k]

        # create PyTorch compliant dataset and dataloader instances for model
        # training
        train_ds = NetCDFDataset(Era5_train, Obs_train, normalize=NORM,
                                 doy=DOY, anomalies=ANOMALIES)
        valid_ds = NetCDFDataset(Era5_valid, Obs_valid, normalize=NORM,
                                 doy=DOY, anomalies=ANOMALIES)
        train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE,
                              shuffle=SHUFFLE, drop_last=False)
        valid_dl = DataLoader(valid_ds, batch_size=BATCH_SIZE,
                              shuffle=SHUFFLE, drop_last=False)

        # initialize network and optimizer
        LogConfig.init_log('Initializing network and optimizer.')

        # initialize network filename
        state_file = ERA5Dataset.state_file(
            NET, PREDICTAND, ERA5_PREDICTORS, ERA5_PLEVELS, dem=DEM,
            dem_features=DEM_FEATURES, doy=DOY, loss=LOSS, season=k,
            anomalies=ANOMALIES)

        # path to model state: predictand/season/model.pt
        state_file = MODEL_PATH.joinpath(PREDICTAND, k, state_file)

        # check if model exists: skip only the current season instead of
        # terminating the script, so that the models of the remaining
        # seasons are still trained
        if state_file.exists() and not OVERWRITE:
            LogConfig.init_log('{} already exists.'.format(state_file))
            continue

        # define number of output fields: one per observed variable
        outputs = len(Obs_train.data_vars)

        # check whether modelling pr with probabilistic approach: a single
        # output field for MSELoss/L1Loss, three output fields otherwise
        if PREDICTAND == 'pr':
            outputs = 1 if isinstance(LOSS, (MSELoss, L1Loss)) else 3

        # instantiate network: two additional input channels when DOY is set
        inputs = (len(Era5_train.data_vars) + 2 if DOY else
                  len(Era5_train.data_vars))
        net = NET(state_file, inputs, outputs, filters=FILTERS)

        # initialize optimizer: stochastic gradient descent with momentum
        optimizer = torch.optim.SGD(net.parameters(), lr=LR, momentum=0.9,
                                    weight_decay=LAMBDA)

        # initialize network trainer
        trainer = NetworkTrainer(net, optimizer, net.state_file, train_dl,
                                 valid_dl, loss_function=LOSS, **TRAIN_CONFIG)

        # train model
        trainer.train()

    # log execution time of script
    LogConfig.init_log('Execution time of script {}: {}'
                       .format(__file__, timedelta(seconds=time.monotonic() -
                                                   start_time)))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment