From 45cd5f33a327ad79a70905d2f0ce980c965dd418 Mon Sep 17 00:00:00 2001 From: "Daniel.Frisinghelli" <daniel.frisinghelli@eurac.edu> Date: Tue, 26 Oct 2021 14:22:38 +0200 Subject: [PATCH] Change output paths for hyperparameter tuning. --- Scripts/grid_search.sh | 3 + climax/main/config.py | 3 + climax/main/downscale_infer.py | 14 +- climax/main/downscale_infer_season.py | 128 ----------------- climax/main/downscale_train.py | 8 +- climax/main/downscale_train_season.py | 190 -------------------------- 6 files changed, 22 insertions(+), 324 deletions(-) delete mode 100644 climax/main/downscale_infer_season.py delete mode 100644 climax/main/downscale_train_season.py diff --git a/Scripts/grid_search.sh b/Scripts/grid_search.sh index 6ebe1c2..24a9809 100644 --- a/Scripts/grid_search.sh +++ b/Scripts/grid_search.sh @@ -23,6 +23,9 @@ LRSCHEDULER=(None torch.optim.lr_scheduler.CyclicLR) # weight decay values to test LAMBDA=(0 0.000001 0.00001 0.0001 0.001 0.01 1) +# change flag for sensitivity analysis in configuration +sed -i "s/SENSITIVITY\s*=.*/SENSITIVITY\=True/" ./climax/main/config.py + # iterate over predictands for predictand in ${PREDICTAND[@]}; do diff --git a/climax/main/config.py b/climax/main/config.py index a6a97a6..b9c9c0d 100644 --- a/climax/main/config.py +++ b/climax/main/config.py @@ -103,6 +103,9 @@ assert PREDICTAND in PREDICTANDS # threshold defining the minimum amount of precipitation (mm) for a wet day WET_DAY_THRESHOLD = 1 +# whether training a hyperparameter sensitivity analysis +SENSITIVITY = False + # ----------------------------------------------------------------------------- # Model training configuration ------------------------------------------------ # ----------------------------------------------------------------------------- diff --git a/climax/main/downscale_infer.py b/climax/main/downscale_infer.py index 81b0b04..6f3348d 100644 --- a/climax/main/downscale_infer.py +++ b/climax/main/downscale_infer.py @@ -24,7 +24,8 @@ from climax.core.utils import split_date_range from climax.main.config import (ERA5_PREDICTORS, ERA5_PLEVELS, PREDICTAND, NET, VALID_PERIOD, BATCH_SIZE, NORM, DOY, NYEARS, DEM, DEM_FEATURES, LOSS, ANOMALIES, OPTIM, - OPTIM_PARAMS, CHUNKS, LR_SCHEDULER, OVERWRITE) + OPTIM_PARAMS, CHUNKS, LR_SCHEDULER, OVERWRITE, + SENSITIVITY) from climax.main.io import ERA5_PATH, DEM_PATH, MODEL_PATH, TARGET_PATH # module level logger @@ -44,11 +45,16 @@ if __name__ == '__main__': lr_scheduler=LR_SCHEDULER) # path to model state - state_file = MODEL_PATH.joinpath(PREDICTAND, state_file) + if SENSITIVITY: + # models trained for hyperparameter optimization + state_file = MODEL_PATH.joinpath(PREDICTAND, 'sensitivity', state_file) + target = TARGET_PATH.joinpath(PREDICTAND, 'sensitivity') + else: + state_file = MODEL_PATH.joinpath(PREDICTAND, state_file) + target = TARGET_PATH.joinpath(PREDICTAND) # check if target dataset already exists - target = TARGET_PATH.joinpath(PREDICTAND, state_file.name.replace( - state_file.suffix, '.nc')) + target = target.joinpath(state_file.name.replace(state_file.suffix, '.nc')) if target.exists() and not OVERWRITE: LogConfig.init_log('{} already exists.'.format(target)) sys.exit() diff --git a/climax/main/downscale_infer_season.py b/climax/main/downscale_infer_season.py deleted file mode 100644 index 7d38d14..0000000 --- a/climax/main/downscale_infer_season.py +++ /dev/null @@ -1,128 +0,0 @@ -"""Dynamical climate downscaling using deep convolutional neural networks.""" - -# !/usr/bin/env python -# -*- coding: utf-8 -*- - -# builtins -import sys -import time -import logging -from datetime import timedelta -from logging.config import dictConfig - -# externals -import xarray as xr - -# locals -from pysegcnn.core.trainer import LogConfig -from pysegcnn.core.models import Network -from pysegcnn.core.logging import log_conf -from pysegcnn.core.utils import search_files -from climax.core.dataset import ERA5Dataset -from climax.core.predict import predict_ERA5 -from climax.main.config import (ERA5_PREDICTORS, ERA5_PLEVELS, PREDICTAND, NET, - VALID_PERIOD, BATCH_SIZE, NORM, DOY, DEM, - DEM_FEATURES, LOSS, ANOMALIES) -from climax.main.io import ERA5_PATH, DEM_PATH, MODEL_PATH, TARGET_PATH - -# module level logger -LOGGER = logging.getLogger(__name__) - - -if __name__ == '__main__': - - # initialize timing - start_time = time.monotonic() - - # initialize logging - dictConfig(log_conf()) - - # predict reference period - LogConfig.init_log('Predicting reference period: {}'.format( - ' - '.join([str(VALID_PERIOD[0]), str(VALID_PERIOD[-1])]))) - - # initialize ERA5 predictor dataset - LogConfig.init_log('Initializing ERA5 predictors.') - Era5 = ERA5Dataset(ERA5_PATH.joinpath('ERA5'), ERA5_PREDICTORS, - plevels=ERA5_PLEVELS) - Era5_ds = Era5.merge(chunks=-1) - - # whether to use digital elevation model - if DEM: - # digital elevation model: Copernicus EU-Dem v1.1 - dem = search_files(DEM_PATH, '^eu_dem_v11_stt.nc$').pop() - - # read elevation and compute slope and aspect - dem = ERA5Dataset.dem_features( - dem, {'y': Era5_ds.y, 'x': Era5_ds.x}, - add_coord={'time': Era5_ds.time}) - - # check whether to use slope and aspect - if not DEM_FEATURES: - dem = dem.drop_vars(['slope', 'aspect']) - - # add dem to set of predictor variables - Era5_ds = xr.merge([Era5_ds, dem]) - - # subset ERA-5 dataset to reference period - Era5_ds = Era5_ds.sel(time=VALID_PERIOD) - - # group reference period by season - season_indices = Era5_ds.groupby('time.season').groups - - # iterate over the seasons - trg_ds = [] - for k, v in season_indices.items(): - LogConfig.init_log('Predicting season: {}'.format(k)) - - # initialize network filename - state_file = ERA5Dataset.state_file( - NET, PREDICTAND, ERA5_PREDICTORS, ERA5_PLEVELS, dem=DEM, - dem_features=DEM_FEATURES, doy=DOY, loss=LOSS, season=k, - anomalies=ANOMALIES) - - # path to model state: predictand/season/model.pt - state_file = MODEL_PATH.joinpath(PREDICTAND, k, state_file) - - # path to model state - state_file = MODEL_PATH.joinpath(PREDICTAND, state_file) - - # load pretrained model - if state_file.exists(): - # load pretrained network - net, _ = Network.load_pretrained_model(state_file, NET) - else: - # initialize OBS predictand dataset - LOGGER.info('{} does not exist.'.format(state_file)) - sys.exit() - - # get predictors for current season - predictors = Era5_ds.isel(time=v) - - # predict current season - predictand = predict_ERA5(net, predictors, PREDICTAND,LOSS, - normalize=NORM, batch_size=BATCH_SIZE, - doy=DOY, anomalies=ANOMALIES) - - # append prediction to target dataset - trg_ds.append(predictand) - - # merge predictions for all seasons - LOGGER.info('Merging seasons ...') - trg_ds = xr.concat(trg_ds, dim='time') - - # sort predictions chronologically - trg_ds = trg_ds.sortby(trg_ds.time) - - # save model predictions as NetCDF file - target = TARGET_PATH.joinpath(PREDICTAND, net.state_file.name.replace( - net.state_file.suffix, '.nc').replace(k, 'season')) - if not target.parent.exists(): - target.parent.mkdir(parents=True, exist_ok=True) - LOGGER.info('Saving network predictions: {}.'.format(target)) - trg_ds.to_netcdf(target, engine='h5netcdf') - - # log execution time of script - LogConfig.init_log('Execution time of script {}: {}' - .format(__file__, timedelta(seconds=time.monotonic() - - start_time))) diff --git a/climax/main/downscale_train.py b/climax/main/downscale_train.py index 9bf803d..2ae11a2 100644 --- a/climax/main/downscale_train.py +++ b/climax/main/downscale_train.py @@ -26,7 +26,7 @@ from climax.main.config import (ERA5_PLEVELS, ERA5_PREDICTORS, PREDICTAND, NORM, TRAIN_CONFIG, NET, LOSS, FILTERS, OVERWRITE, DEM, DEM_FEATURES, STRATIFY, WET_DAY_THRESHOLD, VALID_SIZE, ANOMALIES, - OPTIM_PARAMS, LR_SCHEDULER, + OPTIM_PARAMS, LR_SCHEDULER, SENSITIVITY, LR_SCHEDULER_PARAMS, CHUNKS) from climax.main.io import ERA5_PATH, OBS_PATH, DEM_PATH, MODEL_PATH @@ -47,7 +47,11 @@ if __name__ == '__main__': lr_scheduler=LR_SCHEDULER) # path to model state - state_file = MODEL_PATH.joinpath(PREDICTAND, state_file) + if SENSITIVITY: + # models trained for hyperparameter optimization + state_file = MODEL_PATH.joinpath(PREDICTAND, 'sensitivity', state_file) + else: + state_file = MODEL_PATH.joinpath(PREDICTAND, state_file) # initialize logging log_file = MODEL_PATH.joinpath(PREDICTAND, diff --git a/climax/main/downscale_train_season.py b/climax/main/downscale_train_season.py deleted file mode 100644 index ab74e41..0000000 --- a/climax/main/downscale_train_season.py +++ /dev/null @@ -1,190 +0,0 @@ -"""Dynamical climate downscaling using deep convolutional neural networks.""" - -# !/usr/bin/env python -# -*- coding: utf-8 -*- - -# builtins -import sys -import time -import logging -from datetime import timedelta -from logging.config import dictConfig - -# externals -import torch -import xarray as xr -from sklearn.model_selection import train_test_split -from torch.utils.data import DataLoader - -# locals -from pysegcnn.core.utils import search_files -from pysegcnn.core.trainer import NetworkTrainer, LogConfig -from pysegcnn.core.models import Network -from pysegcnn.core.logging import log_conf -from climax.core.dataset import ERA5Dataset, NetCDFDataset -from climax.core.loss import MSELoss, L1Loss -from climax.main.config import (ERA5_PLEVELS, ERA5_PREDICTORS, PREDICTAND, - CALIB_PERIOD, DOY, SHUFFLE, BATCH_SIZE, LR, - LAMBDA, NORM, TRAIN_CONFIG, NET, LOSS, FILTERS, - OVERWRITE, DEM, DEM_FEATURES, STRATIFY, - WET_DAY_THRESHOLD, VALID_SIZE, ANOMALIES) -from climax.main.io import ERA5_PATH, OBS_PATH, DEM_PATH, MODEL_PATH - -# module level logger -LOGGER = logging.getLogger(__name__) - - -if __name__ == '__main__': - - # initialize timing - start_time = time.monotonic() - - # initialize logging - dictConfig(log_conf()) - - # initialize downscaling - LogConfig.init_log('Initializing downscaling for period: {}'.format( - ' - '.join([str(CALIB_PERIOD[0]), str(CALIB_PERIOD[-1])]))) - - # initialize ERA5 predictor dataset - LogConfig.init_log('Initializing ERA5 predictors.') - Era5 = ERA5Dataset(ERA5_PATH.joinpath('ERA5'), ERA5_PREDICTORS, - plevels=ERA5_PLEVELS) - Era5_ds = Era5.merge(chunks=-1) - - # initialize OBS predictand dataset - LogConfig.init_log('Initializing observations for predictand: {}' - .format(PREDICTAND)) - - # check whether to joinlty train tasmin and tasmax - if PREDICTAND == 'tas': - # read both tasmax and tasmin - tasmax = xr.open_dataset( - search_files(OBS_PATH.joinpath('tasmax'), '.nc$').pop()) - tasmin = xr.open_dataset( - search_files(OBS_PATH.joinpath('tasmin'), '.nc$').pop()) - Obs_ds = xr.merge([tasmax, tasmin]) - else: - # read in-situ gridded observations - Obs_ds = search_files(OBS_PATH.joinpath(PREDICTAND), '.nc$').pop() - Obs_ds = xr.open_dataset(Obs_ds) - - # whether to use digital elevation model - if DEM: - # digital elevation model: Copernicus EU-Dem v1.1 - dem = search_files(DEM_PATH, '^eu_dem_v11_stt.nc$').pop() - - # read elevation and compute slope and aspect - dem = ERA5Dataset.dem_features( - dem, {'y': Era5_ds.y, 'x': Era5_ds.x}, - add_coord={'time': Era5_ds.time}) - - # check whether to use slope and aspect - if not DEM_FEATURES: - dem = dem.drop_vars(['slope', 'aspect']) - - # add dem to set of predictor variables - Era5_ds = xr.merge([Era5_ds, dem]) - - # split calibration period into training and validation period - if PREDICTAND == 'pr' and STRATIFY: - # stratify training and validation dataset by number of - # observed wet days for precipitation - wet_days = (Obs_ds.sel(time=CALIB_PERIOD).mean(dim=('y', 'x')) - >= WET_DAY_THRESHOLD).to_array().values.squeeze() - train, valid = train_test_split( - CALIB_PERIOD, stratify=wet_days, test_size=VALID_SIZE) - - # sort chronologically - train, valid = sorted(train), sorted(valid) - else: - train, valid = train_test_split(CALIB_PERIOD, shuffle=False, - test_size=VALID_SIZE) - - # initialize network and optimizer - LogConfig.init_log('Initializing training data grouped by season.') - - # training and validation dataset - Era5_train, Obs_train = Era5_ds.sel(time=train), Obs_ds.sel(time=train) - Era5_valid, Obs_valid = Era5_ds.sel(time=valid), Obs_ds.sel(time=valid) - - # group predictors and predictand by season - season_indices_train = Era5_train.groupby('time.season').groups - season_indices_valid = Era5_valid.groupby('time.season').groups - - # group training and validation set by season - Era_season_train = {k: Era5_train.isel(time=v) for k, v in - season_indices_train.items()} - Obs_season_train = {k: Obs_train.isel(time=v) for k, v in - season_indices_train.items()} - Era_season_valid = {k: Era5_valid.isel(time=v) for k, v in - season_indices_valid.items()} - Obs_season_valid = {k: Obs_valid.isel(time=v) for k, v in - season_indices_valid.items()} - - # iterate over the seasons - for k in Era_season_train.keys(): - LogConfig.init_log('Initializing training for season: {}.'.format(k)) - - # get training and validation set for current season - Era5_train, Obs_train = Era_season_train[k], Obs_season_train[k] - Era5_valid, Obs_valid = Era_season_valid[k], Obs_season_valid[k] - - # create PyTorch compliant dataset and dataloader instances for model - # training - train_ds = NetCDFDataset(Era5_train, Obs_train, normalize=NORM, - doy=DOY, anomalies=ANOMALIES) - valid_ds = NetCDFDataset(Era5_valid, Obs_valid, normalize=NORM, - doy=DOY, anomalies=ANOMALIES) - train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=SHUFFLE, - drop_last=False) - valid_dl = DataLoader(valid_ds, batch_size=BATCH_SIZE, shuffle=SHUFFLE, - drop_last=False) - - # initialize network and optimizer - LogConfig.init_log('Initializing network and optimizer.') - - # initialize network filename - state_file = ERA5Dataset.state_file( - NET, PREDICTAND, ERA5_PREDICTORS, ERA5_PLEVELS, dem=DEM, - dem_features=DEM_FEATURES, doy=DOY, loss=LOSS, season=k, - anomalies=ANOMALIES) - - # path to model state: predictand/season/model.pt - state_file = MODEL_PATH.joinpath(PREDICTAND, k, state_file) - - # check if model exists - if state_file.exists() and not OVERWRITE: - # load pretrained network - net, _ = Network.load_pretrained_model(state_file, NET) - sys.exit() - - # define number of output fields - # check whether modelling pr with probabilistic approach - outputs = len(Obs_train.data_vars) - if PREDICTAND == 'pr': - outputs = (1 if (isinstance(LOSS, MSELoss) or - isinstance(LOSS, L1Loss)) else 3) - - # instanciate network - inputs = (len(Era5_train.data_vars) + 2 if DOY else - len(Era5_train.data_vars)) - net = NET(state_file, inputs, outputs, filters=FILTERS) - - # initialize optimizer - # optimizer = torch.optim.Adam(net.parameters(), lr=LR, - # weight_decay=LAMBDA) - optimizer = torch.optim.SGD(net.parameters(), lr=LR, momentum=0.9, - weight_decay=LAMBDA) - - # initialize network trainer - trainer = NetworkTrainer(net, optimizer, net.state_file, train_dl, - valid_dl, loss_function=LOSS, **TRAIN_CONFIG) - - # train model - state = trainer.train() - - # log execution time of script - LogConfig.init_log('Execution time of script {}: {}' - .format(__file__, timedelta(seconds=time.monotonic() - - start_time))) -- GitLab