diff --git a/pysegcnn/main/config.py b/pysegcnn/main/config.py
index 707129a0d57ec5e933ff83c9422ef7a5168644f9..583bc7d734095d8ee15f393a2f112a6e0fc061d3 100644
--- a/pysegcnn/main/config.py
+++ b/pysegcnn/main/config.py
@@ -28,7 +28,7 @@ HERE = pathlib.Path(__file__).resolve().parent

 # path to the datasets on the current machine
 DRIVE_PATH = pathlib.Path('C:/Eurac/Projects/CCISNOW/_Datasets/')
-# DRIVE_PATH = pathlib.Path('/mnt/CEPH_PROJECTS/cci_snow/dfrisinghelli/_Datasets/')
+# DRIVE_PATH = pathlib.Path('/mnt/CEPH_PROJECTS/cci_snow/dfrisinghelli/_Datasets/')  # nopep8

 # name and paths to the datasets
 DATASETS = {'Sparcs': DRIVE_PATH.joinpath('Sparcs'),
@@ -47,6 +47,9 @@ BANDS = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2']

 # tile size of a single sample
 TILE_SIZE = 128

+# number of folds for cross validation
+K_FOLDS = 2
+
 # the source dataset configuration dictionary
 src_ds_config = {
@@ -81,11 +84,6 @@ src_ds_config = {
     # 'pad': False,
     'pad': True,

-    # the random seed for the numpy random number generator
-    # ensures reproducibility of the training, validation and test data split
-    # used if split_mode='random' and split_mode='scene'
-    'seed': 0,
-
     # whether to sort the dataset in chronological order, useful for time
     # series data
     # 'sort': True,
@@ -156,11 +154,11 @@ trg_ds_config = {
     'bands': BANDS,
     'tile_size': TILE_SIZE,
     'pad': True,
-    'seed': 0,
     'sort': True,
     'transforms': [],
     'merge_labels': {'Cirrus': 'Cloud', 'Not_used': 'No_data'}
+
 }

 # the source dataset split configuration dictionary
 src_split_config = {
@@ -172,64 +170,58 @@ src_split_config = {

     # the mode to split the dataset:
     #
-    # - 'random': randomly split the scenes
-    #             for each scene, the tiles can be distributed among the
+    # - 'tile':   for each scene, the tiles can be distributed among the
     #             training, validation and test set
     #
-    # - 'scene': randomly split the scenes
-    #            for each scene, all the tiles of the scene are included in
+    # - 'scene': for each scene, all the tiles of the scene are included in
     #            either the training set, the validation set or the test
     #            set, respectively
-    #
-    # - 'date':  split the scenes of a dataset based on a date, useful for
-    #            time series data
-    #            scenes before date build the training set, scenes after
-    #            the date build the validation set, the test set is empty
-    # 'split_mode': 'date',
-    # 'split_mode': 'random',
+    # 'split_mode': 'tile',
     'split_mode': 'scene',

+    # the number of folds for cross validation
+    #
+    # k_folds = 1 : The model is trained with a single dataset split based on
+    #               'tvratio' and 'ttratio'
+    # k_folds > 1 : The model is trained via cross validation on k_folds splits
+    #               of the dataset
+    'k_folds': K_FOLDS,
+
+    # the random seed for the random number generator
+    # ensures reproducibility of the training, validation and test data split
+    'seed': 0,
+
+    # whether to shuffle the data before splitting
+    'shuffle': True,
+
+    # -------------------------------------------------------------------------
+    # IMPORTANT: these settings only apply if 'k_folds=1'
+    # -------------------------------------------------------------------------
+
     # (ttratio * 100) % of the dataset will be used for training and
     # validation
-    # used if split_mode='random' and split_mode='scene'
+    # used if 'k_folds=1'
     'ttratio': 1,

     # (ttratio * tvratio) * 100 % will be used for training
     # (1 - ttratio * tvratio) * 100 % will be used for validation
-    # used if split_mode='random' and split_mode='scene'
+    # used if 'k_folds=1'
     'tvratio': 0.8,

-    # the date to split the scenes
-    # format: 'yyyymmdd'
-    # scenes before date build the training set, scenes after the date build
-    # the validation set, the test set is empty
-    # used if split_mode='date'
-    'date': '',
-    'dateformat': '%Y%m%d',
-
-    # whether to drop samples (during training only) with a fraction of
-    # pixels equal to the constant padding value cval >= drop
-    # drop=1 means, do not use a sample if all pixels = cval
-    # drop=0.8 means, do not use a sample if 80% or more of the pixels are
-    # equal to cval
-    # drop=0.2 means, ...
-    # drop=0 means, do not drop any samples
-    'drop': 0,
-
-    }
+}

 # the target dataset split configuration dictionary
 trg_split_config = {
-    # 'split_mode': 'date',
-    # 'split_mode': 'random',
+    # 'split_mode': 'tile',
     'split_mode': 'scene',
+    'k_folds': K_FOLDS,
+    'seed': 0,
+    'shuffle': True,
     'ttratio': 1,
     'tvratio': 0.8,
-    'date': '',
-    'dateformat': '%Y%m%d',
-    'drop': 0,
-    }
+
+}


 # the model configuration dictionary
 model_config = {
@@ -302,8 +294,8 @@ tlda_config = {

     # whether to apply any sort of transfer learning
     # if transfer=False, the model is only trained on the source dataset
-    'transfer': True,
-    # 'transfer': False,
+    # 'transfer': True,
+    'transfer': False,

     # Supervised vs. Unsupervised ---------------------------------------------
     # -------------------------------------------------------------------------
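
Note on the new split semantics: the comments added to ``src_split_config`` spell out the split arithmetic, but the ``SplitConfig`` implementation is not part of this diff. Below is a minimal, self-contained sketch of those semantics exactly as documented; the function name ``split_scenes`` and the empty test set in the cross-validation branch are illustrative assumptions, not the pysegcnn API.

.. code-block:: python

    import numpy as np

    def split_scenes(scenes, k_folds=1, ttratio=1.0, tvratio=0.8, seed=0,
                     shuffle=True):
        """Return a list of (train, valid, test) index arrays, one per fold.

        Sketch of the documented split semantics; not pysegcnn code.
        """
        indices = np.arange(len(scenes))
        rng = np.random.default_rng(seed)
        if shuffle:
            # the 'seed' key makes this shuffle, and hence the split,
            # reproducible across runs
            rng.shuffle(indices)

        if k_folds == 1:
            # single split: (ttratio * 100) % of the data is used for
            # training and validation, the remainder for testing; of the
            # first part, (tvratio * 100) % goes to training
            ntt = int(ttratio * len(indices))
            ntrain = int(tvratio * ntt)
            return [(indices[:ntrain], indices[ntrain:ntt], indices[ntt:])]

        # k_folds > 1: cross validation, each fold serves once as the
        # validation set; whether pysegcnn keeps a test set here is not
        # shown by this diff, so it is left empty
        folds = np.array_split(indices, k_folds)
        return [(np.concatenate(folds[:k] + folds[k + 1:]), folds[k],
                 np.array([], dtype=int)) for k in range(k_folds)]

With ``K_FOLDS = 2`` and, say, ten scenes, this yields two complementary 5/5 train/validation splits; with ``k_folds=1`` it reduces to the ``ttratio``/``tvratio`` arithmetic spelled out in the comments above.
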
diff --git a/pysegcnn/main/train.py b/pysegcnn/main/train.py
deleted file mode 100644
index fc0e91db95184dfcc48ee5f541852eba4a6f8cfd..0000000000000000000000000000000000000000
--- a/pysegcnn/main/train.py
+++ /dev/null
@@ -1,126 +0,0 @@
-"""Main script to train a model.
-
-Steps to launch a model run:
-
-    1. Configure the model run in :py:mod:`pysegcnn.main.config.py`
-        - configure the dataset(s): ``src_ds_config`` and ``trg_ds_config``
-        - configure the split(s) : ``src_ds_config`` and ``trg_ds_config``
-        - configure the model : ``model_config``
-    2. Save :py:mod:`pysegcnn.main.config.py`
-    3. In a terminal, navigate to the repository's root directory
-    4. Run
-
-    .. code-block:: bash
-
-        python pysegcnn/main/train.py
-
-
-License
--------
-
-    Copyright (c) 2020 Daniel Frisinghelli
-
-    This source code is licensed under the GNU General Public License v3.
-
-    See the LICENSE file in the repository's root directory.
- -""" - -# !/usr/bin/env python -# -*- coding: utf-8 -*- - -# builtins -from logging.config import dictConfig - -# locals -from pysegcnn.core.trainer import (DatasetConfig, SplitConfig, ModelConfig, - TransferLearningConfig, StateConfig, - LogConfig, DomainAdaptationTrainer) -from pysegcnn.main.config import (src_ds_config, src_split_config, - trg_ds_config, trg_split_config, - model_config, tlda_config) -from pysegcnn.core.logging import log_conf - - -if __name__ == '__main__': - - # (i) instanciate the source domain configurations - src_dc = DatasetConfig(**src_ds_config) # source domain dataset - src_sc = SplitConfig(**src_split_config) # source domain dataset split - - # (ii) instanciate the target domain configuration - trg_dc = DatasetConfig(**trg_ds_config) # target domain dataset - trg_sc = SplitConfig(**trg_split_config) # target domain dataset split - - # (iii) instanciate the model configuration - net_mc = ModelConfig(**model_config) - - # (iv) instanciate the transfer learning configuration - trn_sf = TransferLearningConfig(**tlda_config) - - # (v) instanciate the model state file - net_sc = StateConfig(src_dc, src_sc, trg_dc, trg_sc, net_mc, trn_sf) - state_file = net_sc.init_state() - - # (vi) instanciate logging configuration - net_lc = LogConfig(state_file) - dictConfig(log_conf(net_lc.log_file)) - - # (vii) instanciate the datasets to train the model on - src_ds = src_dc.init_dataset() - trg_ds = trg_dc.init_dataset() - - # (viii) instanciate the training, validation and test datasets and - # dataloaders for the source domain - src_tra_ds, src_val_ds, src_tes_ds = src_sc.train_val_test_split(src_ds) - src_tra_dl, src_val_dl, src_tes_dl = src_sc.dataloaders( - src_tra_ds, src_val_ds, src_tes_ds, batch_size=net_mc.batch_size, - shuffle=True, drop_last=False) - - # (ix) instanciate the training, validation and test datasets and - # dataloaders dor the target domain - trg_tra_ds, trg_val_ds, trg_tes_ds = trg_sc.train_val_test_split(trg_ds) - trg_tra_dl, trg_val_dl, trg_tes_dl = trg_sc.dataloaders( - trg_tra_ds, trg_val_ds, trg_tes_ds, batch_size=net_mc.batch_size, - shuffle=True, drop_last=False) - - # (x) instanciate the model - if trn_sf.transfer and (trn_sf.supervised or trn_sf.uda_from_pretrained): - # check whether to load a pretrained model for (un)supervised transfer - # learning - net, optimizer, checkpoint = trn_sf.transfer_model( - trn_sf.pretrained_path, - nclasses=len(src_ds).labels, - optim_kwargs=net_mc.optim_kwargs, - freeze=trn_sf.freeze) - else: - # initialize model from scratch or from an existing model checkpoint - net, optimizer, checkpoint = net_mc.init_model( - len(src_ds.use_bands), len(src_ds.labels), state_file) - - # (xi) instanciate the network trainer class - trainer = DomainAdaptationTrainer( - model=net, - optimizer=optimizer, - state_file=net.state_file, - src_train_dl=src_tra_dl, - src_valid_dl=src_val_dl, - src_test_dl=src_tes_dl, - epochs=net_mc.epochs, - nthreads=net_mc.nthreads, - early_stop=net_mc.early_stop, - mode=net_mc.mode, - delta=net_mc.delta, - patience=net_mc.patience, - checkpoint_state=checkpoint, - save=net_mc.save, - supervised=trn_sf.supervised, - trg_train_dl=trg_tra_dl, - trg_valid_dl=trg_val_dl, - trg_test_dl=trg_tes_dl, - uda_loss_function=trn_sf.uda_loss_function, - uda_lambda=trn_sf.uda_lambda, - uda_pos=trn_sf.uda_pos) - - # (xi) train the model - # training_state = trainer.train()