From eac2851ee358511e50a48b964ebd348181561b87 Mon Sep 17 00:00:00 2001
From: "Daniel.Frisinghelli" <daniel.frisinghelli@eurac.edu>
Date: Mon, 17 Aug 2020 17:23:08 +0200
Subject: [PATCH] Removed old version

---
 pysegcnn/core/trainer_old.py | 586 -----------------------------------
 1 file changed, 586 deletions(-)
 delete mode 100644 pysegcnn/core/trainer_old.py

diff --git a/pysegcnn/core/trainer_old.py b/pysegcnn/core/trainer_old.py
deleted file mode 100644
index fa8715e..0000000
--- a/pysegcnn/core/trainer_old.py
+++ /dev/null
@@ -1,586 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Jun 26 16:31:36 2020
-
-@author: Daniel
-"""
-# builtins
-import os
-
-# externals
-import numpy as np
-import torch
-import torch.nn.functional as F
-from torch.utils.data import DataLoader
-
-# locals
-from pysegcnn.core.dataset import SupportedDatasets
-from pysegcnn.core.layers import Conv2dSame
-from pysegcnn.core.utils import img2np, accuracy_function
-from pysegcnn.core.split import (RandomTileSplit, RandomSceneSplit, DateSplit,
-                                 VALID_SPLIT_MODES)
-
-
-class NetworkTrainer(object):
-
-    def __init__(self, config):
-
-        # the configuration file as defined in pysegcnn.main.config.py
-        for k, v in config.items():
-            setattr(self, k, v)
-
-        # whether to use the gpu
-        self.device = torch.device("cuda:0" if torch.cuda.is_available() else
-                                   "cpu")
-
-        # initialize the dataset to train the model on
-        self._init_dataset()
-
-        # initialize the model state files
-        self._init_state()
-
-        # initialize the model
-        self._init_model()
-
-    def from_pretrained(self):
-
-        # load the pretrained model
-        model_state = os.path.join(self.state_path, self.pretrained_model)
-        if not os.path.exists(model_state):
-            raise FileNotFoundError('Pretrained model {} does not exist.'
-                                    .format(model_state))
-
-        # load the model state
-        model_state = torch.load(model_state)
-
-        # get the input bands of the pretrained model
-        bands = model_state['bands']
-
-        # get the number of convolutional filters
-        filters = model_state['params']['filters']
-
-        # check whether the current dataset uses the correct spectral bands
-        if self.bands != bands:
-            raise ValueError('The bands of the pretrained network do not '
-                             'match the specified bands: {}'
-                             .format(self.bands))
-
-        # instantiate the pretrained model architecture
-        model = self.model(**model_state['params'], **model_state['kwargs'])
-
-        # load the pretrained model weights
-        model.load(self.pretrained_model, inpath=self.state_path)
-
-        # reset the model epoch to 0, since the model is trained on a
-        # different dataset
-        model.epoch = 0
-
-        # adjust the number of classes in the model
-        model.nclasses = len(self.dataset.labels)
-
-        # adjust the classification layer to the number of classes of the
-        # current dataset
-        model.classifier = Conv2dSame(in_channels=filters[0],
-                                      out_channels=model.nclasses,
-                                      kernel_size=1)
-
-        return model
-
-    def from_checkpoint(self):
-
-        # whether to resume training from an existing model
-        if not os.path.exists(self.state):
-            raise FileNotFoundError('Model checkpoint {} does not exist.'
-                                    .format(self.state))
-
-        # load the model state
-        state = self.model.load(self.state_file, self.optimizer,
-                                self.state_path)
-        print('Resuming training from {} ...'.format(state))
-        print('Model epoch: {:d}'.format(self.model.epoch))
-
-        # load the model loss and accuracy
-        checkpoint_state = torch.load(self.loss_state)
-
-        # get all non-zero elements, i.e. the number of epochs trained
-        # before the early stop
-        checkpoint_state = {k: v[np.nonzero(v)].reshape(v.shape[0], -1) for
-                            k, v in checkpoint_state.items()}
-
-        # maximum accuracy on the validation set
-        max_accuracy = checkpoint_state['va'][:, -1].mean().item()
-
-        return checkpoint_state, max_accuracy
-
-    def train(self):
-
-        print('------------------------- Training ---------------------------')
-
-        # set the number of threads
-        torch.set_num_threads(self.nthreads)
-
-        # instantiate the early stopping class
-        if self.early_stop:
-            es = EarlyStopping(self.mode, self.delta, self.patience)
-            print('Initializing early stopping ...')
-            print('mode = {}, delta = {}, patience = {} epochs ...'
-                  .format(self.mode, self.delta, self.patience))
-
-        # create a dictionary of the observed losses and accuracies on the
-        # training and validation dataset
-        tshape = (len(self.train_dl), self.epochs)
-        vshape = (len(self.valid_dl), self.epochs)
-        training_state = {'tl': np.zeros(shape=tshape),
-                          'ta': np.zeros(shape=tshape),
-                          'vl': np.zeros(shape=vshape),
-                          'va': np.zeros(shape=vshape)
-                          }
-
-        # send the model to the gpu if available
-        self.model = self.model.to(self.device)
-
-        # initialize the training: iterate over the entire training dataset
-        for epoch in range(self.epochs):
-
-            # set the model to training mode
-            print('Setting model to training mode ...')
-            self.model.train()
-
-            # iterate over the dataloader object
-            for batch, (inputs, labels) in enumerate(self.train_dl):
-
-                # send the data to the gpu if available
-                inputs = inputs.to(self.device)
-                labels = labels.to(self.device)
-
-                # reset the gradients
-                self.optimizer.zero_grad()
-
-                # perform forward pass
-                outputs = self.model(inputs)
-
-                # compute loss
-                loss = self.loss_function(outputs, labels.long())
-                observed_loss = loss.item()
-                training_state['tl'][batch, epoch] = observed_loss
-
-                # compute the gradients of the loss function w.r.t.
-                # the network weights
-                loss.backward()
-
-                # update the weights
-                self.optimizer.step()
-
-                # calculate predicted class labels
-                ypred = F.softmax(outputs, dim=1).argmax(dim=1)
-
-                # calculate accuracy on current batch
-                observed_accuracy = accuracy_function(ypred, labels)
-                training_state['ta'][batch, epoch] = observed_accuracy
-
-                # print progress
-                print('Epoch: {:d}/{:d}, Mini-batch: {:d}/{:d}, Loss: {:.2f}, '
-                      'Accuracy: {:.2f}'.format(epoch + 1,
-                                                self.epochs,
-                                                batch + 1,
-                                                len(self.train_dl),
-                                                observed_loss,
-                                                observed_accuracy))
-
-            # update the number of epochs trained
-            self.model.epoch += 1
-
-            # whether to evaluate model performance on the validation set and
-            # early stop the training process
-            if self.early_stop:
-
-                # model predictions on the validation set
-                vacc, vloss = self.predict()
-
-                # append observed accuracy and loss to arrays
-                training_state['va'][:, epoch] = vacc.squeeze()
-                training_state['vl'][:, epoch] = vloss.squeeze()
-
-                # metric to assess model performance on the validation set
-                epoch_acc = vacc.squeeze().mean()
-
-                # whether the model improved with respect to the previous epoch
-                if es.is_better(epoch_acc, self.max_accuracy, self.delta):
-                    self.max_accuracy = epoch_acc
-                    # save the model state if the model improved with
-                    # respect to the previous epoch
-                    _ = self.model.save(self.state_file,
-                                        self.optimizer,
-                                        self.bands,
-                                        self.state_path)
-
-                    # save losses and accuracy
-                    self._save_loss(training_state,
-                                    self.checkpoint,
-                                    self.checkpoint_state)
-
-                # whether the early stopping criterion is met
-                if es.stop(epoch_acc):
-                    break
-
-            else:
-                # if no early stopping is required, the model state is saved
-                # after each epoch
-                _ = self.model.save(self.state_file,
-                                    self.optimizer,
-                                    self.bands,
-                                    self.state_path)
-
-                # save losses and accuracy after each epoch
-                self._save_loss(training_state,
-                                self.checkpoint,
-                                self.checkpoint_state)
-
-        return training_state
-
-    def predict(self):
-
-        print('------------------------ Predicting --------------------------')
-
-        # send the model to the gpu if available
-        self.model = self.model.to(self.device)
-
-        # set the model to evaluation mode
-        print('Setting model to evaluation mode ...')
-        self.model.eval()
-
-        # create arrays of the observed losses and accuracies
-        accuracies = np.zeros(shape=(len(self.valid_dl), 1))
-        losses = np.zeros(shape=(len(self.valid_dl), 1))
-
-        # iterate over the validation/test set
-        print('Calculating accuracy on the validation set ...')
-        for batch, (inputs, labels) in enumerate(self.valid_dl):
-
-            # send the data to the gpu if available
-            inputs = inputs.to(self.device)
-            labels = labels.to(self.device)
-
-            # calculate network outputs without computing gradients
-            with torch.no_grad():
-                outputs = self.model(inputs)
-
-            # compute loss
-            loss = self.loss_function(outputs, labels.long())
-            losses[batch, 0] = loss.item()
-
-            # calculate predicted class labels
-            pred = F.softmax(outputs, dim=1).argmax(dim=1)
-
-            # calculate accuracy on current batch
-            acc = accuracy_function(pred, labels)
-            accuracies[batch, 0] = acc
-
-            # print progress
-            print('Mini-batch: {:d}/{:d}, Accuracy: {:.2f}'
-                  .format(batch + 1, len(self.valid_dl), acc))
-
-        # calculate overall accuracy on the validation/test set
-        print('After training for {:d} epochs, we achieved an overall '
-              'accuracy of {:.2f}% on the validation set!'
-              .format(self.model.epoch, accuracies.mean() * 100))
-
-        return accuracies, losses
-
-    def _init_state(self):
-
-        # file to save model state to
-        # format: network_dataset_seed_tilesize_batchsize_bands.pt
-
-        # get the band numbers
-        bformat = ''.join(band[0] +
-                          str(self.dataset.sensor.__members__[band].value) for
-                          band in self.bands)
-
-        # model state filename
-        self.state_file = ('{}_{}_s{}_t{}_b{}_{}.pt'
-                           .format(self.model.__name__,
-                                   self.dataset.__class__.__name__,
-                                   self.seed,
-                                   self.tile_size,
-                                   self.batch_size,
-                                   bformat))
-
-        # check whether a pretrained model was used and change the state
-        # filename accordingly
-        if self.pretrained:
-            # add the configuration of the pretrained model to the state name
-            self.state_file = (self.state_file.replace('.pt', '_') +
-                               'pretrained_' + self.pretrained_model)
-
-        # path to model state
-        self.state = os.path.join(self.state_path, self.state_file)
-
-        # path to model loss/accuracy
-        self.loss_state = self.state.replace('.pt', '_loss.pt')
-
-    def _init_dataset(self):
-
-        # the dataset name
-        self.dataset_name = os.path.basename(self.root_dir)
-
-        # check whether the dataset is currently supported
-        if self.dataset_name not in SupportedDatasets.__members__:
-            raise ValueError('{} is not a valid dataset. '
-                             .format(self.dataset_name) +
-                             'Available datasets are: \n' +
-                             '\n'.join(name for name, _ in
-                                       SupportedDatasets.__members__.items()))
-        else:
-            self.dataset_class = SupportedDatasets.__members__[
-                self.dataset_name].value
-
-        # instantiate the dataset
-        self.dataset = self.dataset_class(
-            self.root_dir,
-            use_bands=self.bands,
-            tile_size=self.tile_size,
-            sort=self.sort,
-            transforms=self.transforms,
-            pad=self.pad,
-            cval=self.cval,
-            gt_pattern=self.gt_pattern
-            )
-
-        # the mode to split the dataset
-        if self.split_mode not in VALID_SPLIT_MODES:
-            raise ValueError('{} is not supported. Valid modes are {}, see '
-                             'pysegcnn.main.config.py for a description of '
-                             'each mode.'.format(self.split_mode,
-                                                 VALID_SPLIT_MODES))
-        if self.split_mode == 'random':
-            self.subset = RandomTileSplit(self.dataset,
-                                          self.ttratio,
-                                          self.tvratio,
-                                          self.seed)
-        elif self.split_mode == 'scene':
-            self.subset = RandomSceneSplit(self.dataset,
-                                           self.ttratio,
-                                           self.tvratio,
-                                           self.seed)
-        elif self.split_mode == 'date':
-            self.subset = DateSplit(self.dataset,
-                                    self.date,
-                                    self.dateformat)
-
-        # the training, validation and test datasets
-        self.train_ds, self.valid_ds, self.test_ds = self.subset.split()
-
-        # whether to drop training samples whose fraction of pixels equal to
-        # the constant padding value self.cval is >= self.drop
-        if self.pad and self.drop:
-            self._drop(self.train_ds)
-
-        # the shape of a single batch
-        self.batch_shape = (len(self.bands), self.tile_size, self.tile_size)
-
-        # the training dataloader
-        self.train_dl = None
-        if len(self.train_ds) > 0:
-            self.train_dl = DataLoader(self.train_ds,
-                                       self.batch_size,
-                                       shuffle=True,
-                                       drop_last=False)
-
-        # the validation dataloader
-        self.valid_dl = None
-        if len(self.valid_ds) > 0:
-            self.valid_dl = DataLoader(self.valid_ds,
-                                       self.batch_size,
-                                       shuffle=True,
-                                       drop_last=False)
-
-        # the test dataloader
-        self.test_dl = None
-        if len(self.test_ds) > 0:
-            self.test_dl = DataLoader(self.test_ds,
-                                      self.batch_size,
-                                      shuffle=True,
-                                      drop_last=False)
-
-    def _init_model(self):
-
-        # initial accuracy on the validation set
-        self.max_accuracy = 0
-
-        # set the model checkpoint to None, overwritten when resuming
-        # training from an existing model checkpoint
-        self.checkpoint_state = None
-
-        # case (1): build a model for the specified dataset
-        if not self.pretrained and not self.checkpoint:
-
-            # instantiate the model
-            self.model = self.model(in_channels=len(self.dataset.use_bands),
-                                    nclasses=len(self.dataset.labels),
-                                    filters=self.filters,
-                                    skip=self.skip_connection,
-                                    **self.kwargs)
-
-            # the optimizer used to update the model weights
-            self.optimizer = self.optimizer(self.model.parameters(), self.lr)
-
-        # case (2): using a pretrained model without an existing checkpoint
-        # on a new dataset, i.e. transfer learning
-        elif self.pretrained and not self.checkpoint:
-            # load the pretrained model
-            self.model = self.from_pretrained()
-
-            # the optimizer used to update the model weights
-            self.optimizer = self.optimizer(self.model.parameters(), self.lr)
-
-        # case (3): using a pretrained model with an existing checkpoint on
-        # the same dataset the pretrained model was trained on
-        elif self.checkpoint:
-
-            # instantiate the model
-            self.model = self.model(in_channels=len(self.dataset.use_bands),
-                                    nclasses=len(self.dataset.labels),
-                                    filters=self.filters,
-                                    skip=self.skip_connection,
-                                    **self.kwargs)
-
-            # the optimizer used to update the model weights
-            self.optimizer = self.optimizer(self.model.parameters(), self.lr)
-
-        # whether to resume training from an existing model checkpoint
-        if self.checkpoint:
-            (self.checkpoint_state,
-             self.max_accuracy) = self.from_checkpoint()
-
-    # function to drop samples with a fraction of pixels equal to the constant
-    # padding value self.cval >= self.drop
-    def _drop(self, ds):
-
-        # iterate over the scenes returned by self.compose_scenes() and
-        # record which tiles to keep: popping from ds.indices while
-        # iterating over it would skip elements
-        self.dropped = []
-        keep = []
-        for i in ds.indices:
-
-            # the current scene
-            s = ds.dataset.scenes[i]
-
-            # the current tile in the ground truth
-            tile_gt = img2np(s['gt'], self.tile_size, s['tile'],
-                             self.pad, self.cval)
-
-            # percent of pixels equal to the constant padding value
-            npixels = (tile_gt[tile_gt == self.cval].size / tile_gt.size)
-
-            # drop samples where npixels >= self.drop
-            if npixels >= self.drop:
-                print('Skipping scene {}, tile {}: {:.2f}% padded pixels ...'
-                      .format(s['id'], s['tile'], npixels * 100))
-                self.dropped.append(s)
-            else:
-                keep.append(i)
-
-        ds.indices = keep
-
-    def _save_loss(self, training_state, checkpoint=False,
-                   checkpoint_state=None):
-
-        # save losses and accuracy
-        if checkpoint and checkpoint_state is not None:
-
-            # append the values from the checkpoint to the current training
-            # state
-            torch.save({
-                k1: np.hstack([v1, v2]) for (k1, v1), (k2, v2) in
-                zip(checkpoint_state.items(), training_state.items())
-                if k1 == k2},
-                self.loss_state)
-        else:
-            torch.save(training_state, self.loss_state)
-
-    def __repr__(self):
-
-        # representation string to print
-        fs = self.__class__.__name__ + '(\n'
-
-        # dataset
-        fs += '    (dataset):\n        '
-        fs += ''.join(self.dataset.__repr__()).replace('\n', '\n        ')
-
-        # batch size
-        fs += '\n    (batch):\n        '
-        fs += '- batch size: {}\n        '.format(self.batch_size)
-        fs += '- batch shape (b, h, w): {}'.format(self.batch_shape)
-
-        # dataset split
-        fs += '\n    (split):\n        '
-        fs += ''.join(self.subset.__repr__()).replace('\n', '\n        ')
-
-        # model
-        fs += '\n    (model):\n        '
-        fs += ''.join(self.model.__repr__()).replace('\n', '\n        ')
-
-        # optimizer
-        fs += '\n    (optimizer):\n        '
-        fs += ''.join(self.optimizer.__repr__()).replace('\n', '\n        ')
-        fs += '\n)'
-
-        return fs
-
-
-class EarlyStopping(object):
-
-    def __init__(self, mode='max', min_delta=0, patience=10):
-
-        # check if mode is correctly specified
-        if mode not in ['min', 'max']:
-            raise ValueError('Mode "{}" not supported. '
                             'Mode is either "min" (check whether the metric '
-                             'decreased, e.g. loss) or "max" (check whether '
-                             'the metric increased, e.g. accuracy).'
-                             .format(mode))
-
-        # mode to determine if the metric improved
-        self.mode = mode
-
-        # whether to check for an increase or a decrease in a given metric
-        self.is_better = self.decreased if mode == 'min' else self.increased
-
-        # minimum change in the metric to be classified as an improvement
-        self.min_delta = min_delta
-
-        # number of epochs to wait for improvement
-        self.patience = patience
-
-        # initialize the best metric
-        self.best = None
-
-        # initialize the early stopping counter
-        self.counter = 0
-
-        # initialize the early stopping flag
-        self.early_stop = False
-
-    def stop(self, metric):
-
-        if self.best is not None:
-
-            # if the metric improved, reset the epochs counter, else, advance
-            if self.is_better(metric, self.best, self.min_delta):
-                self.counter = 0
-                self.best = metric
-            else:
-                self.counter += 1
-                print('Early stopping counter: {}/{}'.format(self.counter,
-                                                             self.patience))
-
-                # if the metric did not improve over the last patience epochs,
-                # the early stopping criterion is met
-                if self.counter >= self.patience:
-                    print('Early stopping criterion met, exiting training ...')
-                    self.early_stop = True
-
-        else:
-            self.best = metric
-
-        return self.early_stop
-
-    def decreased(self, metric, best, min_delta):
-        return metric < best - min_delta
-
-    def increased(self, metric, best, min_delta):
-        return metric > best + min_delta
-- 
GitLab
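Usage note: EarlyStopping is driven by calling stop(metric) once per epoch, as
NetworkTrainer.train() does above; stop() returns True once the metric has not
improved by more than min_delta for patience consecutive epochs. A minimal
sketch of this protocol follows; the parameter values and the accuracy series
are purely illustrative and not taken from this patch:

    # monitor a metric that should increase (e.g. validation accuracy)
    es = EarlyStopping(mode='max', min_delta=0.01, patience=3)

    # hypothetical per-epoch validation accuracies
    for epoch, accuracy in enumerate([0.71, 0.78, 0.80, 0.80, 0.80, 0.80]):
        if es.stop(accuracy):
            print('Early stopping after epoch {:d}'.format(epoch + 1))
            break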