From e6d43c8a5f7d5aa71775f14e60e6fc657f91764c Mon Sep 17 00:00:00 2001
From: "Daniel.Frisinghelli" <daniel.frisinghelli@eurac.edu>
Date: Fri, 28 Aug 2020 17:19:58 +0200
Subject: [PATCH] Improving documentation.

---
 pysegcnn/core/split.py   | 414 +++++++++++++++----------------
 pysegcnn/core/trainer.py | 536 ++++++++++++++++++---------------------
 2 files changed, 460 insertions(+), 490 deletions(-)

diff --git a/pysegcnn/core/split.py b/pysegcnn/core/split.py
index db63f78..8df62d4 100644
--- a/pysegcnn/core/split.py
+++ b/pysegcnn/core/split.py
@@ -31,15 +31,15 @@ def _ds_len(ds, ratio):
 
     Parameters
     ----------
-    ds : `collections.Sized`
-        An object with a __len__ attribute.
+    ds : :py:class:`collections.Sized`
+        An object with a :py:meth:`__len__` method.
     ratio : `float`
         A ratio to multiply with the length of ``ds``.
 
     Returns
     -------
     n_samples: `int`
-        Length of ``ds`` * ``ratio``.
+        Length of ``ds * ratio``.
 
     """
     return int(np.round(len(ds) * ratio))
@@ -54,23 +54,23 @@ def random_tile_split(ds, tvratio, ttratio=1, seed=0):
     The parameters ``ttratio`` and ``tvratio`` control the size of the
     training, validation and test datasets.
 
-    Test dataset size      : (1 - ``ttratio``) * len(``ds``)
-    Train dataset size     : ``ttratio`` * ``tvratio`` * len(``ds``)
-    Validation dataset size: ``ttratio`` * (1 - ``tvratio``) * len(``ds``)
+    Test dataset size      : ``(1 - ttratio) * len(ds)``
+    Train dataset size     : ``ttratio * tvratio * len(ds)``
+    Validation dataset size: ``ttratio * (1 - tvratio) * len(ds)``
 
     Parameters
     ----------
-    ds : `pysegcnn.core.dataset.ImageDataset`
-        An instance of `~pysegcnn.core.dataset.ImageDataset`.
+    ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
+        An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`.
     tvratio : `float`
-        The ratio of training data to validation data, e.g. ``tvratio`` = 0.8
+        The ratio of training data to validation data, e.g. ``tvratio=0.8``
         means 80% training, 20% validation.
     ttratio : `float`, optional
         The ratio of training and validation data to test data, e.g.
-        ``ttratio`` = 0.6 means 60% for training and validation, 40% for
-        testing. The default is 1.
+        ``ttratio=0.6`` means 60% for training and validation, 40% for
+        testing. The default is `1`.
     seed : `int`, optional
-        The random seed for reproducibility. The default is 0.
+        The random seed for reproducibility. The default is `0`.
 
     Raises
     ------
@@ -82,11 +82,11 @@ def random_tile_split(ds, tvratio, ttratio=1, seed=0):
     subsets : `dict`
         Subset dictionary with keys:
             ``'train'``
-                dictionary containing the training scenes.
+                The training scenes (`dict`).
             ``'valid'``
-                dictionary containing the validation scenes.
+                The validation scenes (`dict`).
             ``'test'``
-                dictionary containing the test scenes.
+                The test scenes (`dict`).
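+
+    Examples
+    --------
+    An illustrative sketch only, assuming ``ds`` is an existing instance of
+    :py:class:`pysegcnn.core.dataset.ImageDataset`:
+
+    >>> # 48% of the tiles for training, 12% for validation, 40% for testing
+    >>> subsets = random_tile_split(ds, tvratio=0.8, ttratio=0.6, seed=0)
+    >>> sorted(subsets.keys())
+    ['test', 'train', 'valid']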
 
     """
     # set the random seed for reproducibility
@@ -128,7 +128,7 @@ def random_tile_split(ds, tvratio, ttratio=1, seed=0):
 
 
 def random_scene_split(ds, tvratio, ttratio=1, seed=0):
-    """Randomly split the tiles of a dataset.
+    """Semi-randomly split the tiles of a dataset.
 
     For each scene, all the tiles of the scene are included in either the
     training, validation or test set, respectively.
 
     The parameters ``ttratio`` and ``tvratio`` control the size of the
     training, validation and test datasets.
 
-    Test dataset size      : (1 - ``ttratio``) * len(``ds``)
-    Train dataset size     : ``ttratio`` * ``tvratio`` * len(``ds``)
-    Validation dataset size: ``ttratio`` * (1 - ``tvratio``) * len(``ds``)
+    Test dataset size      : ``(1 - ttratio) * len(ds)``
+    Train dataset size     : ``ttratio * tvratio * len(ds)``
+    Validation dataset size: ``ttratio * (1 - tvratio) * len(ds)``
 
     Parameters
     ----------
-    ds : `pysegcnn.core.dataset.ImageDataset`
-        An instance of `~pysegcnn.core.dataset.ImageDataset`.
+    ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
+        An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`.
     tvratio : `float`
-        The ratio of training data to validation data, e.g. ``tvratio`` = 0.8
+        The ratio of training data to validation data, e.g. ``tvratio=0.8``
         means 80% training, 20% validation.
     ttratio : `float`, optional
         The ratio of training and validation data to test data, e.g.
-        ``ttratio`` = 0.6 means 60% for training and validation, 40% for
-        testing. The default is 1.
+        ``ttratio=0.6`` means 60% for training and validation, 40% for
+        testing. The default is `1`.
     seed : `int`, optional
-        The random seed for reproducibility. The default is 0.
+        The random seed for reproducibility. The default is `0`.
 
     Raises
     ------
@@ -164,11 +164,11 @@ def random_scene_split(ds, tvratio, ttratio=1, seed=0):
     subsets : `dict`
         Subset dictionary with keys:
             ``'train'``
-                dictionary containing the training scenes.
+                The training scenes (`dict`).
             ``'valid'``
-                dictionary containing the validation scenes.
+                The validation scenes (`dict`).
             ``'test'``
-                dictionary containing the test scenes.
+                The test scenes (`dict`).
 
     """
     # set the random seed for reproducibility
@@ -210,27 +210,24 @@ def random_scene_split(ds, tvratio, ttratio=1, seed=0):
     return subsets
 
 
-# split the scenes of a dataset based on a date, useful for time series data
-# scenes before date build the training set, scenes after the date build the
-# validation set, the test set is empty
 def date_scene_split(ds, date, dateformat='%Y%m%d'):
     """Split the dataset based on a date.
 
-    Scenes before ``date`` build the training set, scenes after ``date`` build
-    the validation set, the test set is empty.
+    Scenes before ``date`` build the training dataset, scenes after ``date``
+    the validation dataset. The test set is empty.
 
     Useful for time series data.
 
     Parameters
     ----------
-    ds : `pysegcnn.core.dataset.ImageDataset`
-        An instance of `~pysegcnn.core.dataset.ImageDataset`.
+    ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
+        An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`.
     date : `str`
-        A date.
+        A date in the format ``dateformat``.
     dateformat : `str`, optional
         The format of ``date``. ``dateformat`` is used by
-        `datetime.datetime.strptime' to parse ``date`` to a `datetime.datetime`
-        object. The default is '%Y%m%d'.
+        :py:func:`datetime.datetime.strptime` to parse ``date`` to a
+        :py:class:`datetime.datetime` object. The default is `'%Y%m%d'`.
 
     Raises
     ------
@@ -242,11 +239,11 @@ def date_scene_split(ds, date, dateformat='%Y%m%d'):
     subsets : `dict`
         Subset dictionary with keys:
             ``'train'``
-                dictionary containing the training scenes.
+                The training scenes (`dict`).
             ``'valid'``
-                dictionary containing the validation scenes.
+                The validation scenes (`dict`).
             ``'test'``
-                dictionary containing the test scenes, empty.
+                The test scenes, empty (`dict`).
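+
+    Examples
+    --------
+    An illustrative sketch only, assuming ``ds`` is an existing instance of
+    :py:class:`pysegcnn.core.dataset.ImageDataset`:
+
+    >>> # scenes before 2020 build the training set, scenes after 2020
+    >>> # the validation set; the test set is empty
+    >>> subsets = date_scene_split(ds, date='20200101')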
""" # convert date to datetime object @@ -279,7 +276,7 @@ def pairwise_disjoint(sets): Parameters ---------- - sets : `list` [`collections.Sized`] + sets : `list` [:py:class:`collections.Sized`] A list of sized objects. Returns @@ -294,41 +291,50 @@ def pairwise_disjoint(sets): class CustomSubset(Subset): - """Custom subset inheriting `torch.utils.data.Subset`.""" + """Generic custom subset inheriting :py:class:`torch.utils.data.Subset`. - def __repr__(self): - """Representation of ``~pysegcnn.core.split.CustomSubset``.""" - # representation string - fs = '- {}: {:d} tiles ({:.2f}%)'.format( - self.name, len(self.scenes), 100 * len(self.scenes) / - len(self.dataset)) - - return fs + .. important:: + The training, validation and test datasets should be subclasses of + :py:class:`pysegcnn.core.split.CustomSubset`. -class SceneSubset(CustomSubset): - """A custom subset for dataset splits where the scenes are preserved. + See :py:class:`pysegcnn.core.split.RandomTileSplit` for an example + implementing the :py:class:`pysegcnn.core.split.RandomSubset` subset + class. - Parameters + Attributes ---------- - ds : `pysegcnn.core.dataset.ImageDataset` - An instance of `~pysegcnn.core.dataset.ImageDataset`. + dataset : :py:class:`pysegcnn.core.dataset.ImageDataset` + The dataset to split into subsets. indices : `list` [`int`] - List of the subset indices to access ``ds``. + List of indices to access the dataset. name : `str` Name of the subset. scenes : `list` [`dict`] List of the subset tiles. - scene_ids : `list` or `numpy.ndarray` - Container of the scene ids. - - Returns - ------- - None. + ids : `list` or :py:class:`numpy.ndarray` + Container of the scene identifiers. """ def __init__(self, ds, indices, name, scenes, scene_ids): + """Initialize. + + Parameters + ---------- + ds : :py:class:`pysegcnn.core.dataset.ImageDataset` + An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`. + indices : `list` [`int`] + List of indices to access ``ds``. ``indices`` must be pairwise + disjoint for each subset derived from the same dataset ``ds``. + name : `str` + Name of the subset. + scenes : `list` [`dict`] + List of the subset tiles. + scene_ids : `list` or :py:class:`numpy.ndarray` + Container of the scene identifiers. + + """ super().__init__(dataset=ds, indices=indices) # the name of the subset @@ -340,68 +346,76 @@ class SceneSubset(CustomSubset): # the names of the scenes self.ids = scene_ids + def __repr__(self): + """Representation string. -class RandomSubset(CustomSubset): - """A custom subset for random dataset splits. + Returns + ------- + fs : `str` + The representation string. - Parameters - ---------- - ds : `pysegcnn.core.dataset.ImageDataset` - An instance of `~pysegcnn.core.dataset.ImageDataset`. - indices : `list` [`int`] - List of the subset indices to access ``ds``. - name : `str` - Name of the subset. - scenes : `list` [`dict`] - List of the subset tiles. - scene_ids : `list` or `numpy.ndarray` - Container of the scene ids. + """ + fs = '- {}: {:d} tiles ({:.2f}%)'.format( + self.name, len(self.scenes), 100 * len(self.scenes) / + len(self.dataset)) - Returns - ------- - None. 
+        return fs
 
-    """
+
+class SceneSubset(CustomSubset):
+    """A custom subset for dataset splits where the scenes are preserved."""
 
     def __init__(self, ds, indices, name, scenes, scene_ids):
-        super().__init__(dataset=ds, indices=indices)
+        super().__init__(ds, indices, name, scenes, scene_ids)
 
-        # the name of the subset
-        self.name = name
 
-        # the scene in the subset
-        self.scenes = scenes
+class RandomSubset(CustomSubset):
+    """A custom subset for random dataset splits."""
+
+    def __init__(self, ds, indices, name, scenes, scene_ids):
+        super().__init__(ds, indices, name, scenes, scene_ids)
 
 
 class Split(object):
     """Generic class handling how ``ds`` is split.
 
-    Inherit `~pysegcnn.core.split.Split` and implement the
-    `~pysegcnn.core.split.Split.subsets` and
-    `~pysegcnn.core.split.Split.subset_type` method.
+    Each dataset should be split by a subclass of
+    :py:class:`pysegcnn.core.split.Split`, by calling the
+    :py:meth:`pysegcnn.core.split.Split.split` method.
 
-    Parameters
-    ----------
-    ds : `pysegcnn.core.dataset.ImageDataset`
-        An instance of `~pysegcnn.core.dataset.ImageDataset`.
+    .. important::
 
-    Returns
-    -------
-    None.
+        The :py:meth:`~pysegcnn.core.split.Split.subsets` and
+        :py:meth:`~pysegcnn.core.split.Split.subset_type` methods have to be
+        implemented when inheriting :py:class:`pysegcnn.core.split.Split`.
+
+        See :py:class:`pysegcnn.core.split.RandomTileSplit` for an example.
+
+    Attributes
+    ----------
+    ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
+        The dataset to split into training, validation and test set.
 
     """
 
     def __init__(self, ds):
+        """Initialize.
 
+        Parameters
+        ----------
+        ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
+            An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`.
+
+        """
         # the dataset to split
         self.ds = ds
 
     def split(self):
         """Split dataset into training, validation and test set.
 
-        `~pysegcnn.core.split.Split.split` works only if
-        `~pysegcnn.core.split.Split.subsets` and
-        `~pysegcnn.core.split.Split.subset_type` are implemented.
+        :py:meth:`~pysegcnn.core.split.Split.split` works only if
+        :py:meth:`~pysegcnn.core.split.Split.subsets` and
+        :py:meth:`~pysegcnn.core.split.Split.subset_type` are implemented.
 
         """
         # build the subsets
@@ -422,14 +436,14 @@ class Split(object):
         """Define training, validation and test sets.
 
         Wrapper method for
-        `pysegcnn.core.split.Split.random_tile_split`,
-        `pysegcnn.core.split.Split.random_scene_split` or
-        `pysegcnn.core.split.Split.date_scene_split`.
+        :py:func:`pysegcnn.core.split.random_tile_split`,
+        :py:func:`pysegcnn.core.split.random_scene_split` or
+        :py:func:`pysegcnn.core.split.date_scene_split`.
 
         Raises
         ------
         NotImplementedError
-            Raised if `pysegcnn.core.split.Split` is not inherited.
+            Raised if :py:class:`pysegcnn.core.split.Split` is not inherited.
 
         Returns
         -------
@@ -441,14 +455,13 @@ class Split(object):
     def subset_type(self):
         """Define the type of each subset.
 
-        Wrapper method for
-        `pysegcnn.core.split.RandomSubset` or
-        `pysegcnn.core.split.SceneSubset`.
+        Wrapper method for :py:class:`pysegcnn.core.split.RandomSubset` or
+        :py:class:`pysegcnn.core.split.SceneSubset`.
 
         Raises
        ------
         NotImplementedError
-            Raised if `pysegcnn.core.split.Split` is not inherited.
+            Raised if :py:class:`pysegcnn.core.split.Split` is not inherited.
 
         Returns
         -------
@@ -459,28 +472,43 @@ class Split(object):
 
 
 class DateSplit(Split):
-    """Split the dataset based on a date.
+    """Split a dataset based on a date.
 
-    Class wrapper for `pysegcnn.core.split.Split.date_scene_split`.
+    .. important::
 
-    Parameters
-    ----------
-    ds : `pysegcnn.core.dataset.ImageDataset`
-        An instance of `~pysegcnn.core.dataset.ImageDataset`.
-    date : 'str'
-        A date.
-    dateformat : 'str', optional
-        The format of ``date``. ``dateformat`` is used by
-        `datetime.datetime.strptime' to parse ``date`` to a `datetime.datetime`
-        object. The default is '%Y%m%d'.
+        Scenes before ``date`` build the training dataset, scenes after
+        ``date`` the validation dataset. The test set is empty.
 
-    Returns
-    -------
-    None.
+        Useful for time series data.
+
+    Class wrapper for :py:func:`pysegcnn.core.split.date_scene_split`.
+
+    Attributes
+    ----------
+    ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
+        The dataset to split into training, validation and test set.
+    date : `str`
+        The date used to split the dataset.
+    dateformat : `str`
+        The format of ``date``.
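+
+    Examples
+    --------
+    An illustrative sketch only, assuming ``ds`` is an existing instance of
+    :py:class:`pysegcnn.core.dataset.ImageDataset`:
+
+    >>> split = DateSplit(ds, date='20200101', dateformat='%Y%m%d')
+    >>> train_ds, valid_ds, test_ds = split.split()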
 
     """
 
     def __init__(self, ds, date, dateformat):
+        """Initialize.
+
+        Parameters
+        ----------
+        ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
+            An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`.
+        date : `str`
+            A date in the format ``dateformat``.
+        dateformat : `str`
+            The format of ``date``. ``dateformat`` is used by
+            :py:func:`datetime.datetime.strptime` to parse ``date`` to a
+            :py:class:`datetime.datetime` object.
+
+        """
         super().__init__(ds)
 
         # the date to split the dataset
@@ -492,28 +520,28 @@ class DateSplit(Split):
         self.dateformat = dateformat
 
     def subsets(self):
-        """Wrap `pysegcnn.core.split.Split.date_scene_split`.
+        """Wrap :py:func:`pysegcnn.core.split.date_scene_split`.
 
         Returns
         -------
         subsets : `dict`
             Subset dictionary with keys:
                 ``'train'``
-                    dictionary containing the training scenes.
+                    The training scenes (`dict`).
                 ``'valid'``
-                    dictionary containing the validation scenes.
+                    The validation scenes (`dict`).
                 ``'test'``
-                    dictionary containing the test scenes, empty.
+                    The test scenes, empty (`dict`).
 
         """
         return date_scene_split(self.ds, self.date, self.dateformat)
 
     def subset_type(self):
-        """Wrap `pysegcnn.core.split.SceneSubset`.
+        """Wrap :py:class:`pysegcnn.core.split.SceneSubset`.
 
         Returns
         -------
-        SceneSubset : `pysegcnn.core.split.SceneSubset`
+        SceneSubset : :py:class:`pysegcnn.core.split.SceneSubset`
             The subset type.
 
         """
@@ -521,31 +549,27 @@ class DateSplit(Split):
 
 
 class RandomSplit(Split):
-    """Randomly split the dataset.
-
-    Generic class for random dataset splits.
-
-    Parameters
-    ----------
-    ds : `pysegcnn.core.dataset.ImageDataset`
-        An instance of `~pysegcnn.core.dataset.ImageDataset`.
-    tvratio : `float`
-        The ratio of training data to validation data, e.g. ``tvratio`` = 0.8
-        means 80% training, 20% validation.
-    ttratio : `float`, optional
-        The ratio of training and validation data to test data, e.g.
-        ``ttratio`` = 0.6 means 60% for training and validation, 40% for
-        testing. The default is 1.
-    seed : `int`, optional
-        The random seed for reproducibility. The default is 0.
-
-    Returns
-    -------
-    None.
-
-    """
+    """Generic class for random dataset splits."""
 
     def __init__(self, ds, ttratio, tvratio, seed):
+        """Initialize.
+
+        Parameters
+        ----------
+        ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
+            An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`.
+        tvratio : `float`
+            The ratio of training data to validation data, e.g.
+            ``tvratio=0.8`` means 80% training, 20% validation.
+        ttratio : `float`
+            The ratio of training and validation data to test data, e.g.
+            ``ttratio=0.6`` means 60% for training and validation, 40% for
+            testing.
+        seed : `int`
+            The random seed used to generate the split. Useful for
+            reproducibility.
+
+        """
         super().__init__(ds)
 
         # the training, validation and test set ratios
@@ -559,28 +583,23 @@ class RandomSplit(Split):
 
 class RandomTileSplit(RandomSplit):
     """Randomly split the dataset.
 
-    For each scene, the tiles of the scene can be distributed among the
-    training, validation and test set.
+    .. important::
 
-    Class wrapper for `pysegcnn.core.split.Split.random_tile_split`.
+        For each scene, the tiles of the scene can be distributed among the
+        training, validation and test set.
 
-    Parameters
+    Class wrapper for :py:func:`pysegcnn.core.split.random_tile_split`.
+
+    Attributes
     ----------
-    ds : `pysegcnn.core.dataset.ImageDataset`
-        An instance of `~pysegcnn.core.dataset.ImageDataset`.
+    ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
+        The dataset to split into training, validation and test set.
     tvratio : `float`
-        The ratio of training data to validation data, e.g. ``tvratio`` = 0.8
-        means 80% training, 20% validation.
-    ttratio : `float`, optional
-        The ratio of training and validation data to test data, e.g.
-        ``ttratio`` = 0.6 means 60% for training and validation, 40% for
-        testing. The default is 1.
-    seed : `int`, optional
-        The random seed for reproducibility. The default is 0.
-
-    Returns
-    -------
-    None.
+        The ratio of training data to validation data.
+    ttratio : `float`
+        The ratio of training and validation data to test data.
+    seed : `int`
+        The random seed used to generate the split.
 
     """
 
@@ -588,29 +607,29 @@ class RandomTileSplit(RandomSplit):
         super().__init__(ds, ttratio, tvratio, seed)
 
     def subsets(self):
-        """Wrap `pysegcnn.core.split.Split.random_tile_split`.
+        """Wrap :py:func:`pysegcnn.core.split.random_tile_split`.
 
         Returns
         -------
         subsets : `dict`
             Subset dictionary with keys:
                 ``'train'``
-                    dictionary containing the training scenes.
+                    The training scenes (`dict`).
                 ``'valid'``
-                    dictionary containing the validation scenes.
+                    The validation scenes (`dict`).
                 ``'test'``
-                    dictionary containing the test scenes, empty.
+                    The test scenes (`dict`).
 
         """
         return random_tile_split(self.ds, self.tvratio, self.ttratio,
                                  self.seed)
 
     def subset_type(self):
-        """Wrap `pysegcnn.core.split.RandomSubset`.
+        """Wrap :py:class:`pysegcnn.core.split.RandomSubset`.
 
         Returns
         -------
-        SceneSubset : `pysegcnn.core.split.RandomSubset`
+        RandomSubset : :py:class:`pysegcnn.core.split.RandomSubset`
             The subset type.
 
         """
@@ -618,30 +637,25 @@ class RandomTileSplit(RandomSplit):
 
 
 class RandomSceneSplit(RandomSplit):
-    """Randomly split the dataset.
+    """Semi-randomly split the dataset.
 
-    For each scene, all the tiles of the scene are included in either the
-    training, validation or test set, respectively.
+    .. important::
 
-    Class wrapper for `pysegcnn.core.split.Split.random_scene_split`.
+        For each scene, all the tiles of the scene are included in either the
+        training, validation or test set, respectively.
 
-    Parameters
+    Class wrapper for :py:func:`pysegcnn.core.split.random_scene_split`.
+
+    Attributes
     ----------
-    ds : `pysegcnn.core.dataset.ImageDataset`
-        An instance of `~pysegcnn.core.dataset.ImageDataset`.
+    ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
+        The dataset to split into training, validation and test set.
     tvratio : `float`
-        The ratio of training data to validation data, e.g. ``tvratio`` = 0.8
-        means 80% training, 20% validation.
-    ttratio : `float`, optional
-        The ratio of training and validation data to test data, e.g.
-        ``ttratio`` = 0.6 means 60% for training and validation, 40% for
-        testing. The default is 1.
-    seed : `int`, optional
-        The random seed for reproducibility. The default is 0.
-
-    Returns
-    -------
-    None.
+        The ratio of training data to validation data.
+    ttratio : `float`
+        The ratio of training and validation data to test data.
+    seed : `int`
+        The random seed used to generate the split.
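+
+    Examples
+    --------
+    An illustrative sketch only, assuming ``ds`` is an existing instance of
+    :py:class:`pysegcnn.core.dataset.ImageDataset`:
+
+    >>> # 60% of the scenes for training and validation, 40% for testing
+    >>> split = RandomSceneSplit(ds, ttratio=0.6, tvratio=0.8, seed=0)
+    >>> train_ds, valid_ds, test_ds = split.split()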
 
     """
 
     def __init__(self, ds, ttratio, tvratio, seed):
         super().__init__(ds, ttratio, tvratio, seed)
 
     def subsets(self):
-        """Wrap `pysegcnn.core.split.Split.random_scene_split`.
+        """Wrap :py:func:`pysegcnn.core.split.random_scene_split`.
 
         Returns
         -------
         subsets : `dict`
             Subset dictionary with keys:
                 ``'train'``
-                    dictionary containing the training scenes.
+                    The training scenes (`dict`).
                 ``'valid'``
-                    dictionary containing the validation scenes.
+                    The validation scenes (`dict`).
                 ``'test'``
-                    dictionary containing the test scenes, empty.
+                    The test scenes (`dict`).
 
         """
         return random_scene_split(self.ds, self.tvratio, self.ttratio,
                                   self.seed)
 
     def subset_type(self):
-        """Wrap `pysegcnn.core.split.SceneSubset`.
+        """Wrap :py:class:`pysegcnn.core.split.SceneSubset`.
 
         Returns
         -------
-        SceneSubset : `pysegcnn.core.split.SceneSubset`
+        SceneSubset : :py:class:`pysegcnn.core.split.SceneSubset`
             The subset type.
 
         """
diff --git a/pysegcnn/core/trainer.py b/pysegcnn/core/trainer.py
index bc9714f..24f56fe 100644
--- a/pysegcnn/core/trainer.py
+++ b/pysegcnn/core/trainer.py
@@ -49,7 +49,7 @@ LOGGER = logging.getLogger(__name__)
 
 @dataclasses.dataclass
 class BaseConfig:
-    """Base `dataclasses.dataclass` for each configuration."""
+    """Base :py:class:`dataclasses.dataclass` for each configuration."""
 
     def __post_init__(self):
         """Check the type of each argument.
 
@@ -60,10 +60,6 @@ class BaseConfig:
             Raised if the conversion to the specified type of the argument
             fails.
 
-        Returns
-        -------
-        None.
-
         """
         # check input types
         for field in dataclasses.fields(self):
@@ -88,44 +84,42 @@ class DatasetConfig(BaseConfig):
 
     Instanciate a dataset.
 
-    Parameters
+    Attributes
     ----------
     dataset_name : `str`
         The name of the dataset.
-    root_dir : `pathlib.Path`
+    root_dir : `str`
         The root directory, path to the dataset.
     bands : `list` [`str`]
         A list of the spectral bands to use.
-    tile_size : `int`
-        The size of the tiles. Each scene is divided into square tiles of shape
-        (tile_size, tile_size).
+    tile_size : `int` or `None`
+        The size of the tiles. If not `None`, each scene is divided into
+        square tiles of shape ``(tile_size, tile_size)``.
     gt_pattern : `str`
-        A pattern to match the ground truth naming convention. All directories
-        and subdirectories in ``root_dir`` are searched for files matching
-        ``gt_pattern``.
+        A regular expression to match the ground truth naming convention.
+        All directories and subdirectories in ``root_dir`` are searched for
+        files matching ``gt_pattern``.
     seed : `int`
-        The random seed. Used to split the dataset into training, validation
-        and test set. Useful for reproducibility. The default is 0.
+        The random seed. Used to split the dataset into training,
+        validation and test set. Useful for reproducibility.
     sort : `bool`, optional
         Whether to chronologically sort the samples. Useful for time series
-        data. The default is False.
-    transforms : `list` [`pysegcnn.core.split.Augment`], optional
-        List of `pysegcnn.core.split.Augment` instances. Each item in
-        ``transforms`` generates a distinct transformed version of the dataset.
-        The total dataset is composed of the original untransformed dataset
-        together with each transformed version of it.
-        If ``transforms`` = [], only the original dataset is used.
-        The default is [].
+        data. The default is `False`.
+    transforms : `list`, optional
+        List of :py:class:`pysegcnn.core.transforms.Augment` instances.
+        Each item in ``transforms`` generates a distinct transformed
+        version of the dataset. The total dataset is composed of the
+        original untransformed dataset together with each transformed
+        version of it. If ``transforms=[]``, only the original dataset is
+        used. The default is `[]`.
     pad : `bool`, optional
-        Whether to center pad the input image. Set ``pad`` = True, if the
-        images are not evenly divisible by the ``tile_size``. The image data is
-        padded with a constant padding value of zero. For each image, the
-        corresponding ground truth image is padded with a "no data" label.
-        The default is False.
-
-    Returns
-    -------
-    None.
+        Whether to center pad the input image. Set ``pad=True``, if the
+        images are not evenly divisible by the ``tile_size``. The image
+        data is padded with a constant padding value of zero. For each
+        image, the corresponding ground truth image is padded with a
+        "no data" label. The default is `False`.
+    dataset_class : :py:class:`pysegcnn.core.dataset.ImageDataset`
+        A subclass of :py:class:`pysegcnn.core.dataset.ImageDataset`.
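+
+    Examples
+    --------
+    An illustrative sketch only; the dataset name, root directory and
+    ground truth pattern are hypothetical placeholders:
+
+    >>> config = DatasetConfig(dataset_name='Sparcs',
+    ...                        root_dir='path/to/dataset',
+    ...                        bands=['red', 'green', 'blue', 'nir'],
+    ...                        tile_size=128,
+    ...                        gt_pattern='(.*)mask.png',
+    ...                        seed=0)
+    >>> ds = config.init_dataset()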
 
     """
 
@@ -150,11 +144,8 @@ class DatasetConfig(BaseConfig):
             Raised if ``root_dir`` does not exist.
         TypeError
             Raised if not each item in ``transforms`` is an instance of
-            `pysegcnn.core.split.Augment` in case ``transforms`` is not empty.
-
-        Returns
-        -------
-        None.
+            :py:class:`pysegcnn.core.split.Augment` in case ``transforms`` is
+            not empty.
 
         """
         # check input types
@@ -179,8 +170,8 @@ class DatasetConfig(BaseConfig):
 
         Returns
         -------
-        dataset : `pysegcnn.core.dataset.ImageDataset`
-            An instance of `pysegcnn.core.dataset.ImageDataset`.
+        dataset : :py:class:`pysegcnn.core.dataset.ImageDataset`
+            An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`.
 
         """
         # instanciate the dataset
@@ -204,31 +195,31 @@ class SplitConfig(BaseConfig):
 
     Split a dataset into training, validation and test set.
 
-    Parameters
+    Attributes
     ----------
     split_mode : `str`
         The mode to split the dataset.
     ttratio : `float`
         The ratio of training and validation data to test data, e.g.
-        ``ttratio`` = 0.6 means 60% for training and validation, 40% for
+        ``ttratio=0.6`` means 60% for training and validation, 40% for
         testing.
     tvratio : `float`
-        The ratio of training data to validation data, e.g. ``tvratio`` = 0.8
+        The ratio of training data to validation data, e.g. ``tvratio=0.8``
         means 80% training, 20% validation.
     date : `str`, optional
-        A date. Used if ``split_mode`` = 'date'. The default is 'yyyymmdd'.
+        A date. Used if ``split_mode='date'``. The default is `'yyyymmdd'`.
     dateformat : `str`, optional
         The format of ``date``. ``dateformat`` is used by
-        `datetime.datetime.strptime' to parse ``date`` to a `datetime.datetime`
-        object. The default is '%Y%m%d'.
+        :py:func:`datetime.datetime.strptime` to parse ``date`` to a
+        :py:class:`datetime.datetime` object. The default is `'%Y%m%d'`.
     drop : `float`, optional
         Whether to drop samples (during training only) with a fraction of
-        pixels equal to the constant padding value >= ``drop``. ``drop`` = 0
-        means, do not drop any samples. The default is 0.
-
-    Returns
-    -------
-    None.
+        pixels equal to the constant padding value >= ``drop``. ``drop=0``
+        means, do not drop any samples. The default is `0`.
+    split_class : :py:class:`pysegcnn.core.split.Split`
+        A subclass of :py:class:`pysegcnn.core.split.Split`.
+    dropped : `list` [`dict`]
+        List of the dropped samples.
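+
+    Examples
+    --------
+    An illustrative sketch only; ``split_mode`` has to match one of the
+    supported split modes and ``ds`` is assumed to be an existing instance
+    of :py:class:`pysegcnn.core.dataset.ImageDataset`:
+
+    >>> config = SplitConfig(split_mode='random', ttratio=1, tvratio=0.8)
+    >>> train_ds, valid_ds, test_ds = config.train_val_test_split(ds)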
 
     """
 
@@ -247,10 +238,6 @@ class SplitConfig(BaseConfig):
         ValueError
             Raised if ``split_mode`` is not supported.
 
-        Returns
-        -------
-        None.
-
         """
         # check input types
         super().__post_init__()
 
         # check if the split mode is valid
         self.split_class = item_in_enum(self.split_mode, SupportedSplits)
 
+        # list of dropped samples
+        self.dropped = []
+
     @staticmethod
-    def _drop_samples(ds, drop_threshold=1):
+    def drop_samples(ds, drop_threshold=1):
         """Drop samples with a fraction of pixels equal to the padding value.
 
         Parameters
         ----------
-        ds : `pysegcnn.core.split.RandomSubset` or
-            `pysegcnn.core.split.SceneSubset`.
-            An instance of `pysegcnn.core.split.RandomSubset` or
-            `pysegcnn.core.split.SceneSubset`.
+        ds : :py:class:`pysegcnn.core.split.CustomSubset`
+            An instance of :py:class:`pysegcnn.core.split.CustomSubset`.
         drop_threshold : `float`, optional
-            The threshold above which samples are dropped. ``drop_threshold`` =
-            1 means a sample is dropped, if all pixels are equal to the padding
-            value. ``drop_threshold`` = 0.8 means, drop a sample if 80% of the
-            pixels are equal to the padding value, etc. The default is 1.
+            The threshold above which samples are dropped. ``drop_threshold=1``
+            means a sample is dropped, if all pixels are equal to the padding
+            value. ``drop_threshold=0.8`` means, drop a sample if 80% of the
+            pixels are equal to the padding value, etc. The default is `1`.
 
         Returns
         -------
@@ -308,25 +296,22 @@ class SplitConfig(BaseConfig):
 
         Parameters
         ----------
-        ds : `pysegcnn.core.dataset.ImageDataset`
-            An instance of `pysegcnn.core.dataset.ImageDataset`.
+        ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
+            An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`.
 
         Raises
         ------
         TypeError
             Raised if ``ds`` is not an instance of
-            `pysegcnn.core.dataset.ImageDataset`.
+            :py:class:`pysegcnn.core.dataset.ImageDataset`.
 
         Returns
         -------
-        train_ds : `pysegcnn.core.split.RandomSubset` or
-            `pysegcnn.core.split.SceneSubset`.
+        train_ds : :py:class:`pysegcnn.core.split.CustomSubset`
            The training set.
-        valid_ds : `pysegcnn.core.split.RandomSubset` or
-            `pysegcnn.core.split.SceneSubset`.
+        valid_ds : :py:class:`pysegcnn.core.split.CustomSubset`
            The validation set.
-        test_ds : `pysegcnn.core.split.RandomSubset` or
-            `pysegcnn.core.split.SceneSubset`.
+        test_ds : :py:class:`pysegcnn.core.split.CustomSubset`
            The test set.
 
         """
@@ -350,35 +335,35 @@ class SplitConfig(BaseConfig):
 
         # whether to drop training samples with a fraction of pixels equal to
         # the constant padding value cval >= drop
         if ds.pad and self.drop > 0:
-            self.dropped = self._drop_samples(train_ds, self.drop)
+            self.dropped = self.drop_samples(train_ds, self.drop)
 
         return train_ds, valid_ds, test_ds
 
     @staticmethod
     def dataloaders(*args, **kwargs):
-        """Build `torch.utils.data.DataLoader` instances.
+        """Build :py:class:`torch.utils.data.DataLoader` instances.
 
         Parameters
         ----------
-        *args : `list` [`torch.utils.data.Dataset`]
-            List of instances of `torch.utils.data.Dataset`.
+        *args : `list` [:py:class:`torch.utils.data.Dataset`]
+            List of instances of :py:class:`torch.utils.data.Dataset`.
         **kwargs
             Additional keyword arguments passed to
-            `torch.utils.data.DataLoader`.
+            :py:class:`torch.utils.data.DataLoader`.
 
         Raises
         ------
         TypeError
             Raised if not each item in ``args`` is an instance of
-            `torch.utils.data.Dataset`.
+            :py:class:`torch.utils.data.Dataset`.
 
         Returns
         -------
-        loaders : `list` [`torch.utils.data.DataLoader`]
-            List of instances of `torch.utils.data.DataLoader`. If an instance
-            of `torch.utils.data.Dataset` in ``args`` is empty, `None` is
-            appended to ``loaders`` instead of an instance of
-            `torch.utils.data.DataLoader`.
+        loaders : `list` [:py:class:`torch.utils.data.DataLoader`]
+            List of instances of :py:class:`torch.utils.data.DataLoader`. If an
+            instance of :py:class:`torch.utils.data.Dataset` in ``args`` is
+            empty, `None` is appended to ``loaders`` instead of an instance of
+            :py:class:`torch.utils.data.DataLoader`.
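+
+        Examples
+        --------
+        An illustrative sketch only, assuming ``train_ds`` and ``valid_ds``
+        are existing instances of :py:class:`torch.utils.data.Dataset`; the
+        keyword arguments are passed through to
+        :py:class:`torch.utils.data.DataLoader`:
+
+        >>> train_dl, valid_dl = SplitConfig.dataloaders(
+        ...     train_ds, valid_ds, batch_size=64, shuffle=True)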
 
         """
         # check whether each dataset in args has the correct type
@@ -405,7 +390,7 @@ class ModelConfig(BaseConfig):
 
     Instanciate a (pretrained) model.
 
-    Parameters
+    Attributes
     ----------
     model_name : `str`
         The name of the model.
     filters : `list` [`int`]
         List of input filters of each convolutional block.
     torch_seed : `int`
         The random seed to initialize the model weights. Useful for
         reproducibility.
     optim_name : `str`
         The name of the optimizer to update the model weights.
     loss_name : `str`
         The name of the loss function measuring the model error.
     skip_connection : `bool`, optional
-        Whether to apply skip connections. The defaul is True.
+        Whether to apply skip connections. The default is `True`.
     kwargs: `dict`, optional
         The configuration for each convolution in the model. The default is
-        {'kernel_size': 3, 'stride': 1, 'dilation': 1}.
+        `{'kernel_size': 3, 'stride': 1, 'dilation': 1}`.
     batch_size : `int`, optional
         The model batch size. Determines the number of samples to process
-        before updating the model weights. The default is 64.
+        before updating the model weights. The default is `64`.
     checkpoint : `bool`, optional
         Whether to resume training from an existing model checkpoint. The
-        default is False.
+        default is `False`.
     transfer : `bool`, optional
         Whether to use a model for transfer learning on a new dataset. If True,
         the model architecture of ``pretrained_model`` is adjusted to a new
-        dataset. The default is False.
+        dataset. The default is `False`.
     pretrained_model : `str`, optional
         The name of the pretrained model to use for transfer learning.
-        The default is ''.
+        The default is `''`.
     lr : `float`, optional
         The learning rate used by the gradient descent algorithm.
-        The default is 0.001.
+        The default is `0.001`.
     early_stop : `bool`, optional
-        Whether to apply `early stopping`_. The default is False.
+        Whether to apply `Early Stopping`_. The default is `False`.
     mode : `str`, optional
         The mode of the early stopping. Depends on the metric measuring
-        performance. When using model loss as metric, use ``mode`` = 'min',
-        however, when using accuracy as metric, use ``mode`` = 'max'. For now,
-        only ``mode`` = 'max' is supported. Only used if ``early_stop`` = True.
-        The default is 'max'.
+        performance. When using model loss as metric, use ``mode='min'``,
+        however, when using accuracy as metric, use ``mode='max'``. For now,
+        only ``mode='max'`` is supported. Only used if ``early_stop=True``.
+        The default is `'max'`.
     delta : `float`, optional
         Minimum change in early stopping metric to be considered as an
-        improvement. Only used if ``early_stop`` = True. The default is 0.
+        improvement. Only used if ``early_stop=True``. The default is `0`.
     patience : `int`, optional
         The number of epochs to wait for an improvement in the early stopping
         metric. If the model does not improve over more than ``patience``
-        epochs, quit training. Only used if ``early_stop`` = True.
-        The default is 10.
+        epochs, quit training. Only used if ``early_stop=True``. The default is
+        `10`.
     epochs : `int`, optional
-        The maximum number of epochs to train. The default is 50.
+        The maximum number of epochs to train. The default is `50`.
     nthreads : `int`, optional
         The number of cpu threads to use during training. The default is
-        torch.get_num_threads().
+        :py:func:`torch.get_num_threads()`.
     save : `bool`, optional
         Whether to save the model state to disk. Model states are saved in
-        pysegcnn/main/_models. The default is True.
-
-    .. _early stopping:
+        pysegcnn/main/_models. The default is `True`.
+    model_class : :py:class:`pysegcnn.core.models.Network`
+        A subclass of :py:class:`pysegcnn.core.models.Network`.
+    optim_class : :py:class:`torch.optim.Optimizer`
+        A subclass of :py:class:`torch.optim.Optimizer`.
+    loss_class : :py:class:`torch.nn.Module`
+        A subclass of :py:class:`torch.nn.Module`.
+    state_path : :py:class:`pathlib.Path`
+        Path to save model states.
+    pretrained_path : :py:class:`pathlib.Path`
+        Path to the ``pretrained_model`` used if ``transfer=True``.
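+
+    Examples
+    --------
+    An illustrative sketch only; the model, optimizer and loss function
+    names are hypothetical and have to match the corresponding supported
+    values:
+
+    >>> config = ModelConfig(model_name='Unet',
+    ...                      filters=[32, 64, 128],
+    ...                      torch_seed=0,
+    ...                      optim_name='Adam',
+    ...                      loss_name='CrossEntropy')
+    >>> loss_function = config.init_loss_function()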
+
+    .. _Early Stopping:
         https://en.wikipedia.org/wiki/Early_stopping
 
-    Returns
-    -------
-    None.
-
     """
 
     model_name: str
@@ -505,10 +496,6 @@ class ModelConfig(BaseConfig):
             Raised if the model ``model_name``, the optimizer ``optim_name`` or
             the loss function ``loss_name`` is not supported.
 
-        Returns
-        -------
-        None.
-
        """
        # check input types
        super().__post_init__()
@@ -533,13 +520,13 @@ class ModelConfig(BaseConfig):
 
        Parameters
        ----------
-        model : `torch.nn.Module`
-            An instance of `torch.nn.Module`.
+        model : :py:class:`torch.nn.Module`
+            An instance of :py:class:`torch.nn.Module`.
 
        Returns
        -------
-        optimizer : `torch.optim.Optimizer`
-            An instance of `torch.optim.Optimizer`.
+        optimizer : :py:class:`torch.optim.Optimizer`
+            An instance of :py:class:`torch.optim.Optimizer`.
 
        """
        LOGGER.info('Optimizer: {}.'.format(repr(self.optim_class)))
@@ -554,8 +541,8 @@ class ModelConfig(BaseConfig):
 
        Returns
        -------
-        loss_function : `torch.nn.Module`
-            An instance of `torch.nn.Module`.
+        loss_function : :py:class:`torch.nn.Module`
+            An instance of :py:class:`torch.nn.Module`.
 
        """
        LOGGER.info('Loss function: {}.'.format(repr(self.loss_class)))
@@ -574,28 +561,30 @@ class ModelConfig(BaseConfig):
 
        Parameters
        ----------
-        ds : `pysegcnn.core.dataset.ImageDataset`
-            An instance of `pysegcnn.core.dataset.ImageDataset`.
-        state_file : `pathlib.Path`
+        ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
+            An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`.
+        state_file : :py:class:`pathlib.Path`
            Path to a model checkpoint.
 
        Returns
        -------
-        model : `pysegcnn.core.models.Network`
-            An instance of `pysegcnn.core.models.Network`.
-        optimizer : `torch.optim.Optimizer`
-            An instance of `torch.optim.Optimizer`.
-        checkpoint_state : `dict` [`str`, `numpy.ndarray`]
+        model : :py:class:`pysegcnn.core.models.Network`
+            An instance of :py:class:`pysegcnn.core.models.Network`.
+        optimizer : :py:class:`torch.optim.Optimizer`
+            An instance of :py:class:`torch.optim.Optimizer`.
+        checkpoint_state : `dict` [`str`, :py:class:`numpy.ndarray`]
            If the model checkpoint ``state_file`` exists, ``checkpoint_state``
            has keys:
                ``'ta'``
-                    The accuracy on the training set (`numpy.ndarray`).
+                    The accuracy on the training set
+                    (:py:class:`numpy.ndarray`).
                ``'tl'``
-                    The loss on the training set (`numpy.ndarray`).
+                    The loss on the training set (:py:class:`numpy.ndarray`).
                ``'va'``
-                    The accuracy on the validation set (`numpy.ndarray`).
+                    The accuracy on the validation set
+                    (:py:class:`numpy.ndarray`).
                ``'vl'``
-                    The loss on the validation set (`numpy.ndarray`).
+                    The loss on the validation set (:py:class:`numpy.ndarray`).
 
        """
        # write an initialization string to the log file
@@ -643,30 +632,32 @@ class ModelConfig(BaseConfig):
 
        Parameters
        ----------
-        model : `pysegcnn.core.models.Network`
-            An instance of `pysegcnn.core.models.Network`.
-        optimizer : `torch.optim.Optimizer`
-            An instance of `torch.optim.Optimizer`.
-        state_file : `pathlib.Path`
+        model : :py:class:`pysegcnn.core.models.Network`
+            An instance of :py:class:`pysegcnn.core.models.Network`.
+        optimizer : :py:class:`torch.optim.Optimizer`
+            An instance of :py:class:`torch.optim.Optimizer`.
+        state_file : :py:class:`pathlib.Path`
            Path to the model checkpoint.
 
        Returns
        -------
-        model : `pysegcnn.core.models.Network`
-            An instance of `pysegcnn.core.models.Network`.
-        optimizer : `torch.optim.Optimizer`
-            An instance of `torch.optim.Optimizer`.
-        checkpoint_state : `dict` [`str`, `numpy.ndarray`]
+        model : :py:class:`pysegcnn.core.models.Network`
+            An instance of :py:class:`pysegcnn.core.models.Network`.
+        optimizer : :py:class:`torch.optim.Optimizer`
+            An instance of :py:class:`torch.optim.Optimizer`.
+        checkpoint_state : `dict` [`str`, :py:class:`numpy.ndarray`]
            If the model checkpoint ``state_file`` exists, ``checkpoint_state``
            has keys:
                ``'ta'``
-                    The accuracy on the training set (`numpy.ndarray`).
+                    The accuracy on the training set
+                    (:py:class:`numpy.ndarray`).
                ``'tl'``
-                    The loss on the training set (`numpy.ndarray`).
+                    The loss on the training set (:py:class:`numpy.ndarray`).
                ``'va'``
-                    The accuracy on the validation set (`numpy.ndarray`).
+                    The accuracy on the validation set
+                    (:py:class:`numpy.ndarray`).
                ``'vl'``
-                    The loss on the validation set (`numpy.ndarray`).
+                    The loss on the validation set (:py:class:`numpy.ndarray`).
 
        """
        # whether to resume training from an existing model checkpoint
@@ -702,25 +693,25 @@ class ModelConfig(BaseConfig):
 
        Parameters
        ----------
-        state_file : `pathlib.Path`
+        state_file : :py:class:`pathlib.Path`
            Path to a pretrained model.
-        ds : `pysegcnn.core.dataset.ImageDataset`
-            An instance of `pysegcnn.core.dataset.ImageDataset`.
+        ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
+            An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`.
 
        Raises
        ------
        TypeError
            Raised if ``ds`` is not an instance of
-            `pysegcnn.core.dataset.ImageDataset`.
+            :py:class:`pysegcnn.core.dataset.ImageDataset`.
        ValueError
            Raised if the bands of ``ds`` do not match the bands of the dataset
            the pretrained model was trained with.
 
        Returns
        -------
-        model : `pysegcnn.core.models.Network`
-            An instance of `pysegcnn.core.models.Network`. The pretrained model
-            adjusted to the new dataset.
+        model : :py:class:`pysegcnn.core.models.Network`
+            An instance of :py:class:`pysegcnn.core.models.Network`. The
+            pretrained model adjusted to the new dataset.
 
        """
        # check input type
@@ -768,20 +759,16 @@ class StateConfig(BaseConfig):
 
    Generate the model state filename according to the following naming
    convention:
 
-    model_dataset_optimizer_splitmode_splitparams_tilesize_batchsize_bands.pt
+    `model_dataset_optimizer_splitmode_splitparams_tilesize_batchsize_bands.pt`
 
-    Parameters
+    Attributes
    ----------
-    ds : `pysegcnn.core.dataset.ImageDataset`
-        An instance of `pysegcnn.core.dataset.ImageDataset`.
-    sc : `pysegcnn.core.trainer.SplitConfig`
-        An instance of `pysegcnn.core.trainer.SplitConfig`.
-    mc : `pysegcnn.core.trainer.ModelConfig`
-        An instance of `pysegcnn.core.trainer.SplitConfig`.
-
-    Returns
-    -------
-    None.
+    ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
+        An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`.
+    sc : :py:class:`pysegcnn.core.trainer.SplitConfig`
+        An instance of :py:class:`pysegcnn.core.trainer.SplitConfig`.
+    mc : :py:class:`pysegcnn.core.trainer.ModelConfig`
+        An instance of :py:class:`pysegcnn.core.trainer.ModelConfig`.
 
    """
 
    ds: ImageDataset
    sc: SplitConfig
    mc: ModelConfig
 
    def __post_init__(self):
-        """Check the type of each argument.
-
-        Returns
-        -------
-        None.
-
-        """
+        """Check the type of each argument."""
        super().__post_init__()
 
    def init_state(self):
        """Generate the model state filename.
 
        Returns
        -------
-        state : `pathlib.Path`
+        state : :py:class:`pathlib.Path`
            The path to the model state file.
 
        """
@@ -865,41 +846,38 @@ class EvalConfig(BaseConfig):
 
    Evaluate a model.
 
-    Parameters
+    Attributes
    ----------
-    state_file : `pathlib.Path`
+    state_file : :py:class:`pathlib.Path`
        Path to the model to evaluate.
    test : `bool` or `None`
-        Whether to evaluate the model on the training(``test`` = `None`), the
-        validation (``test`` = False) or the test set (``test`` = True).
+        Whether to evaluate the model on the training (``test=None``), the
+        validation (``test=False``) or the test set (``test=True``).
    predict_scene : `bool`, optional
        The model prediction order. If False, the samples (tiles) of a dataset
        are predicted in any order and the scenes are not reconstructed.
        If True, the samples (tiles) are ordered according to the scene they
        belong to and a model prediction for each entire reconstructed scene is
-        returned. The default is False.
+        returned. The default is `False`.
    plot_samples : `bool`, optional
        Whether to save a plot of false color composite, ground truth and model
-        prediction for each sample (tile). Only used if ``predict_scene`` =
-        False. The default is False.
+        prediction for each sample (tile). Only used if
+        ``predict_scene=False``. The default is `False`.
    plot_scenes : `bool`, optional
        Whether to save a plot of false color composite, ground truth and model
-        prediction for each entire scene. Only used if ``predict_scene`` =
-        True. The default is False.
+        prediction for each entire scene. Only used if ``predict_scene=True``.
+        The default is `False`.
    plot_bands : `list` [`str`], optional
        The bands to build the false color composite. The default is
-        ['nir', 'red', 'green'].
+        `['nir', 'red', 'green']`.
    cm : `bool`, optional
-        Whether to compute and plot the confusion matrix. The default is True.
+        Whether to compute and plot the confusion matrix. The default is
+        `True`.
    figsize : `tuple`, optional
-        The figure size in centimeters. The default is (10, 10).
+        The figure size in centimeters. The default is `(10, 10)`.
    alpha : `int`, optional
        The level of the percentiles for contrast stretching of the false color
-        composite. The default is 0, i.e. no stretching.
-
-    Returns
-    -------
-    None.
+        composite. The default is `0`, i.e. no stretching.
 
    """
 
@@ -924,10 +902,6 @@ class EvalConfig(BaseConfig):
        TypeError
            Raised if ``test`` is not of type `bool` or `None`.
 
-        Returns
-        -------
-        None.
-
        """
        super().__post_init__()
 
@@ -959,7 +933,7 @@ class LogConfig(BaseConfig):
 
    Parameters
    ----------
-    state_file : `pathlib.Path`
+    state_file : :py:class:`pathlib.Path`
        Path to a model state file.
 
    """
 
    def __post_init__(self):
        """Check the type of each argument.
 
        Generate model log file.
 
-        Returns
-        -------
-        None.
-
- """ super().__post_init__() @@ -991,7 +961,7 @@ class LogConfig(BaseConfig): Returns ------- - date : `datetime.datetime` + date : :py:class:`datetime.datetime` The current date and time. """ @@ -1007,10 +977,6 @@ class LogConfig(BaseConfig): init_str : `str` The string to write to the model log file. - Returns - ------- - None. - """ LOGGER.info(80 * '-') LOGGER.info(init_str.format(LogConfig.now())) @@ -1021,70 +987,69 @@ class LogConfig(BaseConfig): class NetworkTrainer(BaseConfig): """Model training class. - Generic class to train an instance of `pysegcnn.core.models.Network` on - a dataset of type `pysegcnn.core.dataset.ImageDataset`. + Generic class to train an instance of + :py:class:`pysegcnn.core.models.Network` on a dataset of type + :py:class:`pysegcnn.core.dataset.ImageDataset`. Parameters ---------- - model : `pysegcnn.core.models.Network` - The model to train. An instance of `pysegcnn.core.models.Network`. - optimizer : `torch.optim.Optimizer` + model : :py:class:`pysegcnn.core.models.Network` + The model to train. An instance of + :py:class:`pysegcnn.core.models.Network`. + optimizer : :py:class:`torch.optim.Optimizer` The optimizer to update the model weights. An instance of - `torch.optim.Optimizer`. - loss_function : `torch.nn.Module` + :py:class:`torch.optim.Optimizer`. + loss_function : :py:class:`torch.nn.Module` The loss function to compute the model error. An instance of - `torch.nn.Module`. - train_dl : `torch.utils.data.DataLoader` - The training `torch.utils.data.DataLoader` instance. - valid_dl : `torch.utils.data.DataLoader` - The validation `torch.utils.data.DataLoader` instance. - test_dl : `torch.utils.data.DataLoader` - The test `torch.utils.data.DataLoader` instance. - state_file : `pathlib.Path` + :py:class:`torch.nn.Module`. + train_dl : :py:class:`torch.utils.data.DataLoader` + The training :py:class:`torch.utils.data.DataLoader` instance. + valid_dl : :py:class:`torch.utils.data.DataLoader` + The validation :py:class:`torch.utils.data.DataLoader` instance. + test_dl : :py:class:`torch.utils.data.DataLoader` + The test :py:class:`torch.utils.data.DataLoader` instance. + state_file : :py:class:`pathlib.Path` Path to save the model state. epochs : `int`, optional - The maximum number of epochs to train. The default is 1. + The maximum number of epochs to train. The default is `1`. nthreads : `int`, optional The number of cpu threads to use during training. The default is - torch.get_num_threads(). + :py:func:`torch.get_num_threads()`. early_stop : `bool`, optional - Whether to apply `early stopping`_. The default is False. + Whether to apply `Early Stopping`_. The default is `False`. mode : `str`, optional The mode of the early stopping. Depends on the metric measuring - performance. When using model loss as metric, use ``mode`` = 'min', - however, when using accuracy as metric, use ``mode`` = 'max'. For now, - only ``mode`` = 'max' is supported. Only used if ``early_stop`` = True. - The default is 'max'. + performance. When using model loss as metric, use ``mode='min'``, + however, when using accuracy as metric, use ``mode='max'``. For now, + only ``mode='max'`` is supported. Only used if ``early_stop=True``. + The default is `'max'`. delta : `float`, optional Minimum change in early stopping metric to be considered as an - improvement. Only used if ``early_stop`` = True. The default is 0. + improvement. Only used if ``early_stop=True``. The default is `0`. 
 
-    .. _early stopping:
+    .. _Early Stopping:
        https://en.wikipedia.org/wiki/Early_stopping
 
-    Returns
-    -------
-    None.
-
    """
 
    model: Network
@@ -1111,10 +1076,6 @@ class NetworkTrainer(BaseConfig):
 
        Configure early stopping if required.
 
-        Returns
-        -------
-        None.
-
        """
        super().__post_init__()
@@ -1148,13 +1109,13 @@ class NetworkTrainer(BaseConfig):
        training_state : `dict` [`str`, `numpy.ndarray`]
            The training state dictionary with keys:
                ``'ta'``
-                    The accuracy on the training set (`numpy.ndarray`).
+                    The accuracy on the training set (:py:class:`numpy.ndarray`).
                ``'tl'``
-                    The loss on the training set (`numpy.ndarray`).
+                    The loss on the training set (:py:class:`numpy.ndarray`).
                ``'va'``
-                    The accuracy on the validation set (`numpy.ndarray`).
+                    The accuracy on the validation set (:py:class:`numpy.ndarray`).
                ``'vl'``
-                    The loss on the validation set (`numpy.ndarray`).
+                    The loss on the validation set (:py:class:`numpy.ndarray`).
 
        """
        LOGGER.info(35 * '-' + ' Training ' + 35 * '-')
@@ -1264,10 +1225,10 @@ class NetworkTrainer(BaseConfig):
 
        Returns
        -------
-        accuracies : `numpy.ndarray`
+        accuracies : :py:class:`numpy.ndarray`
            The mean model prediction accuracy on each mini-batch in the
            validation set.
-        losses : `numpy.ndarray`
+        losses : :py:class:`numpy.ndarray`
            The model loss for each mini-batch in the validation set.
 
        """
 
        return accuracies, losses
 
    def save_state(self):
-        """Save the model state.
-
-        Returns
-        -------
-        None.
-
-        """
+        """Save the model state."""
        # whether to save the model state
        if self.save:
@@ -1346,7 +1301,7 @@ class NetworkTrainer(BaseConfig):
            )
 
    def __repr__(self):
-        """Representation of `~pysegcnn.core.trainer.NetworkTrainer`.
+        """Representation.
 
        Returns
        -------
@@ -1395,50 +1350,51 @@ class NetworkTrainer(BaseConfig):
 
 class EarlyStopping(object):
-    """`Early stopping`_ algorithm.
+    """`Early Stopping`_ algorithm.
 
    This implementation of the early stopping algorithm advances a counter
    each time a metric did not improve over a training epoch. If the metric
    does not improve over more than ``patience`` epochs, the early stopping
    criterion is met.
 
-    See `pysegcnn.core.trainer.NetworkTrainer.train` for an example
-    implementation.
+    See the :py:meth:`pysegcnn.core.trainer.NetworkTrainer.train` method for an
+    example implementation.
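+
+    A minimal sketch of the intended use; the ``stop`` method name is
+    illustrative only and assumes the metric is an accuracy to be maximized:
+
+    >>> es = EarlyStopping(mode='max', best=0, min_delta=0, patience=10)
+    >>> # within the training loop, after each epoch:
+    >>> # if es.stop(validation_accuracy):
+    >>> #     break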
 
-    Parameters
-    ----------
-    mode : `str`, optional
-        The mode of the early stopping. Depends on the metric measuring
-        performance. When using model loss as metric, use ``mode`` = 'min',
-        however, when using accuracy as metric, use ``mode`` = 'max'. The
-        default is 'max'.
-    best : `float`, optional
-        Threshold indicating the best metric score. At instanciation, set
-        ``best`` to the worst possible score of the metric. ``best`` will be
-        overwritten during training. The default is 0.
-    min_delta : `float`, optional
-        Minimum change in early stopping metric to be considered as an
-        improvement. The default is 0.
-    patience : `int`, optional
-        The number of epochs to wait for an improvement in the early stopping
-        metric. The default is 10.
-
-    Raises
-    ------
-    ValueError
-        Raised if ``mode`` is not either 'min' or 'max'.
-
-    Returns
-    -------
-    None.
-
-    .. _Early stopping:
+    .. _Early Stopping:
        https://en.wikipedia.org/wiki/Early_stopping
 
    """
 
    def __init__(self, mode='max', best=0, min_delta=0, patience=10):
+        """Initialize.
 
+        Parameters
+        ----------
+        mode : `str`, optional
+            The mode of the early stopping. Depends on the metric measuring
+            performance. When using model loss as metric, use ``mode='min'``,
+            however, when using accuracy as metric, use ``mode='max'``. The
+            default is `'max'`.
+        best : `float`, optional
+            Threshold indicating the best metric score. At instantiation, set
+            ``best`` to the worst possible score of the metric. ``best`` will
+            be overwritten during training. The default is `0`.
+        min_delta : `float`, optional
+            Minimum change in early stopping metric to be considered as an
+            improvement. The default is `0`.
+        patience : `int`, optional
+            The number of epochs to wait for an improvement in the early
+            stopping metric. If the model does not improve over more than
+            ``patience`` epochs, quit training. The default is `10`.
+
+        Raises
+        ------
+        ValueError
+            Raised if ``mode`` is not either 'min' or 'max'.
+
+        """
        # check if mode is correctly specified
        if mode not in ['min', 'max']:
            raise ValueError('Mode "{}" not supported. '
@@ -1546,7 +1502,7 @@ class EarlyStopping(object):
        return metric > best + min_delta
 
    def __repr__(self):
-        """Representation of `~pysegcnn.core.trainer.EarlyStopping`.
+        """Representation.
 
        Returns
        -------
--
GitLab