Skip to content
Snippets Groups Projects
Commit ad3d88eb authored by Frisinghelli Daniel's avatar Frisinghelli Daniel
Browse files

Implemented cross-validation subsampling.

parent b3b65b40
No related branches found
No related tags found
No related merge requests found
...@@ -15,11 +15,11 @@ License ...@@ -15,11 +15,11 @@ License
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# builtins # builtins
import datetime
import enum import enum
# externals # externals
import numpy as np import numpy as np
from sklearn.model_selection import KFold
from torch.utils.data.dataset import Subset from torch.utils.data.dataset import Subset
# the names of the subsets # the names of the subsets
...@@ -45,93 +45,50 @@ def _ds_len(ds, ratio): ...@@ -45,93 +45,50 @@ def _ds_len(ds, ratio):
return int(np.round(len(ds) * ratio)) return int(np.round(len(ds) * ratio))
def random_tile_split(ds, tvratio, ttratio=1, seed=0): def pairwise_disjoint(sets):
"""Randomly split the tiles of a dataset. """Check if ``sets`` are pairwise disjoint.
For each scene, the tiles of the scene can be distributed among the
training, validation and test set.
The parameters ``ttratio`` and ``tvratio`` control the size of the
training, validation and test datasets.
Test dataset size : ``(1 - ttratio) * len(ds)`` Sets are pairwise disjoint if the length of their union equals the sum of
Train dataset size : ``ttratio * tvratio * len(ds)`` their lengths.
Validation dataset size: ``ttratio * (1 - tvratio) * len(ds)``
Parameters Parameters
---------- ----------
ds : :py:class:`pysegcnn.core.dataset.ImageDataset` sets : `list` [:py:class:`collections.Sized`]
An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`. A list of sized objects.
tvratio : `float`
The ratio of training data to validation data, e.g. ``tvratio=0.8``
means 80% training, 20% validation.
ttratio : `float`, optional
The ratio of training and validation data to test data, e.g.
``ttratio=0.6`` means 60% for training and validation, 40% for
testing. The default is `1`.
seed : `int`, optional
The random seed for reproducibility. The default is `0`.
Raises
------
AssertionError
Raised if the splits are not pairwise disjoint.
Returns Returns
------- -------
subsets : `dict` disjoint : `bool`
Subset dictionary with keys: Whether the sets are pairwise disjoint.
``'train'``
The training scenes (`dict`).
``'valid'``
The validation scenes (`dict`).
``'test'``
The test scenes (`dict`).
""" """
# set the random seed for reproducibility union = set().union(*sets)
np.random.seed(seed) n = sum(len(u) for u in sets)
return n == len(union)
# randomly permute indices to access dataset
indices = np.random.permutation(len(ds))
# length of the training and validation dataset
# number of samples: (ttratio * len(ds))
trav_len = _ds_len(indices, ttratio)
trav_indices = indices[:trav_len]
# length of the training dataset
# number of samples: (ttratio * tvratio * len(ds))
train_len = _ds_len(trav_indices, tvratio)
train_ind = trav_indices[:train_len]
# length of the validation dataset
# number of samples: (ttratio * (1- tvratio) * len(ds))
valid_ind = trav_indices[train_len:]
# length of the test dataset
# number of samples: ((1 - ttratio) * len(ds))
test_ind = indices[trav_len:]
# get the tiles of the scenes of each dataset
subsets = {}
for name, dataset in enumerate([train_ind, valid_ind, test_ind]):
# store the indices and corresponding tiles of the current subset to def index_dict(indices):
# dictionary """Generate the training, validation and test set index dictionary.
subsets[SUBSET_NAMES[name]] = {k: ds.scenes[k] for k in dataset}
# check if the splits are disjoint Parameters
assert pairwise_disjoint([s.keys() for s in subsets.values()]) ----------
indices : `list` [:py:class:`numpy.ndarray`]
An ordered list composed of three :py:class:`numpy.ndarray` containing
the indices to the training, validation and test set.
return subsets Returns
-------
index_dict : `dict`
The index dictionary, where the keys are equal to ``SUBSET_NAMES`` and
the values are :py:class:`numpy.ndarray` containing the indices to the
training, validation and test set.
"""
return {k: v for k, v in zip(SUBSET_NAMES, indices)}
def random_scene_split(ds, tvratio, ttratio=1, seed=0):
"""Semi-randomly split the tiles of a dataset.
For each scene, all the tiles of the scene are included in either the def random_split(ds, tvratio=0.8, ttratio=1, seed=0, shuffle=True):
training, validation or test set, respectively. """Randomly split an iterable into training, validation and test set.
The parameters ``ttratio`` and ``tvratio`` control the size of the The parameters ``ttratio`` and ``tvratio`` control the size of the
training, validation and test datasets. training, validation and test datasets.
...@@ -142,17 +99,20 @@ def random_scene_split(ds, tvratio, ttratio=1, seed=0): ...@@ -142,17 +99,20 @@ def random_scene_split(ds, tvratio, ttratio=1, seed=0):
Parameters Parameters
---------- ----------
ds : :py:class:`pysegcnn.core.dataset.ImageDataset` ds : :py:class:`collections.Sized`
An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`. An object with a :py:meth:`__len__` method.
tvratio : `float` tvratio : `float`, optional
The ratio of training data to validation data, e.g. ``tvratio=0.8`` The ratio of training data to validation data, e.g. ``tvratio=0.8``
means 80% training, 20% validation. means 80% training, 20% validation. The default is `0.8`.
ttratio : `float`, optional ttratio : `float`, optional
The ratio of training and validation data to test data, e.g. The ratio of training and validation data to test data, e.g.
``ttratio=0.6`` means 60% for training and validation, 40% for ``ttratio=0.6`` means 60% for training and validation, 40% for
testing. The default is `1`. testing. The default is `1`.
seed : `int`, optional seed : `int`, optional
The random seed for reproducibility. The default is `0`. The random seed for reproducibility. The default is `0`.
shuffle : `bool`, optional
Whether to shuffle the data before splitting into batches. The default
is `True`.
Raises Raises
------ ------
...@@ -161,568 +121,210 @@ def random_scene_split(ds, tvratio, ttratio=1, seed=0): ...@@ -161,568 +121,210 @@ def random_scene_split(ds, tvratio, ttratio=1, seed=0):
Returns Returns
------- -------
subsets : `dict` indices : `list` [`dict`]
Subset dictionary with keys: List of index dictionaries as composed by
``'train'`` :py:func:`pysegcnn.core.split.index_dict`.
The training scenes (`dict`).
``'valid'``
The validation scenes (`dict`).
``'test'``
The test scenes (`dict`).
""" """
# set the random seed for reproducibility # set the random seed for reproducibility
np.random.seed(seed) np.random.seed(seed)
# get the names of the scenes and generate random permutation # whether to shuffle the data before splitting
scene_ids = np.random.permutation(np.unique([s['id'] for s in ds.scenes])) indices = np.arange(len(ds))
if shuffle:
# randomly permute indices to access the iterable
indices = np.random.permutation(indices)
# the training and validation scenes # the training and validation scenes
# number of samples: (ttratio * nscenes) # number of samples: (ttratio * len(ds))
trav_len = _ds_len(scene_ids, ttratio) trav_len = _ds_len(ds, ttratio)
trav_scenes = scene_ids[:trav_len] trav_ids = indices[:trav_len]
# the training scenes
# number of samples: (ttratio * tvratio * nscenes)
train_len = _ds_len(trav_scenes, tvratio)
train_scenes = trav_scenes[:train_len]
# the validation scenes
# number of samples: (ttratio * (1- tvratio) * nscenes)
valid_scenes = trav_scenes[train_len:]
# the test scenes
# number of samples:((1 - ttratio) * nscenes)
test_scenes = scene_ids[trav_len:]
# get the tiles of the scenes of each dataset # the training dataset indices
subsets = {} # number of samples: (ttratio * tvratio * len(ds))
for name, dataset in enumerate([train_scenes, valid_scenes, test_scenes]): train_len = _ds_len(trav_ids, tvratio)
train_ids = trav_ids[:train_len]
# store the indices and corresponding tiles of the current subset to # the validation dataset indices
# dictionary # number of samples: (ttratio * (1- tvratio) * len(ds))
subsets[SUBSET_NAMES[name]] = {k: v for k, v in enumerate(ds.scenes) valid_ids = trav_ids[train_len:]
if v['id'] in dataset}
# check if the splits are disjoint # the test dataset indices
assert pairwise_disjoint([s.keys() for s in subsets.values()]) # number of samples:((1 - ttratio) * len(ds))
test_ids = trav_ids[trav_len:]
return subsets # check whether the different datasets are pairwise disjoint
indices = index_dict([train_ids, valid_ids, test_ids])
assert pairwise_disjoint(indices.values())
return [indices]
def date_scene_split(ds, date, dateformat='%Y%m%d'):
"""Split the dataset based on a date.
Scenes before ``date`` build the training dataset, scenes after ``date`` def kfold_split(ds, k_folds=5, seed=0, shuffle=True):
the validation dataset. The test set is empty. """Randomly split an iterable into ``k_folds`` folds.
Useful for time series data. This function uses the cross validation index generator
:py:class:`sklearn.model_selection.KFold`.
Parameters Parameters
---------- ----------
ds : :py:class:`pysegcnn.core.dataset.ImageDataset` ds : :py:class:`collections.Sized`
An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`. An object with a :py:meth:`__len__` method.
date : `str` k_folds: `int`, optional
A date in the format ``dateformat``. The number of folds. Must be at least 2. The default is `5`.
dateformat : `str`, optional seed : `int`, optional
The format of ``date``. ``dateformat`` is used by The random seed for reproducibility. The default is `0`.
:py:func:`datetime.datetime.strptime` to parse ``date`` to a shuffle : `bool`, optional
:py:class:`datetime.datetime` object. The default is `'%Y%m%d'`. Whether to shuffle the data before splitting into batches. The default
is `True`.
Raises Raises
------ ------
AssertionError AssertionError
Raised if the splits are not pairwise disjoint. Raised if the (training, validation) folds are not pairwise disjoint.
Returns
-------
subsets : `dict`
Subset dictionary with keys:
``'train'``
The training scenes (`dict`).
``'valid'``
The validation scenes (`dict`).
``'test'``
The test scenes (`dict`).
""" """
# convert date to datetime object # set the random seed for reproducibility
date = datetime.datetime.strptime(date, dateformat) np.random.seed(seed)
# the training, validation and test scenes
train_scenes = {i: s for i, s in enumerate(ds.scenes) if s['date'] <= date}
valid_scenes = {i: s for i, s in enumerate(ds.scenes) if s['date'] > date}
test_scenes = {}
# build the training and test datasets
subsets = {}
for name, scenes in enumerate([train_scenes, valid_scenes, test_scenes]):
# store the indices and corresponding tiles of the current subset to
# dictionary
subsets[SUBSET_NAMES[name]] = scenes
# check if the splits are disjoint
assert pairwise_disjoint([s.keys() for s in subsets.values()])
return subsets
def pairwise_disjoint(sets):
    """Check if ``sets`` are pairwise disjoint.

    Sets are pairwise disjoint if the length of their union equals the sum of
    their lengths. As a consequence, a member that contains the same element
    more than once is reported as not disjoint.

    Parameters
    ----------
    sets : `list` [:py:class:`collections.Sized`]
        An iterable of sized objects whose elements are hashable. One-shot
        iterables, e.g. generators, are supported.

    Returns
    -------
    disjoint : `bool`
        Whether the sets are pairwise disjoint.

    """
    # materialize the input once: it is traversed twice below and a one-shot
    # iterable would be exhausted after building the union, which would make
    # the sum of the lengths evaluate to zero
    sets = list(sets)

    # the distinct elements over all members
    union = set().union(*sets)

    # the total number of elements, counted per member
    n = sum(len(u) for u in sets)

    return n == len(union)
class CustomSubset(Subset):
"""Generic custom subset inheriting :py:class:`torch.utils.data.Subset`.
.. important::
The training, validation and test datasets should be subclasses of # cross validation index generator from scikit-learn
:py:class:`pysegcnn.core.split.CustomSubset`. kf = KFold(k_folds, random_state=seed, shuffle=shuffle)
See :py:class:`pysegcnn.core.split.RandomTileSplit` for an example # generate the indices of the different folds
implementing the :py:class:`pysegcnn.core.split.RandomSubset` subset folds = []
class. for i, (train, valid) in enumerate(kf.split(ds)):
folds.append(index_dict([train, valid, np.array([])]))
assert pairwise_disjoint(folds[i].values())
return folds
Attributes
----------
dataset : :py:class:`pysegcnn.core.dataset.ImageDataset`
The dataset to split into subsets.
split_mode : `str`
The mode to split the dataset.
indices : `list` [`int`]
List of indices to access the dataset.
name : `str`
Name of the subset.
scenes : `list` [`dict`]
List of the subset tiles.
ids : `list` or :py:class:`numpy.ndarray`
Container of the scene identifiers.
""" class RandomSplit(object):
"""Base class for random splits of a `torch.utils.data.Dataset`."""
def __init__(self, ds, split_mode, indices, name, scenes, scene_ids): def __init__(self, ds, k_folds, seed=0, shuffle=True, tvratio=0.8,
"""Initialize. ttratio=1):
"""Randomly split a dataset into training, validation and test set.
Parameters Parameters
---------- ----------
ds : :py:class:`pysegcnn.core.dataset.ImageDataset` ds : :py:class:`collections.Sized`
An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`. An object with a :py:meth:`__len__` method.
split_mode : `str` k_folds: `int`
The mode to split the dataset. The number of folds.
indices : `list` [`int`] seed : `int`, optional
List of indices to access ``ds``. ``indices`` must be pairwise The random seed for reproducibility. The default is `0`.
disjoint for each subset derived from the same dataset ``ds``. shuffle : `bool`, optional
name : `str` Whether to shuffle the data before splitting into batches. The
Name of the subset. default is `True`.
scenes : `list` [`dict`] tvratio : `float`, optional
List of the subset tiles. The ratio of training data to validation data, e.g. ``tvratio=0.8``
scene_ids : `list` or :py:class:`numpy.ndarray` means 80% training, 20% validation. The default is `0.8`. Used if
Container of the scene identifiers. ``k_folds=1``.
ttratio : `float`, optional
""" The ratio of training and validation data to test data, e.g.
super().__init__(dataset=ds, indices=indices) ``ttratio=0.6`` means 60% for training and validation, 40% for
testing. The default is `1`. Used if ``k_folds=1``.
# the mode to split the dataset
self.split_mode = split_mode
# the name of the subset
self.name = name
# the scene in the subset
self.scenes = scenes
# the names of the scenes
self.ids = scene_ids
def __repr__(self):
"""Representation string.
Returns
-------
fs : `str`
The representation string.
""" """
fs = '- {}: {:d} tiles ({:.2f}%), mode = {}'.format(
self.name, len(self.scenes), 100 * len(self.scenes) /
len(self.dataset), self.split_mode)
return fs
class SceneSubset(CustomSubset):
    """Subset type used by splits which keep the tiles of a scene together."""

    def __init__(self, ds, split_mode, indices, name, scenes, scene_ids):
        # no additional state: every argument is handed to the parent class
        # initializer unchanged
        super().__init__(ds=ds, split_mode=split_mode, indices=indices,
                         name=name, scenes=scenes, scene_ids=scene_ids)
class RandomSubset(CustomSubset):
    """Subset type used by random dataset splits."""

    def __init__(self, ds, split_mode, indices, name, scenes, scene_ids):
        # no additional state: every argument is handed to the parent class
        # initializer unchanged
        super().__init__(ds=ds, split_mode=split_mode, indices=indices,
                         name=name, scenes=scenes, scene_ids=scene_ids)
class Split(object):
"""Generic class handling how ``ds`` is split.
Each dataset should be split by a subclass of
:py:class:`pysegcnn.core.split.Split`, by calling the
:py:meth:`pysegcnn.core.split.Split.split` method.
.. important::
The :py:meth:`~pysegcnn.core.split.Split.subsets` and # instance attributes
:py:meth:`~pysegcnn.core.split.Split.subset_type` methods have to be
implemented when inheriting :py:class:`pysegcnn.core.split.Split`.
Furthermore, a class attribute ``split_mode`` (`str`) has to be
defined and added to :py:class:`pysegcnn.core.split.SupportedSplits`.
See :py:class:`pysegcnn.core.split.RandomTileSplit` for an example.
Attributes
----------
ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
The dataset to split into training, validation and test set.
"""
def __init__(self, ds):
"""Initialize.
Parameters
----------
ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`.
"""
# the dataset to split
self.ds = ds self.ds = ds
self.k_folds = k_folds
self.seed = seed
self.shuffle = shuffle
def split(self): # instance attributes: training/validation/test split ratios
"""Split dataset into training, validation and test set. # used if kfolds=1
self.tvratio = tvratio
:py:meth:`~pysegcnn.core.split.Split.split` works only if self.ttratio = ttratio
:py:meth:`~pysegcnn.core.split.Split.subsets` and
:py:meth:`~pysegcnn.core.split.Split.subset_type` are implemented.
"""
# build the subsets
ds_split = []
for name, sub in self.subsets().items():
# the scene identifiers of the current subset: preserve the order
# of the scene identifiers
ids, idx = np.unique([s['id'] for s in sub.values()],
return_index=True)
ids = ids[np.argsort(idx)]
# build the subset
sbst = self.subset_type()(self.ds, self.split_mode,
list(sub.keys()), name,
list(sub.values()), ids)
ds_split.append(sbst)
return ds_split
def subsets(self):
"""Define training, validation and test sets.
Wrapper method for def generate_splits(self):
:py:func:`pysegcnn.core.split.Split.random_tile_split`,
:py:func:`pysegcnn.core.split.Split.random_scene_split` or
:py:func:`pysegcnn.core.split.Split.date_scene_split`.
Raises # check whether to generate a single or multiple folds
------ if self.k_folds > 1:
NotImplementedError # k-fold split
Raised if :py:class:`pysegcnn.core.split.Split` is not inherited. indices = kfold_split(
self.indices_to_split, self.k_folds, self.seed, self.shuffle)
else:
# single-fold split
indices = random_split(
self.indices_to_split, self.tvratio, self.ttratio, self.seed,
self.shuffle)
Returns return indices
-------
None.
""" @property
def indices_to_split(self):
raise NotImplementedError raise NotImplementedError
def subset_type(self): @property
"""Define the type of each subset. def indices(self):
Wrapper method for :py:class:`pysegcnn.core.split.RandomSubset` or
:py:class:`pysegcnn.core.split.SceneSubset`.
Raises
------
NotImplementedError
Raised if :py:class:`pysegcnn.core.split.Split` is not inherited.
Returns
-------
None.
"""
raise NotImplementedError raise NotImplementedError
def split(self):
class DateSplit(Split): # initialize training, validation and test subsets
"""Split a dataset based on a date. subsets = []
.. important::
Scenes before ``date`` build the training dataset, scenes after
``date`` the validation dataset. The test set is empty.
Useful for time series data.
Class wrapper for :py:func:`pysegcnn.core.split.date_scene_split`.
Attributes
----------
split_mode : `str`
The mode to split the dataset, i.e. `'date'`.
ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
The dataset to split into training, validation and test set.
date : `str`
The date used to split the dataset.
dateformat : `str`
The format of ``date``.
"""
# the split mode
split_mode = 'date'
def __init__(self, ds, date, dateformat):
"""Initialize.
Parameters
----------
ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`.
date : `str`
A date in the format ``dateformat``.
dateformat : `str`
The format of ``date``. ``dateformat`` is used by
:py:func:`datetime.datetime.strptime` to parse ``date`` to a
:py:class:`datetime.datetime` object.
"""
super().__init__(ds)
# the date to split the dataset
# before: training set
# after : validation set
self.date = date
# the format of the date
self.dateformat = dateformat
def subsets(self):
"""Wrap :py:func:`pysegcnn.core.split.Split.date_scene_split`.
Returns
-------
subsets : `dict`
Subset dictionary with keys:
``'train'``
The training scenes (`dict`).
``'valid'``
The validation scenes (`dict`).
``'test'``
The test scenes, empty (`dict`).
"""
return date_scene_split(self.ds, self.date, self.dateformat)
def subset_type(self):
"""Wrap :py:class:`pysegcnn.core.split.SceneSubset`.
Returns
-------
SceneSubset : :py:class:`pysegcnn.core.split.SceneSubset`
The subset type.
"""
return SceneSubset
class RandomSplit(Split):
"""Generic class for random dataset splits."""
def __init__(self, ds, ttratio, tvratio, seed):
"""Initialize.
Parameters
----------
ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
An instance of :py:class:`pysegcnn.core.dataset.ImageDataset`.
tvratio : `float`
The ratio of training data to validation data, e.g.
``tvratio=0.8`` means 80% training, 20% validation.
ttratio : `float`
The ratio of training and validation data to test data, e.g.
``ttratio=0.6`` means 60% for training and validation, 40% for
testing.
seed : `int`
The random seed used to generate the split. Useful for
reproducibility.
"""
super().__init__(ds)
# the training, validation and test set ratios # the training, validation and test indices
self.ttratio = ttratio for folds in self.indices:
self.tvratio = tvratio subsets.append(
index_dict([Subset(self.ds, ids) for ids in folds.values()]))
# the random seed: useful for reproducibility return subsets
self.seed = seed
class RandomTileSplit(RandomSplit): class RandomTileSplit(RandomSplit):
"""Randomly split the dataset. """Split a :py:class:`pysegcnn.core.dataset.ImageDataset` into tiles."""
.. important:: def __init__(self, ds, k_folds, seed=0, shuffle=True, tvratio=0.8,
ttratio=1):
# initialize super class
super().__init__(ds, k_folds, seed, shuffle, tvratio, ttratio)
For each scene, the tiles of the scene can be distributed among the @property
training, validation and test set. def indices_to_split(self):
return np.arange(len(self.ds))
Class wrapper for :py:func:`pysegcnn.core.split.random_tile_split`. @property
def indices(self):
Attributes return self.generate_splits()
----------
split_mode : `str`
The mode to split the dataset, i.e. `'random'`.
ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
The dataset to split into training, validation and test set.
tvratio : `float`
The ratio of training data to validation data.
ttratio : `float`
The ratio of training and validation data to test data.
seed : `int`
The random seed used to generate the split.
"""
# the split mode
split_mode = 'random'
def __init__(self, ds, ttratio, tvratio, seed):
super().__init__(ds, ttratio, tvratio, seed)
def subsets(self):
"""Wrap :py:func:`pysegcnn.core.split.Split.random_tile_split`.
Returns
-------
subsets : `dict`
Subset dictionary with keys:
``'train'``
The training scenes (`dict`).
``'valid'``
The validation scenes (`dict`).
``'test'``
The test scenes (`dict`).
"""
return random_tile_split(self.ds, self.tvratio, self.ttratio,
self.seed)
def subset_type(self):
"""Wrap :py:class:`pysegcnn.core.split.RandomSubset`.
Returns
-------
SceneSubset : :py:class:`pysegcnn.core.split.RandomSubset`
The subset type.
"""
return RandomSubset
class RandomSceneSplit(RandomSplit): class RandomSceneSplit(RandomSplit):
"""Semi-randomly split the dataset. """Split a :py:class:`pysegcnn.core.dataset.ImageDataset` into scenes."""
.. important::
For each scene, all the tiles of the scene are included in either the
training, validation or test set, respectively.
Class wrapper for :py:func:`pysegcnn.core.split.random_scene_split`.
Attributes
----------
split_mode : `str`
The mode to split the dataset, i.e. `'scene'`.
ds : :py:class:`pysegcnn.core.dataset.ImageDataset`
The dataset to split into training, validation and test set.
tvratio : `float`
The ratio of training data to validation data.
ttratio : `float`
The ratio of training and validation data to test data.
seed : `int`
The random seed used to generate the split.
""" def __init__(self, ds, k_folds, seed=0, shuffle=True, tvratio=0.8,
ttratio=1):
# the split mode # initialize super class
split_mode = 'scene' super().__init__(ds, k_folds, seed, shuffle, tvratio, ttratio)
def __init__(self, ds, ttratio, tvratio, seed):
super().__init__(ds, ttratio, tvratio, seed)
def subsets(self): # the number of the scenes in the dataset
"""Wrap :py:func:`pysegcnn.core.split.Split.random_scene_split`. self.scenes = np.array([v['scene'] for v in self.ds.scenes])
Returns @property
------- def indices_to_split(self):
subsets : `dict` return np.unique(self.scenes)
Subset dictionary with keys:
``'train'``
The training scenes (`dict`).
``'valid'``
The validation scenes (`dict`).
``'test'``
The test scenes (`dict`).
""" @property
return random_scene_split(self.ds, self.tvratio, self.ttratio, def indices(self):
self.seed) # indices of the different scene identifiers
indices = self.generate_splits()
def subset_type(self):
"""Wrap :py:class:`pysegcnn.core.split.SceneSubset`.
Returns # iterate over the different folds
------- scene_indices = []
SceneSubset : :py:class:`pysegcnn.core.split.SceneSubset` for folds in indices:
The subset type. # iterate over the training, validation and test set
subset = {}
for name, ids in folds.items():
subset[name] = np.where(np.isin(self.scenes, ids))[0]
scene_indices.append(subset)
""" return scene_indices
return SceneSubset
class SupportedSplits(enum.Enum): class SupportedSplits(enum.Enum):
"""Names and corresponding classes of the implemented split modes.""" """Names and corresponding classes of the implemented split modes."""
random = RandomTileSplit tile = RandomTileSplit
scene = RandomSceneSplit scene = RandomSceneSplit
date = DateSplit
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment