Skip to content
Snippets Groups Projects
Commit e30dbfac authored by Frisinghelli Daniel
Browse files

Implemented a more efficient way to calculate class distribution.

parent f8e77f34
No related branches found
No related tags found
No related merge requests found
......@@ -632,14 +632,18 @@ class ImageDataset(Dataset):
def class_distribution(self):
# initialize dictionary of class spectral distribution
cls_ds = {k: np.empty(shape=(0, len(self.use_bands))) for k, _ in
self.labels.items()}
# initialize class distribution dataframe
columns = [band.capitalize() for band in self.use_bands] + ['Class']
cls_df = pd.DataFrame(columns=columns)
# columns = [band.capitalize() for band in self.use_bands] + ['Class']
# cls_df = pd.DataFrame(columns=columns)
# create the lookup table to replace the class identifiers by their
# corresponding labels
lookup = np.array(list({k: v['label'] for k, v in self.labels.items()}
.items())).astype(object)
# lookup = np.array(list({k: v['label'] for k, v in self.labels.items()}
# .items())).astype(object)
# iterate over the samples of the dataset
for i in range(len(self)):
......@@ -647,17 +651,27 @@ class ImageDataset(Dataset):
LOGGER.info('Sample: {}/{}'.format(i + 1, len(self)))
x, y = self[i]
# reshape the current sample
data = np.hstack([x.flatten(start_dim=1).T, np.expand_dims(
array_replace(y.flatten(), lookup), axis=1)])
# iterate over the different classes
for k, v in self.labels.items():
# get values equal to the current class
mask = np.where(y == k)
# subset input to current class
cls_ds[k] = np.vstack([cls_ds[k], x[:, mask[0], mask[1]].T])
return cls_ds
# reshape the current sample
# data = np.hstack([x.flatten(start_dim=1).T, np.expand_dims(
# array_replace(y.flatten(), lookup), axis=1)])
# the pixels of the current sample to the dataframe
df = pd.DataFrame(data, columns=columns)
# # the pixels of the current sample to the dataframe
# df = pd.DataFrame(data, columns=columns)
# update class distribution dataframe
cls_df = cls_df.append(df)
# # update class distribution dataframe
# cls_df = cls_df.append(df)
return cls_df
# return cls_df
def __repr__(self):
"""Dataset representation.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment