From 65165449dacf5fa281c42264402ffa13c9881deb Mon Sep 17 00:00:00 2001 From: "Daniel.Frisinghelli" <daniel.frisinghelli@eurac.edu> Date: Thu, 1 Jul 2021 16:22:04 +0200 Subject: [PATCH] Do not load entire dataset to memory. --- climax/core/dataset.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/climax/core/dataset.py b/climax/core/dataset.py index 35fa8d8..1d393df 100644 --- a/climax/core/dataset.py +++ b/climax/core/dataset.py @@ -19,7 +19,7 @@ from pysegcnn.core.utils import search_files class EoDataset(torch.utils.data.Dataset): @staticmethod - def to_tensor(x, dtype): + def to_tensor(x, dtype=torch.float32): """Convert ``x`` to :py:class:`torch.Tensor`. Parameters @@ -43,8 +43,6 @@ class NetCDFDataset(EoDataset): def __init__(self, X, y, dim='time'): - # TODO: check if conversion to array is more efficient - # NetCDF dataset containing predictor variables (ERA5) self.X = X @@ -58,12 +56,8 @@ class NetCDFDataset(EoDataset): return len(self.X[self.dim]) def __getitem__(self, idx): - return (self.to_tensor(self.X.isel({self.dim: idx}), torch.float32), - self.to_tensor(self.y.isel({self.dim: idx}), torch.float32)) - - @staticmethod - def clip_period(ds, period): - return ds.sel(time=slice(period[0], period[1])) + return (self.to_tensor(self.X.isel({self.dim: idx}).to_array().values), + self.to_tensor(self.y.isel({self.dim: idx}).to_array().values)) class ERA5Dataset(EoDataset): -- GitLab