From 65165449dacf5fa281c42264402ffa13c9881deb Mon Sep 17 00:00:00 2001
From: "Daniel.Frisinghelli" <daniel.frisinghelli@eurac.edu>
Date: Thu, 1 Jul 2021 16:22:04 +0200
Subject: [PATCH] Do not load the entire dataset into memory.

---
 climax/core/dataset.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/climax/core/dataset.py b/climax/core/dataset.py
index 35fa8d8..1d393df 100644
--- a/climax/core/dataset.py
+++ b/climax/core/dataset.py
@@ -19,7 +19,7 @@ from pysegcnn.core.utils import search_files
 class EoDataset(torch.utils.data.Dataset):
 
     @staticmethod
-    def to_tensor(x, dtype):
+    def to_tensor(x, dtype=torch.float32):
         """Convert ``x`` to :py:class:`torch.Tensor`.
 
         Parameters
@@ -43,8 +43,6 @@ class NetCDFDataset(EoDataset):
 
     def __init__(self, X, y, dim='time'):
 
-        # TODO: check if conversion to array is more efficient
-
         # NetCDF dataset containing predictor variables (ERA5)
         self.X = X
 
@@ -58,12 +56,8 @@ class NetCDFDataset(EoDataset):
         return len(self.X[self.dim])
 
     def __getitem__(self, idx):
-        return (self.to_tensor(self.X.isel({self.dim: idx}), torch.float32),
-                self.to_tensor(self.y.isel({self.dim: idx}), torch.float32))
-
-    @staticmethod
-    def clip_period(ds, period):
-        return ds.sel(time=slice(period[0], period[1]))
+        return (self.to_tensor(self.X.isel({self.dim: idx}).to_array().values),
+                self.to_tensor(self.y.isel({self.dim: idx}).to_array().values))
 
 
 class ERA5Dataset(EoDataset):
-- 
GitLab