From ece1151a425c471e86ec0af07e0778f2ab69e340 Mon Sep 17 00:00:00 2001
From: Rufai Omowunmi Balogun <rbalogun@eurac.edu>
Date: Thu, 26 Oct 2023 18:10:03 +0200
Subject: [PATCH] improve downloader module

---
 README.md                                     |  64 ++---
 docs/source/guide/downloader.rst              |  59 +++-
 examples/download.json                        |  35 +++
 examples/download_example.ipynb               | 252 ++++++++++++++++++
 .../smodex_example.ipynb                      |  17 ++
 src/smodex/downloader.py                      |   2 +-
 tests/test_downloader.py                      |  83 +-----
 7 files changed, 391 insertions(+), 121 deletions(-)
 create mode 100644 examples/download.json
 create mode 100644 examples/download_example.ipynb
 rename smodex_example.ipynb => examples/smodex_example.ipynb (90%)

diff --git a/README.md b/README.md
index 284f612..f54b68c 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ Soil moisture and soil moisture anomalies are critical markers of dryness and ag
 
 Installation
 ==================
-The SMODEX package is compatible with Python 3.6+ and would be distributed through the _Python Package Index (PyPI)_ and can installed: 
+The SMODEX package is compatible with Python 3.6+ and is distributed through the [_Python Package Index (PyPI)_](https://pypi.org/project/smodex/) and can be installed via:
 
 
 ```bash
@@ -48,50 +48,44 @@ pip install smodex
 Usage examples
 ===============
 Currently, the main subpackages in SMODEX include:
--- **sm_downloader**: contains a request to the ERA5 Climate Datastore for downloading and saving soil water contents based on the specified timeline necessary for computing an anomaly, 
--- **sm_climatology**: contains functionalities for computing soil moisture climatology with a given reference, and rolling mean at a monthly or dekadal scale,
--- **sm_anomaly**: contains functionalities for computing and saving soil moisture anomalies,
--- **sm_visualize**: contains different functionalities for visualizing the soil moisture and soil moisture anomalies across spatial and temporal dimensions (coming soon), 
--- **tests**: test for data handling, and computation moisture and moisture anomalies computation,
--- **docs**: readthedocs implementation
+- **downloader**: contains a request to the ERA5 Climate Datastore for downloading and saving soil water contents based on the specified timeline necessary for computing an anomaly, 
+- **climates**: contains functionalities for computing soil moisture climatology with a given reference, and rolling mean at a weekly rolling window, dekadal, monthly or annual scale, 
+- **anomaly**: contains functionalities for computing and saving soil moisture anomalies,
+- **share**: contains functionalities for sharing computed datasets with other scientists using the STAC specification (coming soon),
+- **plots**: contains different functionalities for visualizing the soil moisture and soil moisture anomalies across spatial and temporal dimensions (coming soon),
 
 
+Downloader
+----------
+To use the downloader module, see our quick start example [here](examples/download_example.ipynb)
 
-Download Module
----------------
-You can use the `sm_downloader` with your python script or notebook this way:
-
-
-
-```python 
-from smodex import sm_downloader
-
-download = sm_downloader.SMDownload(
-    start_date='2020-05-05',
-    end_date='2021-05-10',
-    api="era5",
-    area=[50.775, 2.775, 42.275, 18.025],
-    depth=[1, 2, 3, 4],
-    download_path= "./sm_downloaded/"
-    )
+Within a python script, this can serve as a quick guide: 
+```python
+from smodex.downloader import cds_downloader
 
-download.downloader()
+start_date = '2020-12-15'
+end_date = '2021-01-03'
+conf_path = 'download.json'
+download_path = 'moisture_data/'
 
+cds_downloader(start_date,
+               end_date,
+               conf_path,
+               download_path)
 ```
 
-
-
-But note, for this module to work locally, you shold have the `ERA5 CDS API` installed on your machine. 
-To do this, you simply run:
+However, this requires the installation of the `ERA5 CDS API` as well as a specification of your API keys in a `.env` file, following this example:
+Run:
 
 
 ```bash 
 pip install cdsapi
 ```
-Additionally, create  an `.env` file and set:
 
+And set the following values to those of your CDS API keys
 
 ```yaml
+# .env 
 CDSAPI_URL=https://cds.climate.copernicus.eu/api/v2
 CDSAPI_KEY=XXXXXX:XXXXXX-XXXXXX-XXXX-XXXXX
 ```
@@ -99,16 +93,6 @@ Remember to change the CDSAPI_KEY to that of your personal or organization's acc
 You can find more specific details on how to access your personal CDS API key [here](https://cds.climate.copernicus.eu/api-how-to)
 
 
-Other modules can also be imported and used by:
-
-
-```python
-import smodex
-smodex.sm_downloader()
-smodex.compute_climatology()
-smodex.compute_anomalies()
-```
-
 
 Contributing
 ================
diff --git a/docs/source/guide/downloader.rst b/docs/source/guide/downloader.rst
index 728a4a2..c6a232a 100644
--- a/docs/source/guide/downloader.rst
+++ b/docs/source/guide/downloader.rst
@@ -1,12 +1,57 @@
 Downloader
-============
+==========
+The smodex downloader provides a quick way to download climate datasets for your use case from the 
+`ERA5 Climate Data Store <https://cds.climate.copernicus.eu/cdsapp#!/search?type=dataset>`_ by 
+specifying the details of the requested datasets in a JSON file and providing all the necessary 
+information for downloading the datasets you need.
 
-ERA5 API
-----------
 
-Reference Period
-------------------
 
-Area of Interest
-------------------
+Step One
+--------
+Ensure you have the `ERA5 CDS API` installed in your development environment. This can be done by:
 
+.. code-block:: bash
+    pip install cdsapi
+
+This provides all the necessary functionalities and backends for accessing the CDS datasets. 
+
+Next, create a configuration file (JSON file) that contains the full specification of the data 
+you would like to request, for example:
+
+.. code-block:: json-object
+    {
+        "product_type": "reanalysis", 
+        "variable": [
+            "volumetric_soil_water_layer_1",
+            "volumetric_soil_water_layer_2",
+            "volumetric_soil_water_layer_3",
+            "volumetric_soil_water_layer_4"
+            ],
+        "year": 1981,
+        "month": [ "01", "02", "03"],
+        "day": ["01", "08", "16", "24", "30"],
+        "time": ["00:00", "06:00", "12:00", "18:00"],
+        "area": [47.148, 10.255, 46.297, 12.542],
+        "format": "netcdf"
+        }
+
+This JSON file in general should contain the information on your **Area of Interest** and other 
+specific information on the datasets you would like to download.
+
+Step Two
+--------
+Specify the time range (start date and end date) and download your data to the specified path:
+
+.. code-block:: python
+    from smodex.downloader import cds_downloader
+    start_date = '2010-01-01'
+    end_date = '2020-12-31'
+    conf_path = 'download.json'
+    download_path = 'moisture_data/'
+    
+    cds_downloader(start_date = start_date,
+                   end_date = end_date,
+                   conf_path = conf_path,
+                   download_path = download_path
+                   )
diff --git a/examples/download.json b/examples/download.json
new file mode 100644
index 0000000..5ccff84
--- /dev/null
+++ b/examples/download.json
@@ -0,0 +1,35 @@
+{
+    "product_type": "reanalysis", 
+    "variable": [
+        "volumetric_soil_water_layer_1",
+        "volumetric_soil_water_layer_2",
+        "volumetric_soil_water_layer_3",
+        "volumetric_soil_water_layer_4"
+    ],
+    "year": 1981,
+    "month": [
+        "01",
+        "02",
+        "03",
+        "04",
+        "05"
+    ],
+    "day": [
+        "01",
+        "02",
+        "03",
+        "04",
+        "05",
+        "06",
+        "07",
+        "08"
+    ],
+    "time": [
+        "00:00",
+        "06:00",
+        "12:00",
+        "18:00"
+    ],
+    "area": [47.148, 10.255, 46.297, 12.542],
+    "format": "netcdf"
+}
diff --git a/examples/download_example.ipynb b/examples/download_example.ipynb
new file mode 100644
index 0000000..53fb3c0
--- /dev/null
+++ b/examples/download_example.ipynb
@@ -0,0 +1,252 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## smodex downloader\n",
+    "In this notebook, we show an example of how to quickly utilize the `smodex downloader` module with the `cds_downloader` functionality that allows you to download climate datasets from the Climate Data Store using your own personal or organization CDS token. \n",
+    "\n",
+    "In order for this function to effectively work with the CDS API, you are expected to specify your API keys (consult the [README.md](https://gitlab.inf.unibz.it/earth_observation_public/smodex/-/blob/master/README.md?ref_type=heads)) in a .env file. \n",
+    "\n",
+    "Additionally, you can create a JSON file that contains the specification of the dataset you would like to download (see examples/download.json for an example of the expected structure)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In this notebook, we showcase an example on how to use the smodex package to quickly download a set of volumetric soil moisture layers for the **Autonomous Province of Bolzano, South Tyrol** for only the synoptic hours *(\"00:00\", \"06:00\", \"12:00\", \"18:00\")* \n",
+    "\n",
+    "Take a look at the download.json file to see how this configuration was specified."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from smodex.downloader import cds_downloader"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:37:24,830 [INFO] downloading ERA5 Soil Moisture from CDS API for 2020\n",
+      "2023-10-26 17:37:24,830 [INFO]:smodex.downloader:downloading ERA5 Soil Moisture from CDS API for 2020\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:37:24,930 INFO Welcome to the CDS\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:37:24,930 [INFO]:cdsapi:Welcome to the CDS\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:37:24,931 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:37:24,931 [INFO]:cdsapi:Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:37:25,048 INFO Request is queued\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:37:25,048 [INFO]:cdsapi:Request is queued\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:47:43,336 INFO Request is completed\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:47:43,336 [INFO]:cdsapi:Request is completed\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:47:43,337 INFO Downloading https://download-0012-clone.copernicus-climate.eu/cache-compute-0012/cache/data9/adaptor.mars.internal-1698335182.29258-11653-11-20c97bc4-15df-4311-8780-07fd8a46ad9d.nc to ./ERA5_SM_2020.nc (52.5K)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:47:43,337 [INFO]:cdsapi:Downloading https://download-0012-clone.copernicus-climate.eu/cache-compute-0012/cache/data9/adaptor.mars.internal-1698335182.29258-11653-11-20c97bc4-15df-4311-8780-07fd8a46ad9d.nc to ./ERA5_SM_2020.nc (52.5K)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:47:43,573 INFO Download rate 222.6K/s\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:47:43,573 [INFO]:cdsapi:Download rate 222.6K/s\n",
+      "2023-10-26 17:47:43,593 [INFO] Downloaded soil moisture now available at ./ERA5_SM_2020.nc\n",
+      "2023-10-26 17:47:43,593 [INFO]:smodex.downloader:Downloaded soil moisture now available at ./ERA5_SM_2020.nc\n",
+      "2023-10-26 17:47:43,594 [INFO] downloading ERA5 Soil Moisture from CDS API for 2021\n",
+      "2023-10-26 17:47:43,594 [INFO]:smodex.downloader:downloading ERA5 Soil Moisture from CDS API for 2021\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:47:43,607 INFO Welcome to the CDS\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:47:43,607 [INFO]:cdsapi:Welcome to the CDS\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:47:43,607 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:47:43,607 [INFO]:cdsapi:Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:47:43,677 INFO Request is queued\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:47:43,677 [INFO]:cdsapi:Request is queued\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:56:01,903 INFO Request is completed\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:56:01,903 [INFO]:cdsapi:Request is completed\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:56:01,904 INFO Downloading https://download-0018.copernicus-climate.eu/cache-compute-0018/cache/data1/adaptor.mars.internal-1698335725.4849708-27865-3-aab3472d-41b9-4b4f-9465-5a7ca14efe8b.nc to ./ERA5_SM_2021.nc (52.5K)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:56:01,904 [INFO]:cdsapi:Downloading https://download-0018.copernicus-climate.eu/cache-compute-0018/cache/data1/adaptor.mars.internal-1698335725.4849708-27865-3-aab3472d-41b9-4b4f-9465-5a7ca14efe8b.nc to ./ERA5_SM_2021.nc (52.5K)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:56:02,044 INFO Download rate 378K/s\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-10-26 17:56:02,044 [INFO]:cdsapi:Download rate 378K/s\n",
+      "2023-10-26 17:56:02,065 [INFO] Downloaded soil moisture now available at ./ERA5_SM_2021.nc\n",
+      "2023-10-26 17:56:02,065 [INFO]:smodex.downloader:Downloaded soil moisture now available at ./ERA5_SM_2021.nc\n"
+     ]
+    }
+   ],
+   "source": [
+    "start_date = \"2020-12-15\"\n",
+    "end_date = \"2021-01-03\"\n",
+    "conf_path = \"download.json\"\n",
+    "download_path = \"./\"\n",
+    "\n",
+    "cds_downloader(start_date, end_date, conf_path, download_path)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "ado_test",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/smodex_example.ipynb b/examples/smodex_example.ipynb
similarity index 90%
rename from smodex_example.ipynb
rename to examples/smodex_example.ipynb
index 21a8edb..1c25395 100644
--- a/smodex_example.ipynb
+++ b/examples/smodex_example.ipynb
@@ -8,6 +8,23 @@
     "Soil Moisture Anomalies package -- usage example"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: smodex in /home/rbalogun/.conda/envs/ado_test/lib/python3.8/site-packages (1.0.0b0)\n"
+     ]
+    }
+   ],
+   "source": [
+    "! pip install smodex"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 2,
diff --git a/src/smodex/downloader.py b/src/smodex/downloader.py
index 44d5794..3fc301c 100644
--- a/src/smodex/downloader.py
+++ b/src/smodex/downloader.py
@@ -45,7 +45,7 @@ def cds_downloader(start_date: str, end_date: str, conf_path: str, download_path
     year_ranges = np.unique([date.year for date in date_ranges])
 
     for yr in year_ranges:
-        conf["year"] = yr
+        conf["year"] = int(yr)
         logger.info(f"downloading ERA5 Soil Moisture from CDS API for {yr}")
         if not os.path.exists(download_path):
             os.makedirs(download_path)
diff --git a/tests/test_downloader.py b/tests/test_downloader.py
index e685d9f..5f14c4f 100644
--- a/tests/test_downloader.py
+++ b/tests/test_downloader.py
@@ -4,8 +4,7 @@ from unittest.mock import patch
 
 import cdsapi
 import pytest
-from smodex.sm_downloader import downloader
-from smodex.sm_downloader import era5_sm_downloader
+from smodex.downloader import cds_downloader
 
 
 @pytest.fixture
@@ -15,85 +14,23 @@ def mock_cdsapi_client():
     return mock_client
 
 
-@pytest.mark.skip(reason="cdsapi api retrieve method not called correctly")
-@patch("cdsapi.Client")
-def test_era5_sm_downloader(mock_cdsapi_client, tmp_path):
-    year = "2023"
-    download_path = str(tmp_path)
-    depth = [1, 2]
-    area = [50.775, 2.775, 42.275, 18.025]
-
-    # Set up the mock CDS API client
-    mock_cdsapi_client.retrieve.return_value = None
-
-    # Call the function to be tested
-    era5_sm_downloader(year, download_path, depth, area)
-
-    # Check if the CDS API client's retrieve method was called with the expected arguments
-    mock_cdsapi_client.retrieve.assert_called_once_with(
-        "reanalysis-era5-single-levels",
-        {
-            "product_type": "reanalysis",
-            "variable": ["volumetric_soil_water_layer_1", "volumetric_soil_water_layer_2"],
-            "year": year,
-            "month": ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"],
-            "day": [
-                "01",
-                "02",
-                "03",
-                "04",
-                "05",
-                "06",
-                "07",
-                "08",
-                "09",
-                "10",
-                "11",
-                "12",
-                "13",
-                "14",
-                "15",
-                "16",
-                "17",
-                "18",
-                "19",
-                "20",
-                "21",
-                "22",
-                "23",
-                "24",
-                "25",
-                "26",
-                "27",
-                "28",
-                "29",
-                "30",
-                "31",
-            ],
-            "time": ["00:00", "06:00", "12:00", "18:00"],
-            "area": area,
-            "format": "netcdf",
-        },
-        download_path + f"ERA5_SM_{year}.nc",
-    )
-
-
 def test_downloader(tmp_path, monkeypatch, caplog):
     # test data
-    start_date = "2023-01-01"
-    end_date = "2023-12-31"
+    start_date = "2021-01-01"
+    end_date = "2021-12-31"
+    conf_path = str("download.json")
     download_path = str(tmp_path)
-    api = "era5"
-    depth = [1, 2]
-    area = [50.775, 2.775, 42.275, 18.025]
 
     # Monkeypatch the os.makedirs function to avoid creating directories during testing
     monkeypatch.setattr(os, "makedirs", lambda path: None)
 
-    downloader(start_date, end_date, download_path, api, depth, area)
+    cds_downloader(start_date, end_date, conf_path, download_path)
 
-    assert "Initiating downloading of ERA5 Soil Moisture for 2023" in caplog.text
-    assert "Downloaded ERA5 Soil Moisture for 2023" in caplog.text
+    assert "downloading ERA5 Soil Moisture from CDS API for 2021" in caplog.text
+    assert (
+        f"Downloaded soil moisture now available at {download_path+f'ERA5_SM_{2021}.nc'}"
+        in caplog.text
+    )
 
 
 if __name__ == "__main__":
-- 
GitLab