From ece1151a425c471e86ec0af07e0778f2ab69e340 Mon Sep 17 00:00:00 2001 From: Rufai Omowunmi Balogun <rbalogun@eurac.edu> Date: Thu, 26 Oct 2023 18:10:03 +0200 Subject: [PATCH] improve downloader module --- README.md | 64 ++--- docs/source/guide/downloader.rst | 59 +++- examples/download.json | 35 +++ examples/download_example.ipynb | 252 ++++++++++++++++++ .../smodex_example.ipynb | 17 ++ src/smodex/downloader.py | 2 +- tests/test_downloader.py | 83 +----- 7 files changed, 391 insertions(+), 121 deletions(-) create mode 100644 examples/download.json create mode 100644 examples/download_example.ipynb rename smodex_example.ipynb => examples/smodex_example.ipynb (90%) diff --git a/README.md b/README.md index 284f612..f54b68c 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ Soil moisture and soil moisture anomalies are critical markers of dryness and ag Installation ================== -The SMODEX package is compatible with Python 3.6+ and would be distributed through the _Python Package Index (PyPI)_ and can installed: +The SMODEX package is compatible with Python 3.6+ and is distributed through the [_Python Package Index (PyPI)_](https://pypi.org/project/smodex/) and can installed via: ```bash @@ -48,50 +48,44 @@ pip install smodex Usage examples =============== Currently, the main subpackages in SMODEX include: --- **sm_downloader**: contains a request to the ERA5 Climate Datastore for downloading and saving soil water contents based on the specified timeline necessary for computing an anomaly, --- **sm_climatology**: contains functionalities for computing soil moisture climatology with a given reference, and rolling mean at a monthly or dekadal scale, --- **sm_anomaly**: contains functionalities for computing and saving soil moisture anomalies, --- **sm_visualize**: contains different functionalities for visualizing the soil moisture and soil moisture anomalies across spatial and temporal dimensions (coming soon), --- **tests**: test for data handling, and 
computation moisture and moisture anomalies computation, --- **docs**: readthedocs implementation +- **downloader**: contains a request to the ERA5 Climate Datastore for downloading and saving soil water contents based on the specified timeline necessary for computing an anomaly, +- **climates**: contains functionalities for computing soil moisture climatology with a given reference, and rolling mean at a weekly rolling window, dekadal, monthly or annual scale, +- **anomaly**: contains functionalities for computing and saving soil moisture anomalies, +- **share**: contains functionalities for sharing computed datasets with other scientists using the STAC specification (coming soon), +- **plots**: contains different functionalities for visualizing the soil moisture and soil moisture anomalies across spatial and temporal dimensions (coming soon), +Downloader +---------- +To use the downloader module, see our quick start example [here](examples/download_example.ipynb) -Download Module ---------------- -You can use the `sm_downloader` with your python script or notebook this way: - - - -```python -from smodex import sm_downloader - -download = sm_downloader.SMDownload( - start_date='2020-05-05', - end_date='2021-05-10', - api="era5", - area=[50.775, 2.775, 42.275, 18.025], - depth=[1, 2, 3, 4], - download_path= "./sm_downloaded/" - ) +Within a python script, this can serve as a quick guide: +```python +from smodex.downloader import cds_downloader -download.downloader() +start_date = '2020-12-15' +end_date = '2021-01-03' +conf_path = 'download.json' +download_path = 'moisture_data/' +cds_downloader(start_date, + end_date, + conf_path, + download_path) ``` - - -But note, for this module to work locally, you shold have the `ERA5 CDS API` installed on your machine. 
-To do this, you simply run: +However, this requires the installation of the `ERA5 CDS API` as well as a specification of your API keys in a `.env` file following this example: +Run: ```bash pip install cdsapi ``` -Additionally, create an `.env` file and set: +And set the following values to those of your CDS API keys ```yaml +# .env CDSAPI_URL=https://cds.climate.copernicus.eu/api/v2 CDSAPI_KEY=XXXXXX:XXXXXX-XXXXXX-XXXX-XXXXX ``` @@ -99,16 +93,6 @@ Remember to change the CDSAPI_KEY to that of your personal or organization's acc You can find more specific details on how to access your personal CDS API key [here](https://cds.climate.copernicus.eu/api-how-to) -Other modules can also be imported and used by: - - -```python -import smodex -smodex.sm_downloader() -smodex.compute_climatology() -smodex.compute_anomalies() -``` - Contributing ================ diff --git a/docs/source/guide/downloader.rst b/docs/source/guide/downloader.rst index 728a4a2..c6a232a 100644 --- a/docs/source/guide/downloader.rst +++ b/docs/source/guide/downloader.rst @@ -1,12 +1,57 @@ Downloader -============ +========== +The smodex downloader provides a quick way to download climate datasets for your use case from the +`ERA5 Climate Data Store <https://cds.climate.copernicus.eu/cdsapp#!/search?type=dataset>`_ by +specifying the details of the requested datasets in a JSON file and providing all the necessary +information for downloading the datasets you need. -ERA5 API ----------- -Reference Period ------------------- -Area of Interest ------------------- +Step One +-------- +Ensure you have the `ERA5 CDS API` installed in your development environment. This can be done by: +.. code-block:: bash + pip install cdsapi + +This provides all the necessary functionalities and backends for accessing the CDS datasets. + +Next to this, create a configuration file (JSON file) that contains all the data specification for +the data you would like to request, example: + +.. 
code-block:: json-object + { + "product_type": "reanalysis", + "variable": [ + "volumetric_soil_water_layer_1", + "volumetric_soil_water_layer_2", + "volumetric_soil_water_layer_3", + "volumetric_soil_water_layer_4" + ], + "year": 1981, + "month": [ "01", "02", "03"], + "day": ["01", "08", "16", "24", "30"], + "time": ["00:00", "06:00", "12:00", "18:00"], + "area": [47.148, 10.255, 46.297, 12.542], + "format": "netcdf" + } + +This JSON file in general should contain the information on your **Area of Interest** and other +specific information on the datasets you would like to download. + +Step Two: +--------- +Specify the time range (start date and end date) and download your data to the specified path: + +.. code-block:: python + from smodex.downloader import cds_downloader + start_date = '2010-01-01' + end_date = '2020-12-31' + conf_path = 'download.json' + download_path = 'moisture_data/' + + cds_downloader(start_date = start_date, + end_date = end_date, + conf_path = conf_path, + download_path = download_path + ) diff --git a/examples/download.json b/examples/download.json new file mode 100644 index 0000000..5ccff84 --- /dev/null +++ b/examples/download.json @@ -0,0 +1,35 @@ +{ + "product_type": "reanalysis", + "variable": [ + "volumetric_soil_water_layer_1", + "volumetric_soil_water_layer_2", + "volumetric_soil_water_layer_3", + "volumetric_soil_water_layer_4" + ], + "year": 1981, + "month": [ + "01", + "02", + "03", + "04", + "05" + ], + "day": [ + "01", + "02", + "03", + "04", + "05", + "06", + "07", + "08" + ], + "time": [ + "00:00", + "06:00", + "12:00", + "18:00" + ], + "area": [47.148, 10.255, 46.297, 12.542], + "format": "netcdf" +} diff --git a/examples/download_example.ipynb b/examples/download_example.ipynb new file mode 100644 index 0000000..53fb3c0 --- /dev/null +++ b/examples/download_example.ipynb @@ -0,0 +1,252 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## smodex downloader\n", + "In this notebook, we 
show an example of how to quickly utilize the `smodex downloader` module with the `cds_downloader` functionality that allows you to download climate datasets from the Climate Data Store using your own personal or organization CDS token. \n", + "\n", + "In order for this function to effectively work with the CDS API, you are expected to specify your API keys (consult the [README.md](https://gitlab.inf.unibz.it/earth_observation_public/smodex/-/blob/master/README.md?ref_type=heads)) in a .env file. \n", + "\n", + "Additionally, you can create a JSON file that contains the specification of the dataset you would like to download (see examples/download.json for an example of the expected structure)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, we showcase an example of how to use the smodex package to quickly download a set of volumetric soil moisture layers for the **Autonomous Province of Bolzano, South Tyrol** for only the synoptic hours *(\"00:00\", \"06:00\", \"12:00\", \"18:00\")* \n", + "\n", + "Take a look at the download.json file to see how this configuration was specified." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from smodex.downloader import cds_downloader" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-10-26 17:37:24,830 [INFO] downloading ERA5 Soil Moisture from CDS API for 2020\n", + "2023-10-26 17:37:24,830 [INFO]:smodex.downloader:downloading ERA5 Soil Moisture from CDS API for 2020\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-10-26 17:37:24,930 INFO Welcome to the CDS\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-10-26 17:37:24,930 [INFO]:cdsapi:Welcome to the CDS\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-10-26 17:37:24,931 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-10-26 17:37:24,931 [INFO]:cdsapi:Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-10-26 17:37:25,048 INFO Request is queued\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-10-26 17:37:25,048 [INFO]:cdsapi:Request is queued\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-10-26 17:47:43,336 INFO Request is completed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-10-26 17:47:43,336 [INFO]:cdsapi:Request is completed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-10-26 17:47:43,337 INFO Downloading 
https://download-0012-clone.copernicus-climate.eu/cache-compute-0012/cache/data9/adaptor.mars.internal-1698335182.29258-11653-11-20c97bc4-15df-4311-8780-07fd8a46ad9d.nc to ./ERA5_SM_2020.nc (52.5K)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-10-26 17:47:43,337 [INFO]:cdsapi:Downloading https://download-0012-clone.copernicus-climate.eu/cache-compute-0012/cache/data9/adaptor.mars.internal-1698335182.29258-11653-11-20c97bc4-15df-4311-8780-07fd8a46ad9d.nc to ./ERA5_SM_2020.nc (52.5K)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-10-26 17:47:43,573 INFO Download rate 222.6K/s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-10-26 17:47:43,573 [INFO]:cdsapi:Download rate 222.6K/s\n", + "2023-10-26 17:47:43,593 [INFO] Downloaded soil moisture now available at ./ERA5_SM_2020.nc\n", + "2023-10-26 17:47:43,593 [INFO]:smodex.downloader:Downloaded soil moisture now available at ./ERA5_SM_2020.nc\n", + "2023-10-26 17:47:43,594 [INFO] downloading ERA5 Soil Moisture from CDS API for 2021\n", + "2023-10-26 17:47:43,594 [INFO]:smodex.downloader:downloading ERA5 Soil Moisture from CDS API for 2021\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-10-26 17:47:43,607 INFO Welcome to the CDS\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-10-26 17:47:43,607 [INFO]:cdsapi:Welcome to the CDS\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-10-26 17:47:43,607 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-10-26 17:47:43,607 [INFO]:cdsapi:Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-10-26 17:47:43,677 INFO 
Request is queued\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-10-26 17:47:43,677 [INFO]:cdsapi:Request is queued\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-10-26 17:56:01,903 INFO Request is completed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-10-26 17:56:01,903 [INFO]:cdsapi:Request is completed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-10-26 17:56:01,904 INFO Downloading https://download-0018.copernicus-climate.eu/cache-compute-0018/cache/data1/adaptor.mars.internal-1698335725.4849708-27865-3-aab3472d-41b9-4b4f-9465-5a7ca14efe8b.nc to ./ERA5_SM_2021.nc (52.5K)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-10-26 17:56:01,904 [INFO]:cdsapi:Downloading https://download-0018.copernicus-climate.eu/cache-compute-0018/cache/data1/adaptor.mars.internal-1698335725.4849708-27865-3-aab3472d-41b9-4b4f-9465-5a7ca14efe8b.nc to ./ERA5_SM_2021.nc (52.5K)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-10-26 17:56:02,044 INFO Download rate 378K/s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-10-26 17:56:02,044 [INFO]:cdsapi:Download rate 378K/s\n", + "2023-10-26 17:56:02,065 [INFO] Downloaded soil moisture now available at ./ERA5_SM_2021.nc\n", + "2023-10-26 17:56:02,065 [INFO]:smodex.downloader:Downloaded soil moisture now available at ./ERA5_SM_2021.nc\n" + ] + } + ], + "source": [ + "start_date = \"2020-12-15\"\n", + "end_date = \"2021-01-03\"\n", + "conf_path = \"download.json\"\n", + "download_path = \"./\"\n", + "\n", + "cds_downloader(start_date, end_date, conf_path, download_path)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ado_test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": 
".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/smodex_example.ipynb b/examples/smodex_example.ipynb similarity index 90% rename from smodex_example.ipynb rename to examples/smodex_example.ipynb index 21a8edb..1c25395 100644 --- a/smodex_example.ipynb +++ b/examples/smodex_example.ipynb @@ -8,6 +8,23 @@ "Soil Moisture Anomalies package -- usage example" ] }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: smodex in /home/rbalogun/.conda/envs/ado_test/lib/python3.8/site-packages (1.0.0b0)\n" + ] + } + ], + "source": [ + "! pip install smodex" + ] + }, { "cell_type": "code", "execution_count": 2, diff --git a/src/smodex/downloader.py b/src/smodex/downloader.py index 44d5794..3fc301c 100644 --- a/src/smodex/downloader.py +++ b/src/smodex/downloader.py @@ -45,7 +45,7 @@ def cds_downloader(start_date: str, end_date: str, conf_path: str, download_path year_ranges = np.unique([date.year for date in date_ranges]) for yr in year_ranges: - conf["year"] = yr + conf["year"] = int(yr) logger.info(f"downloading ERA5 Soil Moisture from CDS API for {yr}") if not os.path.exists(download_path): os.makedirs(download_path) diff --git a/tests/test_downloader.py b/tests/test_downloader.py index e685d9f..5f14c4f 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -4,8 +4,7 @@ from unittest.mock import patch import cdsapi import pytest -from smodex.sm_downloader import downloader -from smodex.sm_downloader import era5_sm_downloader +from smodex.downloader import cds_downloader @pytest.fixture @@ -15,85 +14,23 @@ def mock_cdsapi_client(): return mock_client -@pytest.mark.skip(reason="cdsapi api retrieve method not called correctly") -@patch("cdsapi.Client") -def 
test_era5_sm_downloader(mock_cdsapi_client, tmp_path): - year = "2023" - download_path = str(tmp_path) - depth = [1, 2] - area = [50.775, 2.775, 42.275, 18.025] - - # Set up the mock CDS API client - mock_cdsapi_client.retrieve.return_value = None - - # Call the function to be tested - era5_sm_downloader(year, download_path, depth, area) - - # Check if the CDS API client's retrieve method was called with the expected arguments - mock_cdsapi_client.retrieve.assert_called_once_with( - "reanalysis-era5-single-levels", - { - "product_type": "reanalysis", - "variable": ["volumetric_soil_water_layer_1", "volumetric_soil_water_layer_2"], - "year": year, - "month": ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"], - "day": [ - "01", - "02", - "03", - "04", - "05", - "06", - "07", - "08", - "09", - "10", - "11", - "12", - "13", - "14", - "15", - "16", - "17", - "18", - "19", - "20", - "21", - "22", - "23", - "24", - "25", - "26", - "27", - "28", - "29", - "30", - "31", - ], - "time": ["00:00", "06:00", "12:00", "18:00"], - "area": area, - "format": "netcdf", - }, - download_path + f"ERA5_SM_{year}.nc", - ) - - def test_downloader(tmp_path, monkeypatch, caplog): # test data - start_date = "2023-01-01" - end_date = "2023-12-31" + start_date = "2021-01-01" + end_date = "2021-12-31" + conf_path = str("download.json") download_path = str(tmp_path) - api = "era5" - depth = [1, 2] - area = [50.775, 2.775, 42.275, 18.025] # Monkeypatch the os.makedirs function to avoid creating directories during testing monkeypatch.setattr(os, "makedirs", lambda path: None) - downloader(start_date, end_date, download_path, api, depth, area) + cds_downloader(start_date, end_date, conf_path, download_path) - assert "Initiating downloading of ERA5 Soil Moisture for 2023" in caplog.text - assert "Downloaded ERA5 Soil Moisture for 2023" in caplog.text + assert "downloading ERA5 Soil Moisture from CDS API for 2021" in caplog.text + assert ( + f"Downloaded soil moisture now 
available at {download_path+f'ERA5_SM_{2021}.nc'}" + in caplog.text + ) if __name__ == "__main__": -- GitLab