From b0cdf5cd3e0a59d053205c048171ec88ca9457e5 Mon Sep 17 00:00:00 2001
From: Rufai Omowunmi Balogun <rbalogun@eurac.edu>
Date: Thu, 26 Oct 2023 15:33:04 +0200
Subject: [PATCH] refactor downloader module

---
Reviewer revision of the original patch (free text here, before the diffstat, is not
part of the commit message):
 * cds_downloader: send the year as a built-in int (np.unique yields numpy integers).
 * cds_downloader: build the target with os.path.join and create download_path with
   exist_ok=True, so a directory given without a trailing "/" works.
 * Hunk header and diffstat for downloader.py updated to the new line count; the blob
   id on its "index" line still refers to the original revision.

 src/smodex/__init__.py      |   4 +-
 src/smodex/downloader.py    | 103 +++++++++++++++++++++++
 src/smodex/sm_downloader.py | 161 ------------------------------------
 3 files changed, 105 insertions(+), 163 deletions(-)
 create mode 100644 src/smodex/downloader.py
 delete mode 100644 src/smodex/sm_downloader.py

diff --git a/src/smodex/__init__.py b/src/smodex/__init__.py
index 45b4eb8..7493314 100644
--- a/src/smodex/__init__.py
+++ b/src/smodex/__init__.py
@@ -1,8 +1,8 @@
+from smodex import downloader
 from smodex import sm_anomaly
 from smodex import sm_climatology
-from smodex import sm_downloader
 from smodex import version
 from smodex import visual_sma_ts
 
 
-__all__ = ["sm_anomaly", "sm_climatology", "sm_downloader", "visual_sma_ts", "version"]
+__all__ = ["sm_anomaly", "sm_climatology", "downloader", "visual_sma_ts", "version"]
diff --git a/src/smodex/downloader.py b/src/smodex/downloader.py
new file mode 100644
index 0000000..44d5794
--- /dev/null
+++ b/src/smodex/downloader.py
@@ -0,0 +1,103 @@
+"""
+    Soil Moisture Downloader: Configured to download datasets from the Climate Data Store
+    Downloads hourly soil moisture datasets for full year
+"""
+
+import argparse
+import json
+import logging
+import os
+import sys
+
+import numpy as np
+import pandas as pd
+
+import cdsapi
+
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
+stream_handler = logging.StreamHandler(sys.stdout)
+stream_handler.setFormatter(formatter)
+logger.addHandler(stream_handler)
+
+
+# NOTE(review): the client is built at import time and smodex/__init__.py imports this
+# module, so importing the package constructs a CDS API client - confirm this is intended.
+api_client = cdsapi.Client()
+
+
+def cds_downloader(start_date: str, end_date: str, conf_path: str, download_path: str) -> None:
+    """Downloads specified variables (e.g. volumetric soil water content) detailed in a
+    config file from ERA5 Climate Data Store, one file per calendar year in the range.
+
+    The json request in conf_path is loaded once; for every calendar year covered by
+    start_date..end_date its "year" entry is set and the "reanalysis-era5-single-levels"
+    result is saved as ERA5_SM_YYYY.nc inside download_path.
+
+    Args:
+        start_date (str): initial date to start downloading datasets from e.g. '2001-01-01'
+        end_date (str): last date to stop downloading datasets for e.g. '2045-12-31'
+        conf_path (str): path to the CDS API request configuration json file e.g.
+            configs/download_conf.json
+        download_path (str): directory to save the downloaded datasets in e.g. era_sm/
+            (created when missing; the trailing separator is optional)
+    """
+
+    with open(conf_path, encoding="utf-8") as file:
+        conf = json.load(file)
+
+    date_ranges = pd.date_range(start=start_date, end=end_date)
+    year_ranges = np.unique([date.year for date in date_ranges])
+
+    for yr in year_ranges:
+        # np.unique yields numpy integers, which the standard json encoder rejects;
+        # send a built-in int, as the removed sm_downloader.py did with int(year).
+        year = int(yr)
+        conf["year"] = year
+        # os.path.join keeps the file inside download_path even without a trailing "/".
+        target = os.path.join(download_path, f"ERA5_SM_{year}.nc")
+        logger.info(f"downloading ERA5 Soil Moisture from CDS API for {year}")
+        if download_path:
+            os.makedirs(download_path, exist_ok=True)
+        api_client.retrieve("reanalysis-era5-single-levels", conf, target)
+        logger.info(f"Downloaded soil moisture now available at {target}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Downloads soil moisture datasets from start date \
+        to end date from ERA5 Climate Data Store"
+    )
+
+    parser.add_argument(
+        "start_date",
+        type=str,
+        help="initial date to start \
+        downloading from e.g. 2001-01-01",
+    )
+
+    parser.add_argument(
+        "end_date",
+        type=str,
+        help="end date to stop \
+        downloading datasets from e.g. 2030-12-31",
+    )
+
+    parser.add_argument(
+        "conf_path",
+        type=str,
+        help="directory to download configuration json file e.g. configs/download.json",
+    )
+
+    parser.add_argument(
+        "download_path",
+        type=str,
+        help="directory to save the \
+        downloaded datasets e.g. sm_downloaded/",
+    )
+
+    args = parser.parse_args()
+
+    cds_downloader(args.start_date, args.end_date, args.conf_path, args.download_path)
diff --git a/src/smodex/sm_downloader.py b/src/smodex/sm_downloader.py
deleted file mode 100644
index 5b2e4e2..0000000
--- a/src/smodex/sm_downloader.py
+++ /dev/null
@@ -1,161 +0,0 @@
-"""
-    Soil Moisture Downloader: Configured to download datasets from the Climate Data Store
-    Downloads hourly soil moisture datasets for full year
-"""
-import argparse
-import json
-import logging
-import os
-import sys
-
-import numpy as np
-import pandas as pd
-
-import cdsapi
-
-
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
-formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
-stream_handler = logging.StreamHandler(sys.stdout)
-stream_handler.setFormatter(formatter)
-logger.addHandler(stream_handler)
-
-
-c = cdsapi.Client()
-
-
-def era5_sm_downloader(year: str, download_path: str, depth: list, area: list) -> None:
-    """downloader from ERA5 API"""
-
-    variables = []
-    for dep in depth:
-        variables.append(f"volumetric_soil_water_layer_{dep}")
-
-    c.retrieve(
-        "reanalysis-era5-single-levels",
-        {
-            "product_type": "reanalysis",
-            "variable": variables,
-            "year": int(year),
-            "month": [
-                "01",
-                "02",
-                "03",
-                "04",
-                "05",
-                "06",
-                "07",
-                "08",
-                "09",
-                "10",
-                "11",
-                "12",
-            ],
-            "day": [
-                "01",
-                "02",
-                "03",
-                "04",
-                "05",
-                "06",
-                "07",
-                "08",
-                "09",
-                "10",
-                "11",
-                "12",
-                "13",
-                "14",
-                "15",
-                "16",
-                "17",
-                "18",
-                "19",
-                "20",
-                "21",
-                "22",
-                "23",
-                "24",
-                "25",
-                "26",
-                "27",
-                "28",
-                "29",
-                "30",
-                "31",
-            ],
-            "time": [
-                "00:00",
-                "06:00",
-                "12:00",
-                "18:00",
-            ],
-            "area": area,
-            "format": "netcdf",
-        },
-        download_path + f"ERA5_SM_{year}.nc",
-    )
-
-
-def downloader(
-    start_date: str, end_date: str, download_path: str, api: str, depth: list, area: list
-) -> None:
-    """download"""
-
-    date_ranges = pd.date_range(start=start_date, end=end_date)
-    date_ranges = [date.year for date in date_ranges]
-    years = np.unique(date_ranges)
-
-    for yr in years:
-        if api == "era5":
-            logger.info(f"Initiating downloading of ERA5 Soil Moisture for {yr}")
-            if not os.path.exists(download_path):
-                os.makedirs(download_path)
-            era5_sm_downloader(yr, download_path, depth, area)
-            logger.info(f"Downloaded ERA5 Soil Moisture for {yr}")
-
-
-if __name__ == "__main__":
-    # command line option
-    parser = argparse.ArgumentParser(
-        description="Downloads soil moisture \
-        datasets from start date to end date"
-    )
-    parser.add_argument(
-        "start_date",
-        type=str,
-        help="initial date to start \
-        downloading from e.g. 1990-01-01",
-    )
-    parser.add_argument(
-        "end_date",
-        type=str,
-        help="end date to stop \
-        downloading datasets from e.g. 2030-12-31",
-    )
-    parser.add_argument("api", type=str, help="download portal API e.g. era5, lpdaac, etc.")
-    parser.add_argument(
-        "-a",
-        "--area",
-        type=json.loads,
-        help="bounding box area for downloading \
-        datasets e.g. [50.775, 2.775, 42.275, 18.025]",
-    )
-    parser.add_argument(
-        "-d",
-        "--depth",
-        type=json.loads,
-        help="volumetric \
-        soil moisture depths e.g. [1, 2, 3, 4]",
-    )
-    parser.add_argument(
-        "path",
-        type=str,
-        help="directory to save the \
-        downloaded datasets e.g. /sm_downloaded/",
-    )
-
-    args = parser.parse_args()
-
-    downloader(args.start_date, args.end_date, args.path, args.api, args.depth, args.area)
-- 
GitLab