Skip to content
Snippets Groups Projects
Commit b0cdf5cd authored by Rufai Omowunmi Balogun
Browse files

refactor downloader module

parent c2e2b5db
No related branches found
Tags v1.1.0b0
No related merge requests found
from smodex import downloader
from smodex import sm_anomaly
from smodex import sm_climatology
from smodex import sm_downloader
from smodex import version
from smodex import visual_sma_ts
# NOTE(review): two consecutive assignments to __all__ are visible here (presumably the
# old and new side of the commit diff). Read as plain Python, the second assignment
# rebinds the name, so only the list containing "downloader" takes effect.
__all__ = ["sm_anomaly", "sm_climatology", "sm_downloader", "visual_sma_ts", "version"]
__all__ = ["sm_anomaly", "sm_climatology", "downloader", "visual_sma_ts", "version"]
"""
Soil Moisture Downloader: Configured to download datasets from the Climate Data Store
Downloads hourly soil moisture datasets for full year
"""
import argparse
import json
import logging
import os
import sys
import numpy as np
import pandas as pd
import cdsapi
# Module-level logger: records at INFO and above are written to stdout as
# "<timestamp> [<LEVEL>] <message>".
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
# CDS API client used by cds_downloader; it is constructed once, when the module is imported.
# NOTE(review): cdsapi.Client() presumably needs CDS credentials to be configured at
# import time - confirm that importing this module without them is acceptable.
api_client = cdsapi.Client()
def cds_downloader(start_date: str, end_date: str, conf_path: str, download_path: str) -> None:
    """Download ERA5 data from the Climate Data Store, one NetCDF file per calendar year.

    The request template (variables, area, times, ...) is read from a JSON config
    file. For every calendar year touched by the ``start_date``..``end_date`` range
    the template's ``"year"`` entry is overwritten and the request is retrieved into
    ``<download_path>/ERA5_SM_<year>.nc``.

    Args:
        start_date (str): initial date to start downloading datasets from e.g. '2001-01-01'
        end_date (str): last date to stop downloading datasets for e.g. '2045-12-31'
        conf_path (str): path to the CDS API request configuration json file e.g.
            configs/download_conf.json
        download_path (str): directory to save the downloaded datasets in e.g. era_sm/
            (created when missing; a trailing path separator is optional)
    """
    with open(conf_path, encoding="utf-8") as file:
        conf = json.load(file)

    date_ranges = pd.date_range(start=start_date, end=end_date)
    # Use plain Python ints: numpy integers are rejected by the standard json encoder,
    # which the request is presumably serialized with before being sent to the API.
    years = sorted({int(date.year) for date in date_ranges})

    # Create the target directory once, without the exists()/makedirs() race.
    # An empty download_path means "current directory", which needs no creation.
    if years and download_path:
        os.makedirs(download_path, exist_ok=True)

    for yr in years:
        conf["year"] = yr
        # os.path.join keeps the file inside download_path even when the caller
        # omitted the trailing separator.
        target = os.path.join(download_path, f"ERA5_SM_{yr}.nc")
        logger.info(f"downloading ERA5 Soil Moisture from CDS API for {yr}")
        api_client.retrieve("reanalysis-era5-single-levels", conf, target)
        logger.info(f"Downloaded soil moisture now available at {target}")
if __name__ == "__main__":
    # Command-line entry point: four required positional string arguments that are
    # forwarded unchanged to cds_downloader.
    cli = argparse.ArgumentParser(
        description="Downloads soil moisture datasets from start date "
        "to end date from ERA5 Climate Data Store"
    )
    # Insertion order of this mapping defines the positional order on the command line.
    positional_help = {
        "start_date": "initial date to start downloading from e.g. 2001-01-01",
        "end_date": "end date to stop downloading datasets from e.g. 2030-12-31",
        "conf_path": "directory to download configuration json file e.g. configs/download.json",
        "download_path": "directory to save the downloaded datasets e.g. sm_downloaded/",
    }
    for arg_name, help_text in positional_help.items():
        cli.add_argument(arg_name, type=str, help=help_text)

    cli_args = cli.parse_args()
    cds_downloader(
        cli_args.start_date,
        cli_args.end_date,
        cli_args.conf_path,
        cli_args.download_path,
    )
"""
Soil Moisture Downloader: Configured to download datasets from the Climate Data Store
Downloads hourly soil moisture datasets for full year
"""
import argparse
import json
import logging
import os
import sys
import numpy as np
import pandas as pd
import cdsapi
# Module-level logger: records at INFO and above are written to stdout as
# "<timestamp> [<LEVEL>] <message>".
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
# CDS API client used by era5_sm_downloader; it is constructed once, when the module is imported.
# NOTE(review): cdsapi.Client() presumably needs CDS credentials to be configured at
# import time - confirm that importing this module without them is acceptable.
c = cdsapi.Client()
def era5_sm_downloader(year: str, download_path: str, depth: list, area: list) -> None:
    """Retrieve one year of ERA5 volumetric soil water into ``ERA5_SM_<year>.nc``.

    Builds a "reanalysis-era5-single-levels" request covering every month and day
    of ``year`` at 00:00, 06:00, 12:00 and 18:00, and hands it to the module-level
    CDS API client.

    Args:
        year (str): year to download; converted with ``int()`` for the request
        download_path (str): directory the NetCDF file is written to (a trailing
            path separator is optional)
        depth (list): soil layer identifiers; each one becomes a
            ``volumetric_soil_water_layer_<dep>`` variable
        area (list): value passed through unchanged as the request's "area" entry
    """
    variables = [f"volumetric_soil_water_layer_{dep}" for dep in depth]
    request = {
        "product_type": "reanalysis",
        "variable": variables,
        "year": int(year),
        # Zero-padded "01".."12" and "01".."31", generated instead of typed out.
        "month": [f"{month:02d}" for month in range(1, 13)],
        "day": [f"{day:02d}" for day in range(1, 32)],
        "time": ["00:00", "06:00", "12:00", "18:00"],
        "area": area,
        "format": "netcdf",
    }
    # os.path.join keeps the file inside download_path even when the caller
    # omitted the trailing separator.
    target = os.path.join(download_path, f"ERA5_SM_{year}.nc")
    c.retrieve("reanalysis-era5-single-levels", request, target)
def downloader(
    start_date: str, end_date: str, download_path: str, api: str, depth: list, area: list
) -> None:
    """Download soil moisture for every calendar year in a date range.

    Only the "era5" portal is implemented; each year is fetched with
    ``era5_sm_downloader``. Any other ``api`` value downloads nothing and is
    reported with a warning instead of being ignored silently.

    Args:
        start_date (str): initial date to start downloading from e.g. '1990-01-01'
        end_date (str): end date to stop downloading at e.g. '2030-12-31'
        download_path (str): directory to save the downloaded datasets in
            (created when missing)
        api (str): download portal API identifier e.g. 'era5'
        depth (list): soil moisture layers e.g. [1, 2, 3, 4]
        area (list): bounding box e.g. [50.775, 2.775, 42.275, 18.025]
    """
    date_ranges = pd.date_range(start=start_date, end=end_date)
    # Distinct years in ascending order, as plain Python ints.
    years = sorted({int(date.year) for date in date_ranges})

    if api != "era5":
        # Previously a silent no-op; keep it non-fatal but make it visible.
        logger.warning(f"Unsupported download API '{api}'; nothing was downloaded")
        return

    # Create the target directory once, without the exists()/makedirs() race.
    # An empty download_path means "current directory", which needs no creation.
    if years and download_path:
        os.makedirs(download_path, exist_ok=True)

    for yr in years:
        logger.info(f"Initiating downloading of ERA5 Soil Moisture for {yr}")
        era5_sm_downloader(yr, download_path, depth, area)
        logger.info(f"Downloaded ERA5 Soil Moisture for {yr}")
if __name__ == "__main__":
    # Command-line entry point. Arguments are declared as (flags, options) pairs and
    # registered in this order, which fixes the positional order:
    # start_date, end_date, api, path. --area and --depth are parsed as JSON.
    cli = argparse.ArgumentParser(
        description="Downloads soil moisture datasets from start date to end date"
    )
    argument_specs = [
        (
            ("start_date",),
            {"type": str, "help": "initial date to start downloading from e.g. 1990-01-01"},
        ),
        (
            ("end_date",),
            {"type": str, "help": "end date to stop downloading datasets from e.g. 2030-12-31"},
        ),
        (
            ("api",),
            {"type": str, "help": "download portal API e.g. era5, lpdaac, etc."},
        ),
        (
            ("-a", "--area"),
            {
                "type": json.loads,
                "help": "bounding box area for downloading "
                "datasets e.g. [50.775, 2.775, 42.275, 18.025]",
            },
        ),
        (
            ("-d", "--depth"),
            {
                "type": json.loads,
                "help": "volumetric soil moisture depths e.g. [1, 2, 3, 4]",
            },
        ),
        (
            ("path",),
            {
                "type": str,
                "help": "directory to save the downloaded datasets e.g. /sm_downloaded/",
            },
        ),
    ]
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)

    cli_args = cli.parse_args()
    downloader(
        cli_args.start_date,
        cli_args.end_date,
        cli_args.path,
        cli_args.api,
        cli_args.depth,
        cli_args.area,
    )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment