Skip to content
Snippets Groups Projects
conversion.py 2.94 KiB
Newer Older
import os
import time
import xarray as xr
import rasterio
from rasterio.io import MemoryFile
import pyproj
from tqdm import tqdm
from multiprocessing import Pool
from datetime import datetime


def netcdf_to_cog(netcdf_path: str, cog_dir: str) -> None:
    """
    Convert a SWE netCDF file to a series of Cloud Optimized GeoTIFF (COG) files.

    Parameters
    ----------
    netcdf_path : str
        Path to the netCDF file.
    cog_dir : str
        Directory for the output COG files.
    swe_name_year : str
        Name of the SWE dataset and year. This is used in the COG file names.
    """
    time_start: float = time.perf_counter()

    os.makedirs(cog_dir, exist_ok=True)

    dataset = xr.open_dataset(netcdf_path)
    spatial_ref = dataset["spatial_ref"].attrs["crs_wkt"]
    crs = pyproj.CRS.from_string(spatial_ref)
    transform = rasterio.transform.from_bounds(
        dataset.x[0],
        dataset.y[-1],
        dataset.x[-1],
        dataset.y[0],
        dataset.sizes["x"],
        dataset.sizes["y"],
    )

Zvolenský Juraj's avatar
Zvolenský Juraj committed
    for i in tqdm(range(dataset.sizes["time"])):
        time_slice = dataset.isel(time=i)
        timestamp = time_slice.time.values.astype("datetime64[D]").astype(str)

        date = datetime.strptime(timestamp, "%Y-%m-%d")

        formatted_timestamp = date.strftime("%Y%m%d")

        generic_var_name = "swe"
        method_standard = "reconstruction"
        var_type = "c"
        spatial_support = "50m"
        depth_reference = "s"
        bounding_box = "it"
        epsg_code = "epsg.3035"
        current_date = datetime.now()
        version_code = "v" + current_date.strftime("%Y%m%d")

        cog_file = f"{generic_var_name}_{method_standard}_{var_type}_{spatial_support}_{depth_reference}_{formatted_timestamp}_{bounding_box}_{epsg_code}_{version_code}.tif"
        cog_path = os.path.join(cog_dir, cog_file)

        with rasterio.open(
            "temp.tif",
            "w",
            driver="GTiff",
            height=time_slice.y.size,
            width=time_slice.x.size,
            count=1,
            dtype=str(time_slice.SWE.dtype),
            crs=crs,
            transform=transform,
        ) as temp:
            temp.write(time_slice.SWE.values, 1)

        with rasterio.open("temp.tif") as src:
            profile = src.profile
            profile.update(
                driver="COG",
                dtype=rasterio.float32,
                nodata=0,
                compress="deflate",
                predictor=2,
                blockxsize=256,
                blockysize=256,
                tiled=True,
            )

            with rasterio.open(cog_path, "w", **profile) as dst:
                dst.write(src.read(1), 1)

    os.remove("temp.tif")

    time_end: float = time.perf_counter()
    print(f"COG creation time: {time_end - time_start:.2f} seconds")


def process_multiple_datasets(netcdf_paths, cog_dirs):
Zvolenský Juraj's avatar
Zvolenský Juraj committed
    for netcdf_path, cog_dir in zip(netcdf_paths, cog_dirs):
        netcdf_to_cog(netcdf_path, cog_dir)