Skip to content
Snippets Groups Projects
Commit 9679e303 authored by Zvolenský Juraj's avatar Zvolenský Juraj
Browse files

Conversion and validation to COG

parent ce21a3f4
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
## Comparing the data before and after
For quality control, the original NetCDF data is compared to the newly created COGs. \
This is to ensure that the spatial referencing and data values are consistent between the two formats. \
%% Cell type:code id: tags:
``` python
import xarray as xr
import numpy as np
import rasterio
import matplotlib.pyplot as plt
```
%% Cell type:code id: tags:
``` python
swe_netcdf_path = './data/venosta_hy1718_swe.nc'
swe_cog_path = './output_cog/venosta_1718_cog/venosta_1718_swe_2017-10-03.tif'
swe_nc_data = xr.open_dataset(swe_netcdf_path)
swe_cog_data = rasterio.open(swe_cog_path)
```
%% Cell type:code id: tags:
``` python
swe_nc_data.SWE.isel(time=2).plot()
```
%% Cell type:code id: tags:
``` python
band1 = swe_cog_data.read(1)
vmin, vmax = np.nanmin(band1), np.nanmax(band1)
# Use these values to set the color limits in the plot
im = plt.imshow(band1, cmap='viridis', aspect='auto', vmin=vmin, vmax=vmax)
plt.colorbar(im, label='Your variable label')
plt.show()
```
%% Cell type:code id: tags:
``` python
# Compare the statistics of the two datasets to verify that the COG values match the NetCDF values
netcdf_data = swe_nc_data
netcdf_values = netcdf_data['SWE'].isel(time=2).values
cog_values = band1
```
%% Cell type:code id: tags:
``` python
# Verify the coordinates of the COG and NetCDF data
# Get coordinates from NetCDF data
x_index, y_index = 0, 0
x_coord_netcdf, y_coord_netcdf = get_netcdf_coords(netcdf_data, x_index, y_index)
print(f"NetCDF coordinates: {x_coord_netcdf}, {y_coord_netcdf}")
# Get coordinates from COG data
with rasterio.open('./output_cog/venosta_1718_cog/venosta_1718_swe_2017-10-03.tif') as ds:
# Get coordinates from COG data
row_index, col_index = 0, 0 # Replace with your desired indices
x_coord_cog, y_coord_cog = get_cog_coords(ds, row_index, col_index)
print(f"COG coordinates: {x_coord_cog}, {y_coord_cog}")
```
%% Cell type:code id: tags:
``` python
validate_cogs(swe_nc_data, './output_cog/venosta_1718_cog')
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
Title
## Converting NetCDF into COGs
Copying the data from the network drive using a wildcard to get SWE maps. \
Then using a function defined in utils.py to convert the NetCDF files into COGs.
%% Cell type:code id: tags:
``` python
import glob
import shutil
import time
import warnings
import xarray as xr
import rasterio
from rasterio.transform import from_origin
import matplotlib.pyplot as plt
from rasterio.errors import NotGeoreferencedWarning
from rio_cogeo.cogeo import cog_translate
from rio_cogeo.profiles import cog_profiles
from utils import netcdf_to_cog
from utils import copy_netcdf_data, netcdf_to_cog
warnings.filterwarnings("ignore", category=NotGeoreferencedWarning)
```
%% Cell type:code id: tags:
``` python
# Find and copy the SWE data to the data folder
pattern = '/mnt/CEPH_PROJECTS/PROSNOW/SENTINEL-2/Daily_SCA/Venosta/*_swe.nc'
save_folder = './data'
netcdf_path = '/mnt/CEPH_PROJECTS/PROSNOW/SENTINEL-2/Daily_SCA/Venosta/*_swe.nc'
output = './data'
files = glob.glob(pattern)
time_start = time.perf_counter()
for file in files:
shutil.copy(file, save_folder)
time_end = time.perf_counter()
print(f"Time to copy {len(files)} files: {round(time_end - time_start, 2)} seconds")
```
%% Cell type:code id: tags:
``` python
dataset = xr.open_dataset('./data/venosta_hy1718_swe.nc', engine='netcdf4')
print(dataset.keys())
copy_netcdf_data(netcdf_path, output)
```
%% Output
KeysView(<xarray.Dataset>
Dimensions: (x: 1220, y: 960, time: 365)
Coordinates:
* x (x) float64 6e+05 6.001e+05 6.001e+05 ... 6.609e+05 6.61e+05
* y (y) float64 5.193e+06 5.193e+06 ... 5.145e+06 5.145e+06
* time (time) datetime64[ns] 2017-10-01 2017-10-02 ... 2018-09-30
reference_time datetime64[ns] ...
Data variables:
spatial_ref int64 ...
SWE (time, y, x) float32 ...)
%% Cell type:code id: tags:
``` python
netcdf_path = './data/venosta_hy1718_swe.nc'
cog_dir = './output_cog/venosta_1718_cog/'
cogs= netcdf_to_cog(netcdf_path, cog_dir)
cogs= netcdf_to_cog(netcdf_path = './data/venosta_hy1718_swe.nc',
cog_dir = './output_cog/venosta_1718_cog/',
swe_name_year = 'venosta_1718_swe'
)
```
%% Output
Reading input: temp.tif
Adding overviews...
Updating dataset tags...
Writing output to: ./output_cog/venosta_1718_cog/cog_2017-10-01.tif
Reading input: temp.tif
Adding overviews...
Updating dataset tags...
Writing output to: ./output_cog/venosta_1718_cog/cog_2017-10-01.tif/cog_2017-10-02.tif
---------------------------------------------------------------------------
CPLE_OpenFailedError Traceback (most recent call last)
Cell In[4], line 4
1 netcdf_path = './data/venosta_hy1718_swe.nc'
2 cog_path = './output_cog/venosta_1718_cog/'
----> 4 cogs= netcdf_to_cog(netcdf_path, cog_path)
File ~/eurac/projects/swe-maps-stac/swe_maps_stac/utils.py:40, in netcdf_to_cog(netcdf_path, cog_path)
37 temp.write(time_slice.SWE.values, 1)
39 cog_profile = cog_profiles.get('deflate')
---> 40 cog_translate('temp.tif', cog_path, cog_profile)
42 time_end = time.perf_counter()
43 print(f'COG creation time: {time_end - time_start:.2f} seconds')
File ~/eurac/projects/swe-maps-stac/.venv/lib/python3.10/site-packages/rio_cogeo/cogeo.py:407, in cog_translate(source, dst_path, dst_kwargs, indexes, nodata, dtype, add_mask, overview_level, overview_resampling, web_optimized, tms, zoom_level_strategy, zoom_level, aligned_levels, resampling, in_memory, config, allow_intermediate_compression, forward_band_tags, forward_ns_tags, quiet, progress_out, temporary_compression, colormap, additional_cog_metadata, use_cog_driver)
404 copy(tmp_dst, dst_path, **dst_kwargs)
406 else:
--> 407 copy(tmp_dst, dst_path, copy_src_overviews=True, **dst_kwargs)
File ~/eurac/projects/swe-maps-stac/.venv/lib/python3.10/site-packages/rasterio/env.py:451, in ensure_env_with_credentials.<locals>.wrapper(*args, **kwds)
448 session = DummySession()
450 with env_ctor(session=session):
--> 451 return f(*args, **kwds)
File rasterio/shutil.pyx:139, in rasterio.shutil.copy()
File rasterio/_err.pyx:221, in rasterio._err.exc_wrap_pointer()
CPLE_OpenFailedError: Attempt to create new tiff file './output_cog/venosta_1718_cog/cog_2017-10-01.tif/cog_2017-10-02.tif' failed: Not a directory
%% Cell type:markdown id: tags:
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
/home/jzvolensky/eurac/projects/swe-maps-stac/swe_maps_stac/utils.py:30: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
transform = rasterio.transform.from_bounds(dataset.x[0], dataset.y[-1], dataset.x[-1], dataset.y[0], dataset.dims['x'], dataset.dims['y'])
``` python
# Visual check of the COG
```
COG creation time: 10.03 seconds
......
import os
import re
import time
import glob
import shutil
import xarray as xr
import numpy as np
import rasterio
import pyproj
from rasterio.transform import from_origin
from rio_cogeo.cogeo import cog_translate
from rio_cogeo.profiles import cog_profiles
from multiprocessing import Pool
def netcdf_to_cog(netcdf_path: str, cog_dir: str):
def netcdf_to_cog(netcdf_path: str, cog_dir: str, swe_name_year: str) -> None:
"""
Convert a SWE netCDF file to a series of Cloud Optimized GeoTIFF (COG) files.
......@@ -17,27 +19,165 @@ def netcdf_to_cog(netcdf_path: str, cog_dir: str):
Path to the netCDF file.
cog_dir : str
Directory for the output COG files.
swe_name_year : str
Name of the SWE dataset and year. This is used in the COG file names.
"""
time_start = time.perf_counter()
time_start: float = time.perf_counter()
os.makedirs(cog_dir, exist_ok=True)
dataset = xr.open_dataset(netcdf_path)
spatial_ref = dataset['spatial_ref'].attrs['crs_wkt']
spatial_ref = dataset["spatial_ref"].attrs["crs_wkt"]
crs = pyproj.CRS.from_string(spatial_ref)
transform = from_origin(0, 0, 1, 1)
transform = rasterio.transform.from_bounds(
dataset.x[0],
dataset.y[-1],
dataset.x[-1],
dataset.y[0],
dataset.dims["x"],
dataset.dims["y"],
)
for i in range(len(dataset.time)):
# for i in range(len(dataset.time)):
for i in range(10):
time_slice = dataset.isel(time=i)
timestamp = time_slice.time.values.astype('datetime64[D]').astype(str)
cog_file = f'cog_{timestamp}.tif'
timestamp = time_slice.time.values.astype("datetime64[D]").astype(str)
cog_file = f"{swe_name_year}_{timestamp}.tif"
cog_path = os.path.join(cog_dir, cog_file)
with rasterio.open('temp.tif', 'w', driver='GTiff', height=time_slice.y.size, width=time_slice.x.size, count=1, dtype=str(time_slice.SWE.dtype), crs=crs, transform=transform) as temp:
with rasterio.open(
"temp.tif",
"w",
driver="GTiff",
height=time_slice.y.size,
width=time_slice.x.size,
count=1,
dtype=str(time_slice.SWE.dtype),
crs=crs,
transform=transform,
) as temp:
temp.write(time_slice.SWE.values, 1)
cog_profile = cog_profiles.get('deflate')
cog_translate('temp.tif', cog_path, cog_profile)
with rasterio.open("temp.tif") as src:
profile = src.profile
profile.update(
driver="COG",
dtype=rasterio.float32,
nodata=0,
compress="deflate",
predictor=2,
blockxsize=256,
blockysize=256,
tiled=True,
)
with rasterio.open(cog_path, "w", **profile) as dst:
dst.write(src.read(1), 1)
os.remove("temp.tif")
time_end: float = time.perf_counter()
print(f"COG creation time: {time_end - time_start:.2f} seconds")
def copy_netcdf_data(netcdf_path: str, output: str) -> None:
"""
Copy the data from a netCDF file to a new netCDF file.
Parameters
----------
netcdf_path : str
Path to the original netCDF file.
Note: This can be a wildcard path.
output : str
Path where the NetCDF is stored.
"""
files = glob.glob(netcdf_path)
time_start = time.perf_counter()
for file in files:
shutil.copy(file, output)
time_end = time.perf_counter()
print(f"Time to copy {len(files)} files: {round(time_end - time_start, 2)} seconds")
def get_netcdf_coords(dataset, x_index, y_index, engine="netcdf4"):
"""
Extract the x and y coordinates from a netCDF file.
Used for validating the conversion
"""
x_coord = float(dataset.x[x_index].values)
y_coord = float(dataset.y[y_index].values)
return x_coord, y_coord
def get_cog_coords(ds, row_index, col_index):
"""
Extract the x and y coordinates from a COG file.
Used for validating the conversion
"""
x_coord, y_coord = ds.transform * (col_index, row_index)
return x_coord, y_coord
def validate_cogs(netcdf_data, cog_dir):
"""
Valide the COGs by comparing the values of coordinates and statistics
with the respective NetCDF timestamp.
Parameters:
netcdf_data : xarray.Dataset
The netCDF dataset
cog_dir : str
The directory containing the COG files
"""
time_start = time.perf_counter()
cog_files = sorted(glob.glob(f"{cog_dir}/*.tif"))
for cog_file in cog_files:
timestamp = re.search(r"\d{4}-\d{2}-\d{2}", cog_file).group(0)
time_slice = netcdf_data.sel(time=timestamp)
with rasterio.open(cog_file) as ds:
cog_values = ds.read(1)
x_coord_cog, y_coord_cog = ds.transform * (0, 0)
x_coord_netcdf, y_coord_netcdf = (
time_slice["x"].values[0],
time_slice["y"].values[0],
)
if abs(x_coord_netcdf - x_coord_cog) > abs(y_coord_netcdf - y_coord_cog):
print(f"Coordinates do not match for timestamp: {timestamp}")
netcdf_min, netcdf_max, netcdf_mean, netcdf_std = (
np.nanmin(time_slice["SWE"].values),
np.nanmax(time_slice["SWE"].values),
np.nanmean(time_slice["SWE"].values),
np.nanstd(time_slice["SWE"].values),
)
cog_min, cog_max, cog_mean, cog_std = (
np.nanmin(cog_values),
np.nanmax(cog_values),
np.nanmean(cog_values),
np.nanstd(cog_values),
)
print(f"\nTimestamp Checked: {timestamp}")
if (
abs(netcdf_min - cog_min)
<= abs(netcdf_max - cog_max)
<= abs(netcdf_mean - cog_mean)
<= abs(netcdf_std - cog_std)
):
print("pass")
else:
print("fail")
time_end = time.perf_counter()
print(f'COG creation time: {time_end - time_start:.2f} seconds')
\ No newline at end of file
print(f"Validation time: {time_end - time_start:.2f} seconds")
print(f"Number of COGs validated: {len(cog_files)}")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment