From e0898c8c12df6fd6c9ddc6a4a7050998e42c0702 Mon Sep 17 00:00:00 2001
From: "Daniel.Frisinghelli" <daniel.frisinghelli@eurac.edu>
Date: Fri, 11 Dec 2020 15:07:14 +0100
Subject: [PATCH] Added support for hdf file format.

---
 pysegcnn/core/utils.py | 174 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 169 insertions(+), 5 deletions(-)

diff --git a/pysegcnn/core/utils.py b/pysegcnn/core/utils.py
index ba5f553..3a5ee0b 100644
--- a/pysegcnn/core/utils.py
+++ b/pysegcnn/core/utils.py
@@ -82,7 +82,7 @@ def img2np(path, tile_size=None, tile=None, pad=False, cval=0):
 
     Parameters
     ----------
-    path : `str` or `None` or :py:class:`numpy.ndarray`
+    path : `str` or :py:class:`pathlib.Path` or :py:class:`numpy.ndarray`
         The image to read.
     tile_size : `None` or `int`, optional
         The size of a tile. The default is `None`.
@@ -113,11 +113,10 @@ def img2np(path, tile_size=None, tile=None, pad=False, cval=0):
 
     """
     # check the type of path
-    if isinstance(path, str):
-        if not os.path.exists(path):
-            raise FileNotFoundError('{} does not exist.'.format(path))
+    if isinstance(path, str) or isinstance(path, pathlib.Path):
+
         # the image dataset as returned by gdal
-        img = gdal.Open(path)
+        img = gdal.Open(str(path))
 
         # number of bands
         bands = img.RasterCount
@@ -252,6 +251,171 @@ def img2np(path, tile_size=None, tile=None, pad=False, cval=0):
     return image
 
 
+def read_hdf(path, **kwargs):
+    """Read a file in Hierarchical Data Format (HDF) to a dictionary.
+
+    Parameters
+    ----------
+    path : `str` or :py:class:`pathlib.Path`
+        The path to the hdf file to read.
+    **kwargs :
+        Additional keyword arguments passed to
+        :py:func:`pysegcnn.core.utils.img2np`.
+
+    Raises
+    ------
+    ValueError
+        Raised if ``path`` is not an hdf file.
+
+    Returns
+    -------
+    hdf_ds : `dict` [`str`, :py:class:`numpy.ndarray`]
+        The hdf file as a dictionary. The keys are the names of the
+        different datasets in the hdf file and the values are the
+        corresponding data as a :py:class:`numpy.ndarray`.
+
+    """
+    # check if the path points to an hdf file
+    if not str(path).endswith(('.h4', '.hdf', '.hdf4', '.hdf5', '.he2',
+                               '.h5', '.he5')):
+        raise ValueError('{} is not an hdf file.'.format(path))
+
+    # read the hdf dataset
+    hdf = gdal.Open(str(path)).GetSubDatasets()
+
+    # iterate over the different subdatasets and store arrays in dictionary
+    hdf_ds = {}
+    for ds in hdf:
+        # name of the current dataset
+        name = ds[0].split(':')[-1]
+
+        # read the dataset to a numpy array
+        data = img2np(ds[0], **kwargs)
+
+        # store in dictionary
+        hdf_ds[name] = data
+
+    return hdf_ds
+
+
+def hdf2tifs(path, outpath=None, overwrite=False, create_stack=True,
+             **kwargs):
+    """Convert a file in Hierarchical Data Format (HDF) to GeoTIFFs.
+
+    The GeoTIFFs share the filename of ``path``, suffixed by the name of
+    the respective subdataset.
+
+    The default (``outpath=None``) is to save the GeoTIFFs in a directory
+    named after the filename of ``path``, within the parent directory of
+    ``path``.
+
+    Parameters
+    ----------
+    path : `str` or :py:class:`pathlib.Path`
+        The path to the hdf file to convert.
+    outpath : `str` or :py:class:`pathlib.Path`, optional
+        Path to save the GeoTIFF files. The default is `None`.
+    overwrite : `bool`, optional
+        Whether to overwrite existing GeoTIFF files in ``outpath``. The
+        default is `False`.
+    create_stack : `bool`, optional
+        Whether to create a GeoTIFF stack of all the subdatasets in
+        ``path``. The default is `True`.
+    **kwargs :
+        Additional keyword arguments passed to :py:func:`gdal.Translate`.
+
+    """
+    # check if the path points to an hdf file
+    path = pathlib.Path(path)
+    if not str(path).endswith(('.h4', '.hdf', '.hdf4', '.hdf5', '.he2',
+                               '.h5', '.he5')):
+        raise ValueError('{} is not an hdf file.'.format(path))
+
+    # check whether an output path is defined
+    if outpath is None:
+        outpath = path.parent
+    else:
+        outpath = pathlib.Path(outpath)
+
+    # create the output directory
+    outpath = outpath.joinpath(path.stem.replace('.', '_'))
+
+    # check whether the output path exists
+    if not outpath.exists():
+        LOGGER.info('mkdir {}'.format(str(outpath)))
+        outpath.mkdir(parents=True, exist_ok=True)
+
+    # check whether the output path contains GeoTIFF files
+    tifs = [f for f in outpath.iterdir() if f.suffix in ['.tif', '.TIF']]
+
+    # check whether to overwrite existing files
+    if tifs:
+        LOGGER.info('The following files already exist in {}'
+                    .format(str(outpath)))
+        LOGGER.info('\n'.join(['{}'.format(str(tif.name)) for tif in tifs]))
+        if not overwrite:
+            # return if not overwriting
+            LOGGER.info('Aborting...')
+            return
+
+        # remove existing files and prepare to overwrite
+        LOGGER.info('Overwrite {}'.format(str(outpath)))
+        for tif in tifs:
+            tif.unlink()
+
+    # read the hdf dataset
+    hdf = gdal.Open(str(path)).GetSubDatasets()
+
+    # iterate over the different subdatasets in the hdf
+    for ds in hdf:
+
+        # name of the current subdataset
+        name = ds[0].split(':')[-1]
+
+        # filename of the GeoTIFF
+        tif_name = outpath.joinpath(
+            path.name.replace(path.suffix, '_{}.tif'.format(name)))
+
+        # convert hdf subdataset to GeoTIFF
+        gdal.Translate(str(tif_name), gdal.Open(ds[0]), **kwargs)
+
+    # check whether to create a GeoTIFF stack
+    if create_stack:
+        # filename for the GeoTIFF stack
+        stk = outpath.joinpath(path.name.replace(path.suffix, '.tif'))
+        LOGGER.info('Creating GeoTIFF stack: {}'.format(stk))
+
+        # generated GeoTIFF files
+        tifs = [str(f) for f in outpath.iterdir() if f.suffix in
+                ['.tif', '.TIF']]
+
+        # create stacked GeoTIFF
+        stack_tifs(str(stk), tifs)
+
+    return
+
+
+def stack_tifs(filename, tifs, **kwargs):
+    """Create a stacked GeoTIFF from a list of single-band GeoTIFFs.
+
+    Parameters
+    ----------
+    filename : `str` or :py:class:`pathlib.Path`
+        The filename of the stacked GeoTIFF, ending with `.tif`.
+    tifs : `list` [`str`]
+        The list of paths to the GeoTIFF files to stack.
+    **kwargs :
+        Additional keyword arguments passed to :py:func:`gdal.Translate`.
+
+    """
+    # build a virtual raster dataset from the single-band GeoTIFFs
+    vrt = str(filename).replace('.tif', '.vrt')
+    vrt_ds = gdal.BuildVRT(str(vrt), tifs, separate=True)
+
+    # create the stacked GeoTIFF from the virtual raster
+    gdal.Translate(str(filename), vrt_ds, **kwargs)
+
+    return
+
+
 def is_divisible(img_size, tile_size, pad=False):
     """Check whether an image is evenly divisible into square tiles.
-- 
GitLab
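
Usage note: a minimal sketch of how the new helpers might be called once the
patch is applied. The filename 'MOD13Q1_example.hdf' and the keyword values
shown are hypothetical; read_hdf forwards its keyword arguments to img2np,
while hdf2tifs forwards them to gdal.Translate.

    from pysegcnn.core.utils import read_hdf, hdf2tifs

    # read all subdatasets of the hdf file into a dictionary of numpy
    # arrays, keyed by subdataset name
    arrays = read_hdf('MOD13Q1_example.hdf')

    # convert each subdataset to a GeoTIFF in a directory next to the input
    # file, overwrite previously generated files and build a stacked GeoTIFF
    hdf2tifs('MOD13Q1_example.hdf', outpath=None, overwrite=True,
             create_stack=True)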