"""Command line interface parsers.

License
-------

    Copyright (c) 2020 Daniel Frisinghelli

    This source code is licensed under the GNU General Public License v3.

    See the LICENSE file in the repository's root directory.

"""

# !/usr/bin/env python
# -*- coding: utf-8 -*-

# builtins
import pathlib
import argparse


# epilogue to display at the end of each parser
EPILOGUE = 'Author: Daniel Frisinghelli, daniel.frisinghelli@gmail.com'

# accepted spellings of boolean command line values (compared lowercased)
_TRUE_VALUES = frozenset(('true', 't', 'yes', 'y', 'on', '1'))
_FALSE_VALUES = frozenset(('false', 'f', 'no', 'n', 'off', '0'))


def _str2bool(value):
    """Convert a command line string to a boolean.

    ``argparse`` passes the raw string to the ``type`` callable. Using the
    builtin ``bool`` there is a trap: ``bool('False')`` is ``True``, since
    every non-empty string is truthy.

    Parameters
    ----------
    value : `str` or `bool`
        The value given on the command line.

    Returns
    -------
    flag : `bool`
        The boolean the value spells out.

    Raises
    ------
    argparse.ArgumentTypeError
        Raised if ``value`` is not a recognized boolean spelling.

    """
    if isinstance(value, bool):
        return value

    normalized = value.strip().lower()
    if normalized in _TRUE_VALUES:
        return True
    if normalized in _FALSE_VALUES:
        return False

    raise argparse.ArgumentTypeError(
        'invalid boolean value: {!r}'.format(value))


def structure_parser():
    """Command line argument parser to standardize dataset structure.

    The parser defines two positional arguments, ``archive`` and ``target``
    (both converted to `pathlib.Path`), and two optional boolean switches,
    ``-o/--overwrite`` and ``-r/--remove``. A switch given without a value
    evaluates to True, an omitted switch evaluates to False, and an explicit
    value (e.g. ``-o false``) is interpreted as the boolean it spells out.

    Returns
    -------
    parser : `argparse.ArgumentParser`
        The configured command line argument parser.

    """
    parser = argparse.ArgumentParser(
        description='Standardize the dataset directory structure.',
        epilog=EPILOGUE,
        formatter_class=lambda prog: argparse.RawDescriptionHelpFormatter(
            prog, max_help_position=50, indent_increment=2))

    # positional arguments

    # positional argument: path to the archive
    parser.add_argument('archive', type=pathlib.Path,
                        help='Path to the dataset archive.')

    # positional argument: path to extract and restructure the dataset
    parser.add_argument('target', type=pathlib.Path,
                        help='Path to save standardized dataset structure.')

    # optional arguments

    # default values
    default = '(default: %(default)s)'

    # optional argument: whether to overwrite existing files
    # nargs='?' together with const=True lets a bare "-o" act as a switch
    parser.add_argument('-o', '--overwrite', type=_str2bool,
                        help='Overwrite files {}'.format(default),
                        default=False, nargs='?', const=True, metavar='')

    # optional argument: whether to copy or move extracted files
    parser.add_argument('-r', '--remove', type=_str2bool,
                        help='Remove original dataset {}'.format(default),
                        default=False, nargs='?', const=True, metavar='')

    return parser
# coding: utf-8

# builtins
import sys
import pathlib
from logging.config import dictConfig

# locals
from pysegcnn.core.utils import (destack_tiff, standard_eo_structure,
                                 extract_archive)
from pysegcnn.core.logging import log_conf
from pysegcnn.core.cli import structure_parser


if __name__ == '__main__':

    # configure logging: the log file has the same path as this script, with
    # the file suffix replaced by ".log". Only the suffix is replaced, so a
    # ".py" occurring elsewhere in the path is left untouched.
    log_file = pathlib.Path(__file__).with_suffix('.log')
    dictConfig(log_conf(str(log_file)))

    # the argument parser
    parser = structure_parser()

    # without any command line arguments: display the help message and exit
    cli_args = sys.argv[1:]
    if not cli_args:
        parser.print_help()
        sys.exit()

    # parse the command line arguments
    args = parser.parse_args(cli_args)

    # extract the archive
    extracted = extract_archive(args.archive, args.target, args.overwrite)

    # transform SPARCS directory structure to standard structure
    standard_eo_structure(source_path=extracted, target_path=args.target,
                          overwrite=args.overwrite, move=args.remove)

    # destack the TIFF raster to a single TIFF for each band
    for scene in args.target.iterdir():
        # the TIFF file containing the bands: entries of the target
        # directory without a matching "*data.tif" file are skipped
        try:
            data = next(scene.glob('*data.tif'))
        except StopIteration:
            continue

        # build the TIFFs for each band
        destack_tiff(data, overwrite=args.overwrite, remove=args.remove)