Skip to content
Snippets Groups Projects
Commit 48fbdb1f authored by Frisinghelli Daniel
Browse files

Added a command line interface to preprocess the SPARCS dataset

parent 3f51854f
No related branches found
No related tags found
No related merge requests found
"""Command line interface parsers.
License
-------
Copyright (c) 2020 Daniel Frisinghelli
This source code is licensed under the GNU General Public License v3.
See the LICENSE file in the repository's root directory.
"""
# !/usr/bin/env python
# -*- coding: utf-8 -*-
# builtins
import pathlib
import argparse
# epilogue to display at the end of each parser
EPILOGUE = 'Author: Daniel Frisinghelli, daniel.frisinghelli@gmail.com'


def _str2bool(value):
    """Convert a command line string to a boolean.

    ``type=bool`` must not be used with :py:mod:`argparse`: ``bool('False')``
    is ``True`` since every non-empty string is truthy.

    Parameters
    ----------
    value : `str` or `bool`
        The raw value passed on the command line.

    Returns
    -------
    flag : `bool`
        The boolean represented by ``value``.

    Raises
    ------
    argparse.ArgumentTypeError
        Raised if ``value`` is not a recognized boolean literal.

    """
    if isinstance(value, bool):
        return value

    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    # the empty string is kept falsy, as it was with ``type=bool``
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False

    raise argparse.ArgumentTypeError(
        'Expected a boolean value, got: {!r}.'.format(value))


def structure_parser():
    """Command line argument parser to standardize dataset structure.

    The parser defines two positional arguments (``archive``, ``target``) and
    two optional boolean arguments (``-o/--overwrite``, ``-r/--remove``). The
    optional arguments can be used as plain flags (``-o``) or with an explicit
    value (``-o True``, ``-o False``).

    Returns
    -------
    parser : `argparse.ArgumentParser`
        The configured argument parser.

    """
    parser = argparse.ArgumentParser(
        description='Standardize the dataset directory structure.',
        epilog=EPILOGUE,
        formatter_class=lambda prog: argparse.RawDescriptionHelpFormatter(
            prog, max_help_position=50, indent_increment=2))

    # positional arguments

    # positional argument: path to the archive
    parser.add_argument('archive', type=pathlib.Path,
                        help='Path to the dataset archive.')

    # positional argument: path to extract and restructure the dataset
    parser.add_argument('target', type=pathlib.Path,
                        help='Path to save standardized dataset structure.')

    # optional arguments

    # default values
    default = '(default: %(default)s)'

    # optional argument: whether to overwrite existing files
    # nargs='?' together with const=True: a bare "-o" evaluates to True
    parser.add_argument('-o', '--overwrite', type=_str2bool,
                        help='Overwrite files {}'.format(default),
                        default=False, nargs='?', const=True, metavar='')

    # optional argument: whether to copy or move extracted files
    parser.add_argument('-r', '--remove', type=_str2bool,
                        help='Remove original dataset {}'.format(default),
                        default=False, nargs='?', const=True, metavar='')

    return parser
......@@ -15,192 +15,46 @@ License
# coding: utf-8
# builtins
import os
import glob
import shutil
# externals
import gdal
import numpy as np
import sys
from logging.config import dictConfig
# locals
from pylandsat.core.untar import extract_data
from pylandsat.core.calibration import landsat_radiometric_calibration
def _append_collection_number(path):
    """Append the collection number entry on its own line to a metadata file.

    Parameters
    ----------
    path : `str`
        Path to the metadata file to modify in place.

    """
    with open(path, 'rb+') as mfile:
        # make sure the new entry does not get fused with the last line, in
        # case the file does not end with a line break
        mfile.seek(0, os.SEEK_END)
        if mfile.tell() > 0:
            mfile.seek(-1, os.SEEK_END)
            if mfile.read(1) != b'\n':
                mfile.write(b'\n')
        mfile.write(b'COLLECTION_NUMBER = 1')


def sparcs2pylandsat(source_path, target_path, overwrite=True):
    """Convert the Sparcs dataset structure to standard EO structure.

    Each file found below ``source_path`` is moved to a scene directory
    ``target_path/<scene>``, where ``<scene>`` is the part of the file name
    preceding the first underscore. Metadata files (``*_mtl.txt``) get a
    collection number entry appended and are renamed to ``*_MTL.txt``.
    ``source_path`` is removed afterwards, including files which were skipped.

    Parameters
    ----------
    source_path : `str`
        Path to the Sparcs archive downloaded `here`_.
    target_path : `str`
        Path to save the preprocessed sparcs dataset.
    overwrite : `bool`
        Whether to overwrite existing files.

    Returns
    -------
    None.

    .. _here:
        https://www.usgs.gov/land-resources/nli/landsat/spatial-procedures-automated-removal-cloud-and-shadow-sparcs-validation

    """
    # file name ending of the metadata files: before and after renaming
    old_suffix, new_suffix = '_mtl.txt', '_MTL.txt'

    # create a directory for each scene
    for dirpath, _, filenames in os.walk(source_path):

        # iterate over the files to modify
        for file in filenames:

            # get the path to the file
            old_path = os.path.join(dirpath, file)

            # the name of the scene defines the new directory of the file
            scene_dir = os.path.join(target_path, file.split('_')[0])

            # check if file is the metadata file
            is_metadata = file.endswith(old_suffix)

            # replace the file ending only: the remainder of the file name
            # is left untouched, even if it contains "mtl"
            new_name = (file[:-len(old_suffix)] + new_suffix if is_metadata
                        else file)
            new_path = os.path.join(scene_dir, new_name)

            # check whether the file already exists before modifying anything
            if os.path.isfile(new_path) and not overwrite:
                print('{} already exists.'.format(new_path))
                continue

            # add the collection number to the metadata file
            if is_metadata:
                _append_collection_number(old_path)

            # move files to new directory
            os.makedirs(scene_dir, exist_ok=True)
            shutil.move(old_path, new_path)

    # remove old file location
    shutil.rmtree(source_path)
def destack_sparcs_raster(inpath, outpath=None, suffix='*_toa.tif'):
    """Destack a TIFF with more than one band into a TIFF file for each band.

    The stacked TIFF (``*data.tif``) is removed after processing. A scene
    directory without a stacked TIFF, e.g. because it was already processed
    by a previous run, is skipped.

    Parameters
    ----------
    inpath : `str`
        Path to a directory containing the TIFF file to destack.
    outpath : `str`, optional
        Path to save the output TIFF files. The default is None. If None,
        ``outpath`` = ``inpath``.
    suffix : `str`, optional
        Glob pattern matched against the files in ``inpath`` to check
        whether the scene was already processed: the scene is skipped, if the
        number of matching files is equal to the number of bands of the
        stacked TIFF. The default is '*_toa.tif'.

    Returns
    -------
    None.

    """
    # default: output directory is equal to the input directory
    if outpath is None:
        outpath = inpath

    # check if output directory exists
    os.makedirs(outpath, exist_ok=True)

    # get the TIFF to destack: there is nothing to do, if the scene does not
    # contain a stacked TIFF (it is removed once a scene is processed)
    stacked = glob.glob(os.path.join(inpath, '*data.tif'))
    if not stacked:
        print('Scene: {} has no stacked TIFF to process.'
              .format(os.path.basename(inpath)))
        return
    tif = stacked.pop()

    # open the raster
    img = gdal.Open(tif)

    # check whether the current scene was already processed
    processed = glob.glob(os.path.join(inpath, suffix))
    if len(processed) == img.RasterCount:
        print('Scene: {} already processed.'.format(os.path.basename(inpath)))
        # release the dataset before removing the file
        img = None
        os.unlink(tif)
        return

    # image driver
    driver = gdal.GetDriverByName('GTiff')
    driver.Register()

    # output image type: digital numbers unsigned integer 16bit
    codage = gdal.GDT_UInt16
    nptype = np.uint16

    # image size and tiles
    cols = img.RasterXSize
    rows = img.RasterYSize
    bands = img.RasterCount

    # print progress
    imgname = os.path.basename(tif)
    print('Processing: {}'.format(imgname))

    # iterate the bands of the raster
    for b in range(1, bands + 1):

        # output file: replace for band name
        fname = os.path.join(outpath, imgname.replace('data', 'B' + str(b)))
        outDs = driver.Create(fname, cols, rows, 1, codage)

        # read the data of band b
        band = img.GetRasterBand(b)
        data = band.ReadAsArray().astype(nptype)

        # define output band
        outband = outDs.GetRasterBand(1)

        # write array to output band
        outband.WriteArray(data)
        outband.FlushCache()

        # Set the geographic information
        outDs.SetProjection(img.GetProjection())
        outDs.SetGeoTransform(img.GetGeoTransform())

        # clear memory: dropping the references closes the output dataset
        del outband, band, data, outDs

    # remove old stacked GeoTIFF
    img = None
    os.unlink(tif)
from pysegcnn.core.utils import (destack_tiff, standard_eo_structure,
extract_archive)
from pysegcnn.core.logging import log_conf
from pysegcnn.core.cli import structure_parser
if __name__ == '__main__':
# NOTE(review): indentation is missing in this view and the body appears to
# contain two pipelines back to back: one driven by the hard-coded paths
# below, and one driven by the command line arguments returned by
# structure_parser(). Confirm which one is intended before running.
# define path to working directory
# wd = 'C:/Eurac/2020/'
wd = '/mnt/CEPH_PROJECTS/cci_snow/dfrisinghelli/'
# path to the downloaded sparcs archive
sparcs_archive = os.path.join(wd, '_Datasets/Archives/l8cloudmasks.zip')
# path to save preprocessed sparcs dataset
sparcs_path = os.path.join(wd, '_Datasets/Sparcs')
# extract the raw archive to the output path
location = extract_data(sparcs_archive, sparcs_path)
# transform SPARCS directory structure to pylandsat standard
sparcs2pylandsat(source_path=location, target_path=sparcs_path,
overwrite=False)
# destack the TIFF rasterstack to a single TIFF for each band and perform
# radiometric calibration
for scene in os.listdir(sparcs_path):
# path to the current scene
scene_path = os.path.join(sparcs_path, scene)
# build the GeoTIFFs for each band
destack_sparcs_raster(scene_path, suffix='*_toa.tif')
# configure logging
# the log file name is derived from this file: ".py" is replaced by ".log"
dictConfig(log_conf(__file__.replace('.py', '.log')))
# the argument parser
parser = structure_parser()
# parse the command line arguments
# without any arguments, the help message is printed and the script exits
args = sys.argv[1:]
if not args:
parser.print_help()
sys.exit()
else:
args = parser.parse_args(args)
# extract the archive
extracted = extract_archive(args.archive, args.target, args.overwrite)
# transform SPARCS directory structure to standard structure
standard_eo_structure(source_path=extracted, target_path=args.target,
overwrite=args.overwrite, move=args.remove)
# destack the TIFF raster to a single TIFF for each band
for scene in args.target.iterdir():
# the TIFF file containing the bands
# entries without a file matching "*data.tif" are skipped
try:
data = next(scene.glob('*data.tif'))
except StopIteration:
continue
# convert the digital number format to top of atmosphere reflectance
# NOTE(review): ``scene_path`` is only assigned in the earlier loop over
# os.listdir(sparcs_path), not in this loop over args.target.iterdir() --
# confirm this call belongs here.
landsat_radiometric_calibration(scene_path, exclude=[], suffix='_toa',
overwrite=False, remove_raw=True)
# build the TIFFs for each band
destack_tiff(data, overwrite=args.overwrite, remove=args.remove)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment