Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
PySegCNN
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
earth_observation_public
PySegCNN
Commits
48fbdb1f
Commit
48fbdb1f
authored
4 years ago
by
Frisinghelli Daniel
Browse files
Options
Downloads
Patches
Plain Diff
Added a command line interface to preprocess the SPARCS dataset
parent
3f51854f
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
pysegcnn/core/cli.py
+65
-0
65 additions, 0 deletions
pysegcnn/core/cli.py
pysegcnn/preprocessing/sparcs.py
+36
-182
36 additions, 182 deletions
pysegcnn/preprocessing/sparcs.py
with
101 additions
and
182 deletions
pysegcnn/core/cli.py
0 → 100644
+
65
−
0
View file @
48fbdb1f
"""
Command line interface parsers.
License
-------
Copyright (c) 2020 Daniel Frisinghelli
This source code is licensed under the GNU General Public License v3.
See the LICENSE file in the repository
'
s root directory.
"""
# !/usr/bin/env python
# -*- coding: utf-8 -*-
# builtins
import
pathlib
import
argparse
# epilogue to display at the end of each parser
EPILOGUE = 'Author: Daniel Frisinghelli, daniel.frisinghelli@gmail.com'


def _str2bool(value):
    """
    Convert the value of a boolean command line flag to a `bool`.

    ``argparse`` passes the raw string to ``type``. Using the builtin `bool`
    as converter evaluates every non-empty string, including ``'False'``, to
    True, hence the explicit mapping implemented here.

    Parameters
    ----------
    value : `str` or `bool`
        The value given on the command line.

    Raises
    ------
    argparse.ArgumentTypeError
        Raised if ``value`` is not a recognized boolean literal.

    Returns
    -------
    flag : `bool`
        The boolean represented by ``value``.

    """
    if isinstance(value, bool):
        return value

    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1', 'on'):
        return True

    # the empty string maps to False, as it does for the builtin bool()
    if token in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False

    raise argparse.ArgumentTypeError(
        'Expected a boolean value, got: {!r}.'.format(value))


def structure_parser():
    """
    Command line argument parser to standardize dataset structure.

    The parser defines two positional arguments, ``archive`` and ``target``,
    both converted to `pathlib.Path`, and two optional boolean flags,
    ``-o/--overwrite`` and ``-r/--remove``. A flag given without a value
    evaluates to True, a flag that is not given evaluates to False. A flag
    may also be followed by an explicit boolean literal, e.g. ``-o False``.

    Returns
    -------
    parser : `argparse.ArgumentParser`
        The configured command line argument parser.

    """
    parser = argparse.ArgumentParser(
        description='Standardize the dataset directory structure.',
        epilog=EPILOGUE,
        formatter_class=lambda prog: argparse.RawDescriptionHelpFormatter(
            prog, max_help_position=50, indent_increment=2))

    # positional arguments

    # positional argument: path to the archive
    parser.add_argument('archive', type=pathlib.Path,
                        help='Path to the dataset archive.')

    # positional argument: path to extract and restructure the dataset
    parser.add_argument('target', type=pathlib.Path,
                        help='Path to save standardized dataset structure.')

    # optional arguments

    # default values: argparse expands %(default)s in the help message
    default = '(default: %(default)s)'

    # optional argument: whether to overwrite existing files
    parser.add_argument('-o', '--overwrite', type=_str2bool,
                        help='Overwrite files {}'.format(default),
                        default=False, nargs='?', const=True, metavar='')

    # optional argument: whether to copy or move extracted files
    parser.add_argument('-r', '--remove', type=_str2bool,
                        help='Remove original dataset {}'.format(default),
                        default=False, nargs='?', const=True, metavar='')

    return parser
This diff is collapsed.
Click to expand it.
pysegcnn/preprocessing/sparcs.py
+
36
−
182
View file @
48fbdb1f
...
...
@@ -15,192 +15,46 @@ License
# coding: utf-8
# builtins
import
os
import
glob
import
shutil
# externals
import
gdal
import
numpy
as
np
import
sys
from
logging.config
import
dictConfig
# locals
from
pylandsat.core.untar
import
extract_data
from
pylandsat.core.calibration
import
landsat_radiometric_calibration
def sparcs2pylandsat(source_path, target_path, overwrite=True):
    """
    Convert the Sparcs dataset structure to standard EO structure.

    Each file found below ``source_path`` is moved to a scene directory
    ``target_path/<scene>``, where ``<scene>`` is the part of the file name
    preceding the first underscore. Metadata files (``*_mtl.txt``) get the
    line ``COLLECTION_NUMBER = 1`` appended and are renamed to ``*_MTL.txt``.
    After all files are processed, ``source_path`` is removed recursively,
    including files that were skipped because they already exist in
    ``target_path``. Hence, ``target_path`` must not be located inside
    ``source_path``.

    Parameters
    ----------
    source_path : `str`
        Path to the Sparcs archive downloaded `here`_.
    target_path : `str`
        Path to save the preprocessed sparcs dataset.
    overwrite : `bool`
        Whether to overwrite existing files.

    Returns
    -------
    None.

    .. _here:
        https://www.usgs.gov/land-resources/nli/landsat/spatial-procedures-automated-removal-cloud-and-shadow-sparcs-validation

    """
    # ending of the metadata files: original and standardized
    mtl_suffix, std_suffix = '_mtl.txt', '_MTL.txt'

    # create a directory for each scene
    for dirpath, _, filenames in os.walk(source_path):

        # iterate over the files to modify: folders without files are
        # skipped implicitly
        for file in filenames:

            # get the path to the file
            old_path = os.path.join(dirpath, file)

            # get name of the scene
            scene = file.split('_')[0]

            # define the new path to the file
            scene_path = os.path.join(target_path, scene)

            # check if file is the metadata file
            new_name = file
            if file.endswith(mtl_suffix):

                # add the collection number to the metadata file
                with open(old_path, 'a') as mfile:
                    mfile.write('COLLECTION_NUMBER = 1')

                # replace the file ending only: str.replace would also
                # modify any other occurrence of 'mtl' in the file name
                new_name = file[:-len(mtl_suffix)] + std_suffix

            new_file = os.path.join(scene_path, new_name)

            # move files to new directory
            if os.path.isfile(new_file) and not overwrite:
                print('{} already exists.'.format(new_file))
                continue

            os.makedirs(scene_path, exist_ok=True)
            shutil.move(old_path, new_file)

    # remove old file location
    shutil.rmtree(source_path)
def destack_sparcs_raster(inpath, outpath=None, suffix='*_toa.tif'):
    """
    Destack a TIFF with more than one band into a TIFF file for each band.

    The stacked TIFF is the file matching ``*data.tif`` in ``inpath``. Each
    band is written as unsigned 16-bit integer to a separate TIFF in
    ``outpath``, where ``data`` in the file name is replaced by ``B<band>``.
    The stacked TIFF is deleted afterwards. If ``inpath`` contains no stacked
    TIFF, e.g. because the scene was destacked by a previous run, the scene
    is skipped.

    Parameters
    ----------
    inpath : `str`
        Path to a directory containing the TIFF file to destack.
    outpath : `str`, optional
        Path to save the output TIFF files. The default is None. If None,
        ``outpath`` = ``inpath``.
    suffix : `str`, optional
        Glob pattern matching already processed files in ``inpath``. If the
        number of matches equals the number of bands of the stacked TIFF,
        the scene is considered processed and the stacked TIFF is deleted
        without destacking. The default is '*_toa.tif'.

    Returns
    -------
    None.

    """
    # default: output directory is equal to the input directory
    if outpath is None:
        outpath = inpath

    # check if output directory exists
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    # get the TIFF to destack
    stacked = glob.glob(inpath + os.sep + '*data.tif')
    if not stacked:
        # the stacked TIFF is deleted after destacking: a missing file must
        # not raise an IndexError when the function is run again
        print('Scene: {} has no TIFF to destack.'
              .format(os.path.basename(inpath)))
        return
    tif = stacked.pop()

    # open the raster
    img = gdal.Open(tif)

    # check whether the current scene was already processed
    processed = glob.glob(inpath + os.sep + suffix)
    if len(processed) == img.RasterCount:
        print('Scene: {} already processed.'
              .format(os.path.basename(inpath)))

        # drop the reference to the dataset before deleting the file
        img = None
        os.unlink(tif)
        return

    # image driver
    driver = gdal.GetDriverByName('GTiff')
    driver.Register()

    # output image type: digital numbers unsigned integer 16bit
    codage = gdal.GDT_UInt16
    nptype = np.uint16

    # image size and tiles
    cols = img.RasterXSize
    rows = img.RasterYSize
    bands = img.RasterCount

    # print progress
    imgname = os.path.basename(tif)
    print('Processing: {}'.format(imgname))

    # iterate the bands of the raster: band numbering starts at 1
    for b in range(1, bands + 1):

        # output file: replace for band name
        fname = os.path.join(outpath, imgname.replace('data', 'B' + str(b)))
        outDs = driver.Create(fname, cols, rows, 1, codage)

        # read the data of band b
        band = img.GetRasterBand(b)
        data = band.ReadAsArray().astype(nptype)

        # define output band
        outband = outDs.GetRasterBand(1)

        # write array to output band
        outband.WriteArray(data)
        outband.FlushCache()

        # Set the geographic information
        outDs.SetProjection(img.GetProjection())
        outDs.SetGeoTransform(img.GetGeoTransform())

        # clear memory
        del outband, band, data, outDs

    # remove old stacked GeoTIFF: drop the reference to the dataset first
    img = None
    os.unlink(tif)
from
pysegcnn.core.utils
import
(
destack_tiff
,
standard_eo_structure
,
extract_archive
)
from
pysegcnn.core.logging
import
log_conf
from
pysegcnn.core.cli
import
structure_parser
if
__name__
==
'
__main__
'
:
# define path to working directory
# wd = 'C:/Eurac/2020/'
wd
=
'
/mnt/CEPH_PROJECTS/cci_snow/dfrisinghelli/
'
# path to the downloaded sparcs archive
sparcs_archive
=
os
.
path
.
join
(
wd
,
'
_Datasets/Archives/l8cloudmasks.zip
'
)
# path to save preprocessed sparcs dataset
sparcs_path
=
os
.
path
.
join
(
wd
,
'
_Datasets/Sparcs
'
)
# extract the raw archive to the output path
location
=
extract_data
(
sparcs_archive
,
sparcs_path
)
# transform SPARCS directory structure to pylandsat standard
sparcs2pylandsat
(
source_path
=
location
,
target_path
=
sparcs_path
,
overwrite
=
False
)
# destack the TIFF rasterstack to a single TIFF for each band and perform
# radiometric calibration
for
scene
in
os
.
listdir
(
sparcs_path
):
# path to the current scene
scene_path
=
os
.
path
.
join
(
sparcs_path
,
scene
)
# build the GeoTIFFs for each band
destack_sparcs_raster
(
scene_path
,
suffix
=
'
*_toa.tif
'
)
# configure logging
dictConfig
(
log_conf
(
__file__
.
replace
(
'
.py
'
,
'
.log
'
)))
# the argument parser
parser
=
structure_parser
()
# parse the command line arguments
args
=
sys
.
argv
[
1
:]
if
not
args
:
parser
.
print_help
()
sys
.
exit
()
else
:
args
=
parser
.
parse_args
(
args
)
# extract the archive
extracted
=
extract_archive
(
args
.
archive
,
args
.
target
,
args
.
overwrite
)
# transform SPARCS directory structure to standard structure
standard_eo_structure
(
source_path
=
extracted
,
target_path
=
args
.
target
,
overwrite
=
args
.
overwrite
,
move
=
args
.
remove
)
# destack the TIFF raster to a single TIFF for each band
for
scene
in
args
.
target
.
iterdir
():
# the TIFF file containing the bands
try
:
data
=
next
(
scene
.
glob
(
'
*data.tif
'
))
except
StopIteration
:
continue
# convert the digital number format to top of atmosphere reflectance
landsat_radiometric_calibration
(
scene_path
,
exclude
=
[],
suffix
=
'
_toa
'
,
overwrite
=
False
,
remove_raw
=
True
)
# build the TIFFs for each band
destack_tiff
(
data
,
overwrite
=
args
.
overwrite
,
remove
=
args
.
remove
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment