Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
PySegCNN
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
earth_observation_public
PySegCNN
Commits
598a7fa5
Commit
598a7fa5
authored
4 years ago
by
Frisinghelli Daniel
Browse files
Options
Downloads
Patches
Plain Diff
Added distinct directory for dataset preprocessing
parent
9c9b59b7
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
preprocessing/sparcs.py
+195
-0
195 additions, 0 deletions
preprocessing/sparcs.py
with
195 additions
and
0 deletions
preprocessing/sparcs.py
0 → 100755
+
195
−
0
View file @
598a7fa5
"""
Functions to preprocess the Sparcs dataset to work with pylandsat.
"""
# !/usr/bin/env python
# coding: utf-8
# builtins
import
os
import
glob
import
shutil
# externals
import
gdal
import
numpy
as
np
# locals
from
pylandsat.core.untar
import
extract_data
from
pylandsat.core.calibration
import
landsat_radiometric_calibration
def sparcs2pylandsat(source_path, target_path, overwrite=True):
    """
    Convert the Sparcs dataset structure to the pylandsat standard.

    Every file found below ``source_path`` is moved to the directory
    ``target_path/<scene>``, where ``<scene>`` is the part of the file name
    in front of the first underscore. The line ``COLLECTION_NUMBER = 1`` is
    appended to each metadata file (``*_mtl.txt``), whose ending is renamed
    to ``_MTL.txt``. Finally, ``source_path`` is removed entirely, including
    any file that was skipped because it already existed in the target.

    Parameters
    ----------
    source_path : string
        path to the Sparcs archive downloaded `here`_
    target_path : string
        path to save the preprocessed sparcs dataset
    overwrite : bool
        whether to overwrite existing files

    Returns
    -------
    None.

    .. _here:
        https://www.usgs.gov/land-resources/nli/landsat/spatial-procedures-automated-removal-cloud-and-shadow-sparcs-validation

    """
    # ending of the metadata file before and after the conversion
    old_suffix, new_suffix = '_mtl.txt', '_MTL.txt'

    # create a directory for each scene
    for dirpath, _, filenames in os.walk(source_path):

        # iterate over the files to modify: directories without files are
        # skipped implicitly
        for file in filenames:

            # get the path to the file
            old_path = os.path.join(dirpath, file)

            # get name of the scene
            fname = file.split('_')[0]

            # define the directory of the scene
            scene_path = os.path.join(target_path, fname)

            # check if file is the metadata file
            if file.endswith(old_suffix):

                # add the collection number to the metadata file
                with open(old_path, 'a') as mfile:
                    mfile.write('COLLECTION_NUMBER = 1')

                # replace the file ending only: str.replace would also
                # rewrite any "mtl" occurring within the scene name
                file = file[:-len(old_suffix)] + new_suffix

            # define the new path to the file
            new_path = os.path.join(scene_path, file)

            # keep existing files, if requested
            if os.path.isfile(new_path) and not overwrite:
                print('{} already exists.'.format(new_path))
                continue

            # move file to the directory of its scene
            os.makedirs(scene_path, exist_ok=True)
            shutil.move(old_path, new_path)

    # remove old file location
    shutil.rmtree(source_path)
def destack_sparcs_raster(inpath, outpath=None, suffix='*_toa.tif'):
    """
    Destack a TIFF with more than one band into a TIFF file for each band.

    The stacked TIFF is the file matching ``*data.tif`` in ``inpath``. Each
    band is written as unsigned 16-bit integer to a separate GeoTIFF, whose
    name is the name of the stacked TIFF with ``data`` replaced by
    ``B<band number>``. The stacked TIFF is deleted afterwards.

    Parameters
    ----------
    inpath : string
        path to a directory containing the TIFF file to destack
    outpath : string, optional
        path to save the output TIFF files. The default is None. If None,
        ``outpath`` = ``inpath``.
    suffix : string, optional
        glob pattern matched within ``inpath`` to detect a scene that was
        already processed. If the number of matching files is equal to the
        number of bands of the stacked TIFF, no band is written and the
        stacked TIFF is deleted. The default is '*_toa.tif'.

    Raises
    ------
    RuntimeError
        if the stacked TIFF can not be opened by gdal.

    Returns
    -------
    None.

    """
    # default: output directory is equal to the input directory
    if outpath is None:
        outpath = inpath

    # check if output directory exists
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    # get the TIFF to destack
    stacked = glob.glob(os.path.join(inpath, '*data.tif'))

    # the stacked TIFF is deleted once a scene is processed: a repeated run
    # over the same scene finds nothing to destack and must not fail
    if not stacked:
        print('Scene: {} has no stacked TIFF to destack.'
              .format(os.path.basename(inpath)))
        return
    tif = stacked.pop()

    # open the raster
    img = gdal.Open(tif)
    if img is None:
        # gdal.Open signals failure by returning None instead of raising
        raise RuntimeError('Unable to open raster: {}'.format(tif))

    # check whether the current scene was already processed
    processed = glob.glob(os.path.join(inpath, suffix))
    if len(processed) == img.RasterCount:
        print('Scene: {} already processed.'
              .format(os.path.basename(inpath)))

        # release the dataset before deleting the file
        img = None
        os.unlink(tif)
        return

    # image driver
    driver = gdal.GetDriverByName('GTiff')
    driver.Register()

    # output image type: digital numbers unsigned integer 16bit
    codage = gdal.GDT_UInt16
    nptype = np.uint16

    # image size and tiles
    cols = img.RasterXSize
    rows = img.RasterYSize
    bands = img.RasterCount

    # print progress
    imgname = os.path.basename(tif)
    print('Processing: {}'.format(imgname))

    # iterate the bands of the raster: gdal band indices start at 1
    for b in range(1, bands + 1):

        # output file: replace for band name
        fname = os.path.join(outpath, imgname.replace('data', 'B' + str(b)))
        outDs = driver.Create(fname, cols, rows, 1, codage)

        # read the data of band b
        band = img.GetRasterBand(b)
        data = band.ReadAsArray().astype(nptype)

        # define output band
        outband = outDs.GetRasterBand(1)

        # write array to output band
        outband.WriteArray(data)
        outband.FlushCache()

        # Set the geographic information
        outDs.SetProjection(img.GetProjection())
        outDs.SetGeoTransform(img.GetGeoTransform())

        # clear memory
        del outband, band, data, outDs

    # remove old stacked GeoTIFF
    img = None
    os.unlink(tif)
if __name__ == '__main__':

    # root of the working directory (alternative: 'C:/Eurac/2020/')
    workdir = '/mnt/CEPH_PROJECTS/cci_snow/dfrisinghelli/'

    # downloaded sparcs archive and directory of the preprocessed dataset
    archive = os.path.join(workdir, '_Datasets/Archives/l8cloudmasks.zip')
    dataset = os.path.join(workdir, '_Datasets/Sparcs')

    # unpack the raw archive and rearrange the extracted files to the
    # pylandsat directory structure, keeping files that already exist
    extracted = extract_data(archive, dataset)
    sparcs2pylandsat(source_path=extracted,
                     target_path=dataset,
                     overwrite=False)

    # paths to the entries of the dataset directory, one for each scene
    scenes = [os.path.join(dataset, name) for name in os.listdir(dataset)]

    for scene_dir in scenes:

        # split the raster stack into a single GeoTIFF for each band
        destack_sparcs_raster(scene_dir, suffix='*_toa.tif')

        # radiometric calibration of the destacked bands
        # NOTE(review): presumably converts digital numbers to top of
        # atmosphere reflectance -- confirm against pylandsat
        landsat_radiometric_calibration(scene_dir,
                                        exclude=[],
                                        suffix='_toa',
                                        overwrite=False,
                                        remove_raw=True)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment