Commit 0f867da4 authored by Claudio Zandonella

update bolzano analysis

parent 88ac748d
@@ -7,7 +7,7 @@ import geopandas as gpd
 import numpy as np
 import pandas as pd
-from justclust.data.bolzano import cols, conv, read_data, sez_cols, wcols
+from justclust.data.bolzano import cols, conv, read_data, sez_cols, wcols, selected_col
 from justclust.explore.algclusters import explore_models, get_algclusters
 from justclust.explore.preprocs import ApplyW, explore_preprocs, get_preprocs
 import justclust.paths.paths as paths
@@ -66,15 +66,19 @@ print(pre_scores["hopkins"].describe())
 # %%
 # select a preprocessing
 pre_key = (
-    # "s:RobustScaler(quantile_range=(2, 98))|"
-    # "w:true|"
-    # "d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', n_jobs=-1)"
-    "s:Normalizer()|"
-    "w:true|"
-    "d:DictionaryLearning(alpha=0.1, fit_algorithm='cd')"
-    # "s:QuantileTransformer()|w:false|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', n_jobs=-1)"
+    "s:MaxAbsScaler()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)"  # try-1
+    # "s:MinMaxScaler()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)"  # try-2
+    # "s:Normalizer(norm='max')|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)"  # try-3
+    # "s:Normalizer(norm='l1')|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)"  # try-4
+    # "s:QuantileTransformer(random_state=2023)|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)"  # try-5
+    # "s:Normalizer()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)"  # try-6
+    # "s:RobustScaler(quantile_range=(20, 80))|w:true|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)"  # try-7
+    # "s:Normalizer(norm='max')|w:false|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)"  # try-8
 )
 if pre_key not in preprocs.keys():
     skeys = "\n".join([f" - {k!r}" for k in sorted(preprocs.keys())])
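Read literally, the pre_key string encodes the preprocessing pipeline in three pipe-separated fields: a scaler ("s:"), a weighting flag ("w:"), and a decomposition step ("d:"). A minimal parsing sketch, assuming that reading is right (parse_pre_key is a hypothetical helper, not part of justclust):

# Hypothetical helper: split a preprocessing key into its assumed
# scaler / weighting / decomposition fields.
def parse_pre_key(pre_key: str) -> dict:
    parts = dict(field.split(":", 1) for field in pre_key.split("|"))
    return {
        "scaler": parts["s"],           # e.g. "MaxAbsScaler()"
        "weighted": parts["w"] == "true",
        "decomposition": parts["d"],    # e.g. "DictionaryLearning(...)"
    }

key = "s:MaxAbsScaler()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)"
print(parse_pre_key(key)["weighted"])  # True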
@@ -108,12 +112,12 @@ else:
         models=models,
         n_jobs=-1,
         filters=[
-            ("n_clusters", (5.0, 12.0)),
+            ("n_clusters", (5.0, 10.0)),
             # silhouette_score: the best value is 1 and the worst value is -1.
             # Values near 0 indicate overlapping clusters. Negative values generally
             # indicate that a sample has been assigned to the wrong cluster, as a
             # different cluster is more similar.
-            ("silhouette", (0.5, None)),
+            ("silhouette", (0.85, None)),
             # davies_bouldin_score: the minimum score is zero, with lower values
             # indicating better clustering.
             # ("davies bouldin", (None, 0.6)),
@@ -160,16 +164,21 @@ else:
 # %%
 # Select the cluster ids to be compared;
 # the ids can be taken from the `clscorefile`
 sel_clsts = [
-    # label
-    # "hdbscan__mcs-15_ms-05_m-euclidean_csm-eom",  # k06 @51.6%
-    # "hdbscan__mcs-15_ms-05_m-sqeuclidean_csm-eom",  # k07 @50.2%
-    # "hdbscan__mcs-10_ms-05_m-sqeuclidean_csm-eom",  # k09 @60.3%
-    # "hdbscan__mcs-10_ms-01_m-euclidean_csm-eom",  # k12 @74.5%
-    # "hdbscan__mcs-07_ms-05_m-chebyshev_csm-eom",  # k15 @77.5%
-    "hdbscan__mcs-10_ms-00_m-euclidean_csm-eom",  # k6 96% .91 .17 2653
-    # "hdbscan__mcs-12_ms-02_m-euclidean_csm-eom",  # k7 91% .85 .27 2338
-    # "hdbscan__mcs-02_ms-05_m-euclidean_csm-eom",  # k10 96% .87 .25 1674
+    # try-1
+    # "hdbscan__mcs-10_ms-00_m-braycurtis_csm-eom",  # 8 clst 84.558824 0.961501 0.097361 7967.110672
+    "hdbscan__mcs-10_ms-00_m-euclidean_csm-eom"  # 9 clst 91.911765 0.916117 0.488055 1082.154275
+    # # try-2
+    # "hdbscan__mcs-10_ms-00_m-braycurtis_csm-eom",  # 8 clst 84.926471 0.938385 0.306445 1210.865543
+    # "hdbscan__mcs-10_ms-05_m-chebyshev_csm-eom"  # 9 clst 89.705882 0.921724 0.379492 2048.702598
+    # # try-8
+    # "hdbscan__mcs-10_ms-00_m-braycurtis_csm-eom",  # 5 clst 86.397059 0.991400 0.068170 38957.400453
+    # "hdbscan__mcs-07_ms-00_m-euclidean_csm-eom",  # 6 clst 90.073529 0.991330 0.067792 34901.021320
+    # "hdbscan__mcs-07_ms-03_m-euclidean_csm-eom",  # 7 clst 92.647059 0.989539 0.074449 25493.584344
 ]
 if len(sel_clsts) == 0:
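The labels plausibly encode the HDBSCAN configuration (mcs = min_cluster_size, ms = min_samples, m = metric, csm = cluster_selection_method), and the trailing comments appear to list the cluster count, the share of units clustered, and internal validation scores; none of this is documented here, so the decoder below is an inference, not the project's API:

# Hypothetical decoder for labels such as
# "hdbscan__mcs-10_ms-00_m-euclidean_csm-eom"; field meanings inferred.
def parse_cluster_label(label: str) -> dict:
    algo, params = label.split("__", 1)
    fields = dict(part.split("-", 1) for part in params.split("_"))
    return {
        "algorithm": algo,
        "min_cluster_size": int(fields["mcs"]),
        "min_samples": int(fields["ms"]),
        "metric": fields["m"],
        "cluster_selection_method": fields["csm"],
    }

print(parse_cluster_label("hdbscan__mcs-10_ms-00_m-euclidean_csm-eom"))
# {'algorithm': 'hdbscan', 'min_cluster_size': 10, 'min_samples': 0,
#  'metric': 'euclidean', 'cluster_selection_method': 'eom'}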
@@ -232,6 +241,7 @@ for clname in sel_clsts:
     qmd.create_qmd_report(
         city = 'bolzano',
         clname = clname,
+        selected_col = selected_col,
         cdir = my_paths.report_dir / clname
     )
justclust/data/bolzano.py
 # %%
 import geopandas as gpd
 import numpy as np
 import pandas as pd
@@ -8,64 +10,47 @@ my_paths = paths.define_paths(city = 'bolzano')
 # %%
 # columns selected for the outputs
-sez_cols = [
-    "SEZ", "geometry",
-    "SEZ2011", "COD_REG", "COD_ISTAT", "PRO_COM"
-]
+sez_cols = ["SEZ", "geometry"]
+id_col = ['SEZ']
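+# shared census-section key: read_data() below merges every layer on it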
 #---- Air Quality ----
 # Define available files
-prisk_scr_shp = my_paths.rawdata_dir / "Air quality" / "street_canyon_risk.shp"
+airq_scr_shp = my_paths.rawdata_dir / "Air quality" / "street_canyon_risk.shp"
+pm25_scr_shp = my_paths.rawdata_dir / "Air quality" / "pm_25_concentration.shp"
 # Define columns and other constant values
-prisk_raw_cols = ["Area_r1", "Area_r2", "Area_r3", "Area_r4", "Area_r6", "Area_r9"]
-prisk_nor_cols = [
-    "P_Area_r1",
-    "P_Area_r2",
-    "P_Area_r3",
-    "P_Area_r4",
-    "P_Area_r6",
-    "P_Area_r9",
-]
+airq_cols = ["risk_1_n", "risk_2_n", "risk_3_n"]
+pm25_cols = ["pm_25_n"]
 #---- Carbon ----
 # Define available files
-co2_ab_shp = my_paths.rawdata_dir / "Carbon" / "c_absorption.shp"
-co2_em_shp = my_paths.rawdata_dir / "Carbon" / "c_emission.shp"
+co2_ab_shp = my_paths.rawdata_dir / "Carbon" / "carbon_absorption.shp"
+co2_em_shp = my_paths.rawdata_dir / "Carbon" / "carbon_emission.shp"
 # Define columns and other constant values
-# Absorption
-co2_raw_cols = ["CO2"]
-co2_nor_cols = ["kgCO2/m²"]
-# Emissions
-ems = ["gasolio_em", "gpl_em", "ee_em", "metano_em", "legno_em", "tot_em", "c_ee_em"]
-co2_em_raw_cols = ["finale"]
-co2_em_nor_cols = ["finale/m²"]
+co2_ab_cols = ["c_ab_n"]
+co2_em_cols = ["c_em_n"]
 #---- FFH ----
 # Define available files
-ffh_pa_shp = my_paths.rawdata_dir / "FFH" / "protected_areas.shp"
+ffh_vegetation_shp = my_paths.rawdata_dir / "FFH" / "degree_of_vegetation.shp"
+ffh_na_shp = my_paths.rawdata_dir / "FFH" / "connectivity_natural_areas.shp"
 # Define columns and other constant values
-ffh_raw_cols = ["F_Area_m2"]
-ffh_nor_cols = ["P_Prot_are"]
+ffh_vegetation_cols = ["veg_n"]
+ffh_na_cols = ["conn_n"]
 #---- Spatial ----
 # Define available files
-spt_ag_shp = my_paths.rawdata_dir / "Spatial" / "accessibility_green_areas.shp"
-spt_wk_shp = my_paths.rawdata_dir / "Spatial" / "walkability.shp"
+spt_ag_shp = my_paths.rawdata_dir / "Spatial" / "accessibility_urban_green_areas.shp"
 # Define columns and other constant values
-spt_ag_raw_cols = ["Area_300m_", "Area_750m_"]
-spt_ag_nor_cols = ["P_area_300", "P_area_750"]
-spt_wk_nor_cols = ["MEAN_Resul"]
+spt_ag_cols = ["area_800_n"]
 #---- Temporal ----
@@ -73,47 +58,28 @@ spt_wk_nor_cols = ["MEAN_Resul"]
 tem_ss_shp = my_paths.rawdata_dir / "Temporal" / "soil_sealing.shp"
 # Define columns and other constant values
-tem_raw_cols = ["Area_diff"]
-tem_nor_cols = ["P_Area_dif"]
+tem_ss_cols = ["s_area_n"]
 #---- Thermal ----
 # Define available files
-thm_sh_shp = my_paths.rawdata_dir / "Thermal" / "suhi.shp"
+thm_sh_shp = my_paths.rawdata_dir / "Thermal" / "heat_stress_zones.shp"
 # Define columns and other constant values
-thm_nor_cols = ["mean_norm"]
+thm_sh_cols = ["stress_n"]
 #---- Socio ----
 # Define available files
-socio_gpkg = my_paths.rawdata_dir / "Socio" / "bz_pop_2021.gpkg"
+socio_shp = my_paths.rawdata_dir / "Socio" / "demographic_data.shp"
 # Define columns and other constant values
-socio_abs_cols = [
-    "Maschi",
-    "Femmine",
-    "Totale popolazione",
-    "0-14",
-    "15-64",
-    "65 e oltre",
-    "indice di vecchiaia",
-    "0-17",
-    "Stranieri",
-    "Media componenti",
-    "Totale famiglie con figli",
-    "Totale famiglie senza figli",
-    "Totale famiglie",
-    "Single",
-]
-socio_perc_cols = [
-    "%65+",
-    "%minori",
-    "%stranieri",
-    "%con figli",
-    "%senza figli",
-    "%Single",
-]
+socio_cols = [
+    "pop_den",
+    'aging_in',
+    'foreign',
+    'fam_w_ch'
+]
 # %%
@@ -121,37 +87,36 @@ socio_perc_cols = [
 cols_dict = {
     # Air quality
-    'prisk':{
-        "P_Area_r1": "Area very-low P-risk [%]",
-        "P_Area_r2": "Area low P-risk [%]",
-        "P_Area_r3": "Area medium-low P-risk [%]",
-        "P_Area_r4": "Area medium-high P-risk [%]",
-        "P_Area_r6": "Area high P-risk [%]",
-        "P_Area_r9": "Area very-high P-risk [%]"
+    'airq':{
+        "pm_25_n": "PM2.5 concentration [µg/m³]",
+        "risk_1_n": "Area low AQ-risk [%]",
+        "risk_2_n": "Area medium AQ-risk [%]",
+        "risk_3_n": "Area high AQ-risk [%]"
     },
     # Carbon
     'carbon':{
-        "finale/m²": "Carbon emission building [ton CO2/m²]",
-        "kgCO2/m²": "Carbon absorption vegetation [kg CO2/m²]"
+        "c_ab_n": "Carbon absorption vegetation [kg CO2/m²]",
+        "c_em_n": "Carbon emission building [ton CO2/m²]"
     },
     # FFH
-    'ffh':{"P_Prot_are": "Protected area [%]"},
+    'ffh':{
+        "veg_n": "Vegetation quality",
+        "conn_n": "Betweenness centrality"
+    },
     # Spatial
     'spatial':{
-        "P_area_300": "Accessibility public gardens (<5 min) [%]",
-        "MEAN_Resul": "Walkability Index"
+        "area_800_n": "Accessibility urban green areas (<10 min) [%]",
     },
     # Temporal
-    'temporal':{"P_Area_dif": "Soil sealing between 2018 and 2022 [%]"},
+    'temporal':{"s_area_n": "Soil sealing between 2018 and 2022 [%]"},
     # Thermal
-    'thermal':{"mean_norm": "Surface urban heat island"},
+    'thermal':{"stress_n": "Heat stress zones"},
     # Socio
     'socio':{
-        "indice di vecchiaia": "Age Index",
-        "%stranieri": "Foreign population [%]",
-        "%con figli": "Families with children [%]",
-        # "%senza figli": "Families without children [%]",
-        "%Single": "Families with one component [%]"
+        "pop_den": "Population density [inhab/km²]",
+        "aging_in": "Aging index",
+        "foreign": "Foreign population [%]",
+        "fam_w_ch": "Families with children [%]"
     }
 }
@@ -170,7 +135,7 @@ conv = {k:v for values in cols_dict.values() for k,v in values.items()}
 # define subset of columns according to categories
 selected_col = {
-    'col_prisk': list(cols_dict.get('prisk').values()),
+    'col_airq': list(cols_dict.get('airq').values()),
     'col_carbon':list(cols_dict.get('carbon').values()),
     'col_char':\
         list(cols_dict.get('ffh').values())+\
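For context, the `conv` map quoted in the hunk header flattens the nested category dicts into a single rename table; a minimal standalone illustration of that comprehension:

# Toy version of the `conv` comprehension: nested category dicts
# collapse into one raw-name -> label map suitable for DataFrame.rename.
cols_dict = {
    "carbon": {"c_ab_n": "Carbon absorption vegetation [kg CO2/m²]"},
    "thermal": {"stress_n": "Heat stress zones"},
}
conv = {k: v for values in cols_dict.values() for k, v in values.items()}
assert conv == {
    "c_ab_n": "Carbon absorption vegetation [kg CO2/m²]",
    "stress_n": "Heat stress zones",
}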
@@ -186,49 +151,43 @@ selected_col = {
 def read_data():
-    prisk_scr = gpd.read_file(prisk_scr_shp)
+    pm25_scr = gpd.read_file(pm25_scr_shp)
+    airq_scr = gpd.read_file(airq_scr_shp)
     co2_ab = gpd.read_file(co2_ab_shp)
-    co2_ab["SEZ"] = co2_ab["SEZ"].astype(int)
-    co2_ab["kgCO2/m²"] = co2_ab["CO2"] / co2_ab.area
     co2_em = gpd.read_file(co2_em_shp)
-    co2_em["finale/m²"] = co2_em["finale"] / co2_em.area
-    ffh_pa = gpd.read_file(ffh_pa_shp)
+    ffh_na = gpd.read_file(ffh_na_shp)
+    ffh_vegetation = gpd.read_file(ffh_vegetation_shp)
     spt_ag = gpd.read_file(spt_ag_shp)
-    spt_wk = gpd.read_file(spt_wk_shp)
     tem_ss = gpd.read_file(tem_ss_shp)
     thm_sh = gpd.read_file(thm_sh_shp)
-    socio_pk = gpd.read_file(socio_gpkg, driver="GPKG")
-    # fix column
-    res = []
-    for val in socio_pk["indice di vecchiaia"]:
-        try:
-            v = float(val.strip().replace(",", "."))
-        except Exception:
-            v = np.nan
-        res.append(v)
-    socio_pk["indice di vecchiaia"] = np.array(res, dtype=float)
-    raw = pd.concat(
-        [
-            co2_ab.loc[:, sez_cols],
-            prisk_scr.loc[:, prisk_raw_cols + prisk_nor_cols],
-            co2_ab.loc[:, co2_raw_cols + co2_nor_cols],
-            co2_em.loc[:, co2_em_raw_cols + co2_em_nor_cols + ems],
-            ffh_pa.loc[:, ffh_raw_cols + ffh_nor_cols],
-            spt_ag.loc[:, spt_ag_raw_cols + spt_ag_nor_cols],
-            spt_wk.loc[:, spt_wk_nor_cols],
-            tem_ss.loc[:, tem_raw_cols + tem_nor_cols],
-            thm_sh.loc[:, thm_nor_cols],
-            socio_pk.loc[:, socio_abs_cols + socio_perc_cols],
-        ],
-        axis="columns",
-    )
-    raw.index = co2_ab["SEZ"]
+    socio_pk = gpd.read_file(socio_shp)
+    list_data = [
+        pm25_scr.loc[:, id_col + ['geometry'] + pm25_cols],
+        airq_scr.loc[:, id_col + airq_cols],
+        co2_ab.loc[:, id_col + co2_ab_cols],
+        co2_em.loc[:, id_col + co2_em_cols],
+        ffh_na.loc[:, id_col + ffh_na_cols],
+        ffh_vegetation.loc[:, id_col + ffh_vegetation_cols],
+        spt_ag.loc[:, id_col + spt_ag_cols],
+        tem_ss.loc[:, id_col + tem_ss_cols],
+        thm_sh.loc[:, id_col + thm_sh_cols],
+        socio_pk.loc[:, id_col + socio_cols],
+    ]
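+    # merge all layers on the shared census-section id (inner join)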
+    raw = list_data[0]
+    for df in list_data[1:]:
+        raw = pd.merge(raw, df, on=id_col)
+    raw = raw.set_index(id_col, drop=False)
+    raw.index.name = None
+    # remove the erroneous observation
+    mask = raw.index == 8888888
+    raw = raw[~mask]
+    # rename columns
+    raw = raw.rename(columns={id_col[0]: 'SEZ'})
+    # set invalid values to NaN
+    raw = raw.replace(88888, np.nan)
     return raw

 # %%
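The pairwise pd.merge loop above is an inner-join reduction over the layer list; an equivalent standalone sketch with toy frames (illustrative data, not the project's):

from functools import reduce

import pandas as pd

layers = [
    pd.DataFrame({"SEZ": [1, 2, 3], "pm_25_n": [0.2, 0.4, 0.1]}),
    pd.DataFrame({"SEZ": [1, 2, 3], "c_ab_n": [5.0, 2.5, 7.5]}),
    pd.DataFrame({"SEZ": [1, 2], "stress_n": [0.9, 0.3]}),
]
# Inner join on the shared id: SEZ 3 is dropped because the last layer
# does not contain it, mirroring the loop in read_data().
raw = reduce(lambda left, right: pd.merge(left, right, on="SEZ"), layers)
print(raw)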