Skip to content
Snippets Groups Projects
Commit af4dd43f authored by Claudio Zandonella's avatar Claudio Zandonella
Browse files

update bolzano analysis

parent 137ee551
Branches main
No related tags found
No related merge requests found
No preview for this file type
No preview for this file type
No preview for this file type
......@@ -67,17 +67,21 @@ print(pre_scores["hopkins"].describe())
# select a preprocessing
pre_key = (
    # Key of the preprocessing pipeline to analyse; it is looked up in
    # `preprocs` right after this assignment.
    # Exactly ONE literal may be left uncommented below: Python silently
    # concatenates adjacent string literals, and the merged text would not
    # match any key of `preprocs`.
    # NOTE(review): the "try-N" labels repeat across the list; they appear to
    # come from two numbering rounds -- confirm before re-enabling an entry.
    # "s:MaxAbsScaler()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-1
    # "s:MinMaxScaler()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-2
    # "s:Normalizer(norm='max')|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-3
    # "s:Normalizer(norm='l1')|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-4
    # "s:QuantileTransformer(random_state=2023)|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-5
    # "s:Normalizer()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-6
    # "s:RobustScaler(quantile_range=(20, 80))|w:true|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)" # try-7
    # "s:Normalizer(norm='max')|w:false|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-8
    # "s:Normalizer()|w:false|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-1
    "s:MinMaxScaler()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-8
    # "s:Normalizer(norm='max')|w:false|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-9
    # "s:MaxAbsScaler()|w:false|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)" # try-11
    # "s:MinMaxScaler()|w:true|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)" # try-12
    # "s:MaxAbsScaler()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-13
    # "s:RobustScaler(quantile_range=(20, 80))|w:false|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-2
    # "s:None|w:true|d:None" # try-3
    # "s:None|w:true|d:PCA(random_state=2023)" # try-4
    # "s:None|w:false|d:None" # try-5
    # "s:None|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-6
    # "s:Normalizer(norm='max')|w:true|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)" # try-7
    # "s:Normalizer()|w:true|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)" # try-10
)
if pre_key not in preprocs.keys():
......@@ -165,19 +169,21 @@ else:
# the ID can be taken from the `clscorefile`
sel_clsts = [
    # IDs of the clustering runs to keep. Every active entry must end with a
    # comma: adjacent string literals without one are silently concatenated
    # into a single, invalid ID.
    # NOTE(review): the try-8 entry is the one left active here; confirm it
    # belongs to the preprocessing currently selected in `pre_key`.
    # try-1
    # "hdbscan__mcs-10_ms-00_m-braycurtis_csm-eom", # 8 clst 84.558824 0.961501 0.097361 7967.110672
    # "hdbscan__mcs-10_ms-00_m-euclidean_csm-eom" # 9 clst 91.911765 0.916117 0.488055 1082.154275
    # # try-2
    # "hdbscan__mcs-10_ms-00_m-braycurtis_csm-eom", # 8 clst 84.926471 0.938385 0.306445 1210.865543
    # "hdbscan__mcs-10_ms-05_m-chebyshev_csm-eom" # 9 clst 89.705882 0.921724 0.379492 2048.702598
    # # try-1
    # "hdbscan__mcs-15_ms-00_m-braycurtis_csm-eom", # 6 clst 80.514706 0.953342 0.097007 7.043505e+02
    # "hdbscan__mcs-10_ms-00_m-sqeuclidean_csm-eom", # 7 clst 83.455882 0.998107 0.006016 1.868399e+06
    # "hdbscan__mcs-10_ms-00_m-correlation_csm-eom" # 8 clst 94.852941 0.879734 0.991096 2.381818e+02
    # # try-8
    # "hdbscan__mcs-10_ms-00_m-braycurtis_csm-eom", # 5 clst 86.397059 0.991400 0.068170 38957.400453
    # "hdbscan__mcs-07_ms-00_m-euclidean_csm-eom", # 6 clst 90.073529 0.991330 0.067792 34901.021320
    # "hdbscan__mcs-07_ms-03_m-euclidean_csm-eom", # 7 clst 92.647059 0.989539 0.074449 25493.584344
    # "hdbscan__mcs-15_ms-00_m-braycurtis_csm-eom", # 9 clst 83.088235 0.923182 0.161301 1956.750249
    # "hdbscan__mcs-15_ms-00_m-euclidean_csm-eom", # 9 clst 83.455882 0.916627 0.228929 1498.443317
    # "hdbscan__mcs-15_ms-02_m-correlation_csm-eom", # 9 clst 86.764706 0.871355 0.612731 416.349157
    # "hdbscan__mcs-12_ms-00_m-euclidean_csm-eom", #10 clst 87.867647 0.920420 0.150891 2789.805233
    "hdbscan__mcs-12_ms-02_m-braycurtis_csm-eom", #10 clst 92.279412 0.863256 0.559498 503.105857
    # # try-9
    # "hdbscan__mcs-12_ms-00_m-chebyshev_csm-eom" , # 7 clst 86.397059 0.990940 0.061372 38540.093865
    # "hdbscan__mcs-10_ms-00_m-braycurtis_csm-eom" # 8 clst 90.441176 0.989184 0.065709 30695.363781
]
......
......@@ -50,7 +50,7 @@ ffh_na_cols = ["conn_n"]
spt_ag_shp = my_paths.rawdata_dir / "Spatial" / "accessibility_urban_green_areas.shp"
# Define columns and other constant values
# A previous assignment of ["area_800_n"] was overwritten immediately and never
# read, so only the effective value is kept.
spt_ag_cols = ["area_400_n"]
#---- Temporal ----
......@@ -88,14 +88,14 @@ socio_cols = [
cols_dict = {
# Air quality
'airq':{
"pm_25_n" : "pm 2.5 concentrations [µg/m3]",
"pm_25_n" : "PM2.5 concentrations [µg/m3]",
"risk_1_n" : "Area low AQ-risk [%]",
"risk_2_n" : "Area medium AQ-risk [%]",
"risk_3_n" : "Area high AQ-risk [%]"
},
# Carbon
'carbon':{
"c_ab_n" : "Carbon absorption vegetation [kg CO2/m2]",
"c_ab_n" : "Carbon absorption vegetation [tC/ha]",
"c_em_n" : "Carbon emission building [ton CO2/m²]"
},
# FFH
......@@ -105,7 +105,7 @@ cols_dict = {
},
# Spatial
'spatial':{
"area_800_n" : "Accessibility urban green areas (<10 min) [%]",
"area_400_n" : "Accessibility urban green areas (<5 min) [%]",
},
# Temporal
'temporal':{"s_area_n" : "Soil sealing between 2022 and 2018 [%]"},
......
......@@ -58,6 +58,8 @@ import pandas as pd
import re
import matplotlib.pyplot as plt
from IPython.display import display, Latex
# Custom modules
from justclust.data.{city} import cols, conv, read_data, selected_col
import justclust.paths.paths as paths
......@@ -135,7 +137,7 @@ summary_cls_frequency = qmd.get_summary_clst_frequency(selected)
```{graph_open}python{graph_close}
#| output: asis
print(
f'The urban area of {city} is formed by {graph_open}n_units_total{graph_close} territorial units.',
f'The urban area of {city.title()} is formed by {graph_open}n_units_total{graph_close} territorial units.',
'However, some territorial units are not included in the analysis due to the presence of missing data (e.g., socio-demographic characteristics).',
'Although some spatial units are excluded from this analysis, they may be of interest for NbS planning.',
f'The analysis covers {graph_open}n_units{graph_close} territorial units ({graph_open}n_units/n_units_total*100:.2f{graph_close}% of the total; see @fig-territorial-units).')
......@@ -162,10 +164,11 @@ In the next sections, descriptive statistics of the territorial units characteri
"""
descriptive_feature_airq = f"""
### Air Quality
### Air Quality Justice
Air quality is estimated here by considering the distance from different types of roads, which are among the main sources of air pollutants, together with the presence of street canyons or the possibility of dispersing pollutants.
Values regarding pollution risk are presented in @fig-p-risk and summarized in Table \\ref{graph_open}tbl-p-risk{graph_close}.
Furthermore, the PM2.5 concentration values provided for all of Europe by the EEA are considered. PM2.5 concentrations derive from different sources such as vehicles, smelters, power plants, industrial facilities, residential fireplaces and wood stoves, agricultural burning and forest fires.
Values regarding air pollution risk are presented in @fig-p-risk and summarized in Table \\ref{graph_open}tbl-p-risk{graph_close}.
```{graph_open}python{graph_close}
#| output: asis
......@@ -190,7 +193,7 @@ qmd.plot_summary(selected[selected_col_airq])
"""
descriptive_feature_carbon = f"""
### Carbon
### Carbon Justice
Values regarding carbon emission and absorption are presented in @fig-carbon and summarized in Table \\ref{graph_open}tbl-carbon{graph_close}.
```{graph_open}python{graph_close}
......@@ -222,6 +225,7 @@ qmd.plot_summary(
### Unit Characteristics about other (in)justice components
Beyond the level of air quality and carbon (in)justices, this report includes other indicators related to spatial, temporal, and thermal (in)justice components. Furthermore, indicators related to Flora, Fauna & Habitat inclusion and to the main socioeconomic features are used.
During the validation process, some issues were raised concerning the heat stress zones (thermal justice) and other indicators used; for a deeper understanding of the limitations and further considerations related to the indicators, please see the tables included in Chapter 5 of Deliverable 2.3.
Values regarding these territorial unit characteristics are presented in @fig-unit and summarized in Table \\ref{graph_open}tbl-unit{graph_close}.
```{graph_open}python{graph_close}
......@@ -394,7 +398,9 @@ The selected clusters were obtained using the following settings:
cluster_results = f"""
## Cluster Results {graph_open}#sec-cluster-results{graph_close}
In this section, the results of the cluster analysis are presented. In @fig-cluster-map-all, the territorial units are colored according to their assigned cluster. Note that `Z` is used to indicate the *outliers* that were not assigned to any cluster and it should not be considered a cluster group per se.
In this section, the results of the cluster analysis are presented.
Each cluster represents a distinct profile of ecological and socio-economic status and disparities.
In @fig-cluster-map-all, the territorial units are colored according to their assigned cluster. Note that `Z` is used to indicate the *outliers* that were not assigned to any cluster and it should not be considered a cluster group per se.
```{graph_open}python{graph_close}
#| fig-align: center
......@@ -495,7 +501,7 @@ qmd.plot_clst_boxplot_loop(
"""
cluster_results_char = f"""
- **Unit Characteristics**
- **Unit Characteristics about other (in)justice components**
```{graph_open}python{graph_close}
#| fig-align: center
......@@ -546,7 +552,7 @@ qmd.plot_clst_boxplot_loop(
cluster_results_overview = f"""
### Overview Clusters
To provide an overview of the cluster characteristics, we consider cluster mean and median values. In @fig-mean-heatmap, cluster mean values are reported for each variable and colored according to the difference with the average mean among clusters (`Avg mean`). In @fig-cluster-comp-mean, the cluster mean value is standardized with respect to the average mean among clusters for each variable). That is,
To provide an overview of the cluster characteristics, we consider cluster mean and median values. In @fig-mean-heatmap, cluster mean values are reported for each variable and colored according to the difference with the average mean among clusters (`Avg mean`). In @fig-cluster-comp-mean, the cluster mean value is standardized with respect to the average mean among clusters for each variable. That is,
$$
\\frac{graph_open}\\bar{graph_open}x{graph_close}_i - \\bar{graph_open}x{graph_close}_{graph_open}Avg{graph_close}{graph_close}{graph_open}\\bar{graph_open}x{graph_close}_{graph_open}Std{graph_close}{graph_close}
......@@ -657,9 +663,9 @@ for cluster_lab in cluster_labels.values():
graph_par_open = "{graph_open}"
graph_par_close = "{graph_close}"
print(f'\\subsubsection{graph_open}graph_par_open{graph_close}Cluster {graph_open}cluster_lab{graph_close} (n = {graph_open}selected.loc[mask].shape[0]{graph_close}){graph_open}graph_par_close{graph_close}')
display(Latex(f'\\subsubsection{graph_open}graph_par_open{graph_close}Cluster {graph_open}cluster_lab{graph_close} (n = {graph_open}selected.loc[mask].shape[0]{graph_close}){graph_open}graph_par_close{graph_close}'))
print('\\center')
display(Latex('\\center'))
qmd.plot_map_single_cluster(
data_plot=selected,
mask = mask,
......@@ -667,7 +673,7 @@ for cluster_lab in cluster_labels.values():
cluster_colors=cluster_colors
)
print('\\\\')
display(Latex('\\\\'))
qmd.plot_cluster_comp_single(
dict_stat=dict_stat,
......@@ -677,7 +683,7 @@ for cluster_lab in cluster_labels.values():
sorted = True
)
print('\\clearpage')
display(Latex('\\clearpage'))
qmd.plot_boxplot_comp_cluster_loop(
data_cluster = data_cluster,
......@@ -690,7 +696,7 @@ for cluster_lab in cluster_labels.values():
figsize = (20,25)
)
print('\\clearpage')
display(Latex('\\clearpage'))
```
......@@ -837,16 +843,10 @@ def table_latex(
def cluster_color_dict(cluster_labels):
    """Return a ``{label: rgb}`` colour mapping for the cluster labels.

    Colours are taken, in order, from seaborn's "Paired" palette. The label
    ``'Z'`` is always mapped to a fixed gray instead of its palette colour.

    Args:
        cluster_labels: Mapping whose values are the cluster labels; only
            ``cluster_labels.values()`` is read.

    Returns:
        dict: One entry per label, in the iteration order of
        ``cluster_labels.values()``, mapping the label to an RGB tuple.
    """
    labels = list(cluster_labels.values())

    # One spare colour is requested when no 'Z' label is present; zip() below
    # simply ignores any surplus palette entry.
    n_clusters = len(labels)
    if 'Z' not in labels:
        n_clusters = n_clusters + 1

    palette = list(sns.color_palette("Paired", n_colors=n_clusters))
    gray_color = (0.5803921568627451, 0.5803921568627451, 0.5803921568627451)

    return {
        label: (gray_color if label == 'Z' else rgb)
        for label, rgb in zip(labels, palette)
    }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment