update bolzano analysis

af4dd43f · Claudio Zandonella · 137ee551 · af4dd43f · af4dd43f · af4dd43f
Commit af4dd43f authored 1 year ago by Claudio Zandonella
--- a/data/raw/bolzano/Air quality/pm_25_concentration.shp
+++ b/data/raw/bolzano/Air quality/pm_25_concentration.shp
--- a/data/raw/bolzano/Carbon/carbon_absorption.shp
+++ b/data/raw/bolzano/Carbon/carbon_absorption.shp
--- a/data/raw/bolzano/Thermal/heat_stress_zones.shp
+++ b/data/raw/bolzano/Thermal/heat_stress_zones.shp
--- a/justclust/analysis/bolzano.py
+++ b/justclust/analysis/bolzano.py
@@ -67,17 +67,21 @@ print(pre_scores["hopkins"].describe())
 # select a preprocessing
 pre_key = (
+    # "s:Normalizer()|w:false|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-1
-    "s:MaxAbsScaler()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-1
+    "s:MinMaxScaler()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-8
-    # "s:MinMaxScaler()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-2
+    # "s:Normalizer(norm='max')|w:false|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-9
+    # "s:MaxAbsScaler()|w:false|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)" # try-11
-    # "s:Normalizer(norm='max')|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-3
+    # "s:MinMaxScaler()|w:true|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)" # try-12
-    # "s:Normalizer(norm='l1')|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-4
+    # "s:MaxAbsScaler()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-13
-    # "s:QuantileTransformer(random_state=2023)|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-5
-    # "s:Normalizer()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-6
-    # "s:RobustScaler(quantile_range=(20, 80))|w:true|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)" # try-7
+    # "s:RobustScaler(quantile_range=(20, 80))|w:false|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-2
+    # "s:None|w:true|d:None" # try-3
-    # "s:Normalizer(norm='max')|w:false|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-8
+    # "s:None|w:true|d:PCA(random_state=2023)" # try-4
+    # "s:None|w:false|d:None" # try-5
+    # "s:None|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-6
+    # "s:Normalizer(norm='max')|w:true|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)" # try-7
+    # "s:Normalizer()|w:true|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)" # try-10
 )
 if pre_key not in preprocs.keys():
@@ -165,19 +169,21 @@ else:
 # the ID can be taken from the `clscorefile`
 sel_clsts = [
-    # try-1
+    # # try-1
-    # "hdbscan__mcs-10_ms-00_m-braycurtis_csm-eom", #  8 clst 84.558824   0.961501        0.097361        7967.110672
+    # "hdbscan__mcs-15_ms-00_m-braycurtis_csm-eom",  # 6 clst 80.514706    0.953342        0.097007       7.043505e+02
-    "hdbscan__mcs-10_ms-00_m-euclidean_csm-eom"   # 9 clst  91.911765   0.916117        0.488055        1082.154275
+    # "hdbscan__mcs-10_ms-00_m-sqeuclidean_csm-eom", # 7 clst 83.455882    0.998107        0.006016       1.868399e+06
+    # "hdbscan__mcs-10_ms-00_m-correlation_csm-eom"  # 8 clst 94.852941    0.879734        0.991096       2.381818e+02
-    # # try-2
-    # "hdbscan__mcs-10_ms-00_m-braycurtis_csm-eom", #  8 clst 84.926471   0.938385        0.306445        1210.865543
-    # "hdbscan__mcs-10_ms-05_m-chebyshev_csm-eom"   #  9 clst 89.705882   0.921724        0.379492        2048.702598
    # # try-8
-    # "hdbscan__mcs-10_ms-00_m-braycurtis_csm-eom", # 5 clst 86.397059    0.991400        0.068170       38957.400453
+    # "hdbscan__mcs-15_ms-00_m-braycurtis_csm-eom",  # 9 clst 83.088235    0.923182        0.161301        1956.750249
-    # "hdbscan__mcs-07_ms-00_m-euclidean_csm-eom",  # 6 clst 90.073529    0.991330        0.067792       34901.021320
+    # "hdbscan__mcs-15_ms-00_m-euclidean_csm-eom",   # 9 clst 83.455882    0.916627        0.228929        1498.443317
-    # "hdbscan__mcs-07_ms-03_m-euclidean_csm-eom",  # 7 clst 92.647059    0.989539        0.074449       25493.584344
+    # "hdbscan__mcs-15_ms-02_m-correlation_csm-eom", # 9 clst 86.764706    0.871355        0.612731         416.349157
+    # "hdbscan__mcs-12_ms-00_m-euclidean_csm-eom",   #10 clst 87.867647    0.920420        0.150891        2789.805233
+    "hdbscan__mcs-12_ms-02_m-braycurtis_csm-eom"   #10 clst 92.279412    0.863256        0.559498         503.105857
+    # # try-9
+    # "hdbscan__mcs-12_ms-00_m-chebyshev_csm-eom" ,  # 7 clst 86.397059    0.990940        0.061372       38540.093865
+    # "hdbscan__mcs-10_ms-00_m-braycurtis_csm-eom"   # 8 clst 90.441176    0.989184        0.065709       30695.363781
 ]

--- a/justclust/data/bolzano.py
+++ b/justclust/data/bolzano.py
@@ -50,7 +50,7 @@ ffh_na_cols = ["conn_n"]
 spt_ag_shp = my_paths.rawdata_dir / "Spatial" / "accessibility_urban_green_areas.shp"
 # Define columns and other constant values
-spt_ag_cols = ["area_800_n"]
+spt_ag_cols = ["area_400_n"]
 #----    Temporal    ----
@@ -88,14 +88,14 @@ socio_cols = [
 cols_dict = {
    # Air quality
    'airq':{
-        "pm_25_n" : "pm 2.5 concentrations [µg/m3]",
+        "pm_25_n" : "PM2.5 concentrations [µg/m3]",
        "risk_1_n" : "Area low AQ-risk [%]",
        "risk_2_n" : "Area medium AQ-risk [%]",
        "risk_3_n" : "Area high AQ-risk [%]"
        },
    # Carbon
    'carbon':{
-        "c_ab_n" : "Carbon absorption vegetation [kg CO2/m2]",
+        "c_ab_n" : "Carbon absorption vegetation [tC/ha]",
        "c_em_n" : "Carbon emission building [ton CO2/m²]"
        },
    # FFH
@@ -105,7 +105,7 @@ cols_dict = {
        },
    # Spatial
    'spatial':{
-        "area_800_n" : "Accessibility urban green areas (<10 min) [%]",
+        "area_400_n" : "Accessibility urban green areas (<5 min) [%]",
        },
    # Temporal
    'temporal':{"s_area_n" : "Soil sealing between 2022 and 2018 [%]"},

--- a/justclust/quarto.py
+++ b/justclust/quarto.py
@@ -58,6 +58,8 @@ import pandas as pd
 import re
 import matplotlib.pyplot as plt
+from IPython.display import display, Latex
 # Custom modules
 from justclust.data.{city} import cols, conv, read_data, selected_col
 import justclust.paths.paths as paths
@@ -135,7 +137,7 @@ summary_cls_frequency = qmd.get_summary_clst_frequency(selected)
 ```{graph_open}python{graph_close}
 #| output: asis
 print(
-    f'The urban area of {city} is formed by {graph_open}n_units_total{graph_close} territorial units.',
+    f'The urban area of {city.title()} is formed by {graph_open}n_units_total{graph_close} territorial units.',
    'However, some territorial units are not included in the analysis due to the presence of missing data (e.g., socio demographic characteristics).',
    'Although some spatial units are excluded from this analysis, they may be of interest for NbS planning.',
    f'The analysis covers {graph_open}n_units{graph_close} territorial units ({graph_open}n_units/n_units_total*100:.2f{graph_close}% of the total; see @fig-territorial-units).')
@@ -162,10 +164,11 @@ In the next sections, descriptive statistics of the territorial units characteri
 """
    descriptive_feature_airq = f"""
-###  Air Quality
+###  Air Quality Justice
 The air quality is here estimated considering the distance from different types of roads, which are one of the main air pollutant sources and the street canyons or the possibility to disperse pollutants.
-Values regarding pollution risk are presented in @fig-p-risk and summarized in Table \\ref{graph_open}tbl-p-risk{graph_close}.
+Furthermore, the PM2.5 concentration values provided by the EEA for the whole of Europe are considered. PM2.5 concentrations derive from different sources such as vehicles, smelters, power plants, industrial facilities, residential fireplaces and wood stoves, agricultural burning, and forest fires.
+Values regarding air pollution risk are presented in @fig-p-risk and summarized in Table \\ref{graph_open}tbl-p-risk{graph_close}.
 ```{graph_open}python{graph_close}
 #| output: asis
@@ -190,7 +193,7 @@ qmd.plot_summary(selected[selected_col_airq])
 """
    descriptive_feature_carbon = f"""
-###  Carbon
+###  Carbon Justice
 Values regarding carbon emission and absorption are presented in @fig-carbon and summarized in Table \\ref{graph_open}tbl-carbon{graph_close}.
 ```{graph_open}python{graph_close}
@@ -222,6 +225,7 @@ qmd.plot_summary(
 ###  Unit Characteristics about other (in)justice components
 Beyond the level of air quality and carbon (in)justices, this report includes other indicators related to spatial, temporal, and thermal (in)justice components. Furthermore, indicators related to Flora, Fauna & Habitat inclusion and on the main socioeconomic features are used.
+During the validation process, some issues were raised concerning the heat stress zones (thermal justice) and other indicators used; for a deeper understanding of the limitations and further considerations related to the indicators, please see the tables included in Chapter 5 of Deliverable 2.3.
 Values regarding these territorial unit characteristics are presented in @fig-unit and summarized in Table \\ref{graph_open}tbl-unit{graph_close}.
 ```{graph_open}python{graph_close}
@@ -394,7 +398,9 @@ The selected clusters were obtained using the following settings:
    cluster_results = f"""
 ## Cluster Results {graph_open}#sec-cluster-results{graph_close}
-In this section, the results of the cluster analysis are presented. In @fig-cluster-map-all, the territorial units are colored according to their assigned cluster. Note that `Z` is used to indicate the *outliers* that were not assigned to any cluster and it should not be considered a cluster group per se. 
+In this section, the results of the cluster analysis are presented. 
+Each cluster represents a distinct profile of ecological and socio-economic status and disparities.
+In @fig-cluster-map-all, the territorial units are colored according to their assigned cluster. Note that `Z` is used to indicate the *outliers* that were not assigned to any cluster and it should not be considered a cluster group per se. 
 ```{graph_open}python{graph_close}
 #| fig-align: center
@@ -495,7 +501,7 @@ qmd.plot_clst_boxplot_loop(
 """
    cluster_results_char = f"""
- **Unit Characteristics**
+- **Unit Characteristics about other (in)justice components**
 ```{graph_open}python{graph_close}
 #| fig-align: center
@@ -546,7 +552,7 @@ qmd.plot_clst_boxplot_loop(
    cluster_results_overview = f"""
 ### Overview Clusters
-To provide an overview of the cluster characteristics, we consider cluster mean and median values. In @fig-mean-heatmap, cluster mean values are reported for each variable and colored according to the difference with the average mean among clusters (`Avg mean`). In @fig-cluster-comp-mean, the cluster mean value is standardized with respect to the average mean among clusters for each variable). That is,
+To provide an overview of the cluster characteristics, we consider cluster mean and median values. In @fig-mean-heatmap, cluster mean values are reported for each variable and colored according to the difference with the average mean among clusters (`Avg mean`). In @fig-cluster-comp-mean, the cluster mean value is standardized with respect to the average mean among clusters for each variable. That is,
 $$
 \\frac{graph_open}\\bar{graph_open}x{graph_close}_i  - \\bar{graph_open}x{graph_close}_{graph_open}Avg{graph_close}{graph_close}{graph_open}\\bar{graph_open}x{graph_close}_{graph_open}Std{graph_close}{graph_close}
@@ -657,9 +663,9 @@ for cluster_lab in cluster_labels.values():
    graph_par_open = "{graph_open}"
    graph_par_close = "{graph_close}"
-    print(f'\\subsubsection{graph_open}graph_par_open{graph_close}Cluster {graph_open}cluster_lab{graph_close} (n = {graph_open}selected.loc[mask].shape[0]{graph_close}){graph_open}graph_par_close{graph_close}')
+    display(Latex(f'\\subsubsection{graph_open}graph_par_open{graph_close}Cluster {graph_open}cluster_lab{graph_close} (n = {graph_open}selected.loc[mask].shape[0]{graph_close}){graph_open}graph_par_close{graph_close}'))
-    print('\\center')
+    display(Latex('\\center'))
    qmd.plot_map_single_cluster(
        data_plot=selected,
        mask = mask,
@@ -667,7 +673,7 @@ for cluster_lab in cluster_labels.values():
        cluster_colors=cluster_colors
    )
-    print('\\\\')
+    display(Latex('\\\\'))
    qmd.plot_cluster_comp_single(
        dict_stat=dict_stat,
@@ -677,7 +683,7 @@ for cluster_lab in cluster_labels.values():
        sorted = True
    )
-    print('\\clearpage')
+    display(Latex('\\clearpage'))
    qmd.plot_boxplot_comp_cluster_loop(
        data_cluster = data_cluster,
@@ -690,7 +696,7 @@ for cluster_lab in cluster_labels.values():
        figsize = (20,25)
        )
-    print('\\clearpage')
+    display(Latex('\\clearpage'))
 ```
@@ -837,16 +843,10 @@ def table_latex(
 def cluster_color_dict(cluster_labels):
    n_clusters = len(cluster_labels)
-    if 'Z' not in cluster_labels.values():
+    palette = list(sns.color_palette("Paired", n_colors= n_clusters))
-        n_clusters = n_clusters + 1
+    gray_color = (0.5803921568627451, 0.5803921568627451, 0.5803921568627451)
-    palette = list(sns.color_palette("colorblind", n_colors=max(8, n_clusters)))
-    gray_color = palette[7]
-    palette.remove(gray_color)
-    palette =  palette[0: n_clusters-1] + [gray_color]
    res = {
-        label:rgb for label, rgb in zip(cluster_labels.values(), palette)
+        label:(rgb if label != 'Z' else gray_color) for label, rgb in zip(cluster_labels.values(), palette) 
    }
    return res