diff --git a/data/raw/bolzano/Air quality/pm_25_concentration.shp b/data/raw/bolzano/Air quality/pm_25_concentration.shp
index a8947f5584ee6f245d1176c30f955117b8bbea7b..f37336c67c9731ac630551486778216b9a8833f7 100755
Binary files a/data/raw/bolzano/Air quality/pm_25_concentration.shp and b/data/raw/bolzano/Air quality/pm_25_concentration.shp differ
diff --git a/data/raw/bolzano/Carbon/carbon_absorption.shp b/data/raw/bolzano/Carbon/carbon_absorption.shp
index ef1764c08a0b580f7065d7b24c450d4ccdeb6665..d4dac4b965cc632c482b7048950afa084b728060 100755
Binary files a/data/raw/bolzano/Carbon/carbon_absorption.shp and b/data/raw/bolzano/Carbon/carbon_absorption.shp differ
diff --git a/data/raw/bolzano/Thermal/heat_stress_zones.shp b/data/raw/bolzano/Thermal/heat_stress_zones.shp
index ef1764c08a0b580f7065d7b24c450d4ccdeb6665..f37336c67c9731ac630551486778216b9a8833f7 100755
Binary files a/data/raw/bolzano/Thermal/heat_stress_zones.shp and b/data/raw/bolzano/Thermal/heat_stress_zones.shp differ
diff --git a/justclust/analysis/bolzano.py b/justclust/analysis/bolzano.py
index 8185ece5dc8646b5c67218ebb673512bcc5536c1..dc843f562ab1f6aadc470056a0618acfa767488b 100644
--- a/justclust/analysis/bolzano.py
+++ b/justclust/analysis/bolzano.py
@@ -67,17 +67,21 @@ print(pre_scores["hopkins"].describe())
 
 # select a preprocessing
 pre_key = (
-
-    "s:MaxAbsScaler()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-1
-    # "s:MinMaxScaler()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-2
-
-    # "s:Normalizer(norm='max')|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-3
-    # "s:Normalizer(norm='l1')|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-4
-    # "s:QuantileTransformer(random_state=2023)|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-5
-    # "s:Normalizer()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-6
-    # "s:RobustScaler(quantile_range=(20, 80))|w:true|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)" # try-7
-
-    # "s:Normalizer(norm='max')|w:false|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-8
+    # "s:Normalizer()|w:false|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-1
+    "s:MinMaxScaler()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-8
+    # "s:Normalizer(norm='max')|w:false|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-9
+    # "s:MaxAbsScaler()|w:false|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)" # try-11
+    # "s:MinMaxScaler()|w:true|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)" # try-12
+    # "s:MaxAbsScaler()|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-13
+
+
+    # "s:RobustScaler(quantile_range=(20, 80))|w:false|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-2
+    # "s:None|w:true|d:None" # try-3
+    # "s:None|w:true|d:PCA(random_state=2023)" # try-4
+    # "s:None|w:false|d:None" # try-5
+    # "s:None|w:true|d:DictionaryLearning(alpha=0.1, max_iter=2000, n_jobs=-1, random_state=2023)" # try-6
+    # "s:Normalizer(norm='max')|w:true|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)" # try-7
+    # "s:Normalizer()|w:true|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', max_iter=2000, n_jobs=-1, random_state=2023)" # try-10
 )
 
 if pre_key not in preprocs.keys():
@@ -165,19 +169,21 @@ else:
 
 # the ID can be taken from the `clscorefile`
 sel_clsts = [
-    # try-1
-    # "hdbscan__mcs-10_ms-00_m-braycurtis_csm-eom", # 8 clst 84.558824 0.961501 0.097361 7967.110672
-    "hdbscan__mcs-10_ms-00_m-euclidean_csm-eom" # 9 clst 91.911765 0.916117 0.488055 1082.154275
-
-    # # try-2
-    # "hdbscan__mcs-10_ms-00_m-braycurtis_csm-eom", # 8 clst 84.926471 0.938385 0.306445 1210.865543
-    # "hdbscan__mcs-10_ms-05_m-chebyshev_csm-eom" # 9 clst 89.705882 0.921724 0.379492 2048.702598
+    # # try-1
+    # "hdbscan__mcs-15_ms-00_m-braycurtis_csm-eom", # 6 clst 80.514706 0.953342 0.097007 7.043505e+02
+    # "hdbscan__mcs-10_ms-00_m-sqeuclidean_csm-eom", # 7 clst 83.455882 0.998107 0.006016 1.868399e+06
+    # "hdbscan__mcs-10_ms-00_m-correlation_csm-eom" # 8 clst 94.852941 0.879734 0.991096 2.381818e+02
 
     # # try-8
-    # "hdbscan__mcs-10_ms-00_m-braycurtis_csm-eom", # 5 clst 86.397059 0.991400 0.068170 38957.400453
-    # "hdbscan__mcs-07_ms-00_m-euclidean_csm-eom", # 6 clst 90.073529 0.991330 0.067792 34901.021320
-    # "hdbscan__mcs-07_ms-03_m-euclidean_csm-eom", # 7 clst 92.647059 0.989539 0.074449 25493.584344
-
+    # "hdbscan__mcs-15_ms-00_m-braycurtis_csm-eom", # 9 clst 83.088235 0.923182 0.161301 1956.750249
+    # "hdbscan__mcs-15_ms-00_m-euclidean_csm-eom", # 9 clst 83.455882 0.916627 0.228929 1498.443317
+    # "hdbscan__mcs-15_ms-02_m-correlation_csm-eom", # 9 clst 86.764706 0.871355 0.612731 416.349157
+    # "hdbscan__mcs-12_ms-00_m-euclidean_csm-eom", #10 clst 87.867647 0.920420 0.150891 2789.805233
+    "hdbscan__mcs-12_ms-02_m-braycurtis_csm-eom" #10 clst 92.279412 0.863256 0.559498 503.105857
+
+    # # try-9
+    # "hdbscan__mcs-12_ms-00_m-chebyshev_csm-eom", # 7 clst 86.397059 0.990940 0.061372 38540.093865
+    # "hdbscan__mcs-10_ms-00_m-braycurtis_csm-eom" # 8 clst 90.441176 0.989184 0.065709 30695.363781
 ]
diff --git a/justclust/data/bolzano.py b/justclust/data/bolzano.py
index 00fbf62e4818e1c9aff89ef43b79b28ef318620a..844f7688933004d5fbfc7040b5f9e611d8a5f886 100644
--- a/justclust/data/bolzano.py
+++ b/justclust/data/bolzano.py
@@ -50,7 +50,7 @@ ffh_na_cols = ["conn_n"]
 spt_ag_shp = my_paths.rawdata_dir / "Spatial" / "accessibility_urban_green_areas.shp"
 
 # Define columns and other constant values
-spt_ag_cols = ["area_800_n"]
+spt_ag_cols = ["area_400_n"]
 
 
 #---- Temporal ----
@@ -88,14 +88,14 @@ socio_cols = [
 cols_dict = {
     # Air quality
     'airq':{
-        "pm_25_n" : "pm 2.5 concentrations [µg/m3]",
+        "pm_25_n" : "PM2.5 concentrations [µg/m3]",
         "risk_1_n" : "Area low AQ-risk [%]",
         "risk_2_n" : "Area medium AQ-risk [%]",
        "risk_3_n" : "Area high AQ-risk [%]"
     },
     # Carbon
     'carbon':{
-        "c_ab_n" : "Carbon absorption vegetation [kg CO2/m2]",
+        "c_ab_n" : "Carbon absorption vegetation [tC/ha]",
         "c_em_n" : "Carbon emission building [ton CO2/m²]"
     },
     # FFH
@@ -105,7 +105,7 @@ cols_dict = {
     },
     # Spatial
     'spatial':{
-        "area_800_n" : "Accessibility urban green areas (<10 min) [%]",
+        "area_400_n" : "Accessibility urban green areas (<5 min) [%]",
     },
     # Temporal
     'temporal':{"s_area_n" : "Soil sealing between 2022 and 2018 [%]"},
diff --git a/justclust/quarto.py b/justclust/quarto.py
index 589238401a7dde4730d50fbd97ae8ab4957dc2e8..44031b57a6801a027eb1df8f8e8fe59b79364fcc 100644
--- a/justclust/quarto.py
+++ b/justclust/quarto.py
@@ -58,6 +58,8 @@ import pandas as pd
 import re
 import matplotlib.pyplot as plt
 
+from IPython.display import display, Latex
+
 # Custom modules
 from justclust.data.{city} import cols, conv, read_data, selected_col
 import justclust.paths.paths as paths
@@ -135,7 +137,7 @@ summary_cls_frequency = qmd.get_summary_clst_frequency(selected)
 ```{graph_open}python{graph_close}
 #| output: asis
 print(
-    f'The urban area of {city} is formed by {graph_open}n_units_total{graph_close} territorial units.',
+    f'The urban area of {city.title()} is formed by {graph_open}n_units_total{graph_close} territorial units.',
     'However, some territorial units are not included in the analysis due to the presence of missing data (e.g., socio demographic characteristics).',
     'Although some spatial units are excluded from this analysis, they may be of interest for NbS planning.',
     f'The analysis covers {graph_open}n_units{graph_close} territorial units ({graph_open}n_units/n_units_total*100:.2f{graph_close}% of the total; see @fig-territorial-units).')
@@ -162,10 +164,11 @@ In the next sections, descriptive statistics of the territorial units characteri
 """
 
 descriptive_feature_airq = f"""
-### Air Quality
+### Air Quality Justice
 
 The air quality is here estimated considering the distance from different types of roads, which are one of the main air pollutant sources and the street canyons or the possibility to disperse pollutants.
-Values regarding pollution risk are presented in @fig-p-risk and summarized in Table \\ref{graph_open}tbl-p-risk{graph_close}.
+Furthermore, the PM2.5 concentration values provided for the whole of Europe by the EEA are also considered. PM2.5 derives from different sources such as vehicles, smelters, power plants, industrial facilities, residential fireplaces and wood stoves, agricultural burning, and forest fires.
+Values regarding air pollution risk are presented in @fig-p-risk and summarized in Table \\ref{graph_open}tbl-p-risk{graph_close}.
 
 ```{graph_open}python{graph_close}
 #| output: asis
@@ -190,7 +193,7 @@ qmd.plot_summary(selected[selected_col_airq])
 """
 
 descriptive_feature_carbon = f"""
-### Carbon
+### Carbon Justice
 Values regarding carbon emission and absorption are presented in @fig-carbon and summarized in Table \\ref{graph_open}tbl-carbon{graph_close}.
 
 ```{graph_open}python{graph_close}
@@ -222,6 +225,7 @@ qmd.plot_summary(
 ### Unit Characteristics about other (in)justice components
 Beyond the level of air quality and carbon (in)justices, this report includes other indicators related to spatial, temporal, and thermal (in)justice components. Furthermore, indicators related to Flora, Fauna & Habitat inclusion and on the main socioeconomic features are used.
+During the validation process, some issues were raised concerning the heat stress zones (thermal justice) and other indicators used; for a deeper understanding of the limitations and further considerations related to the indicators, please see the tables included in Chapter 5 of Deliverable 2.3.
 Values regarding these territorial unit characteristics are presented in @fig-unit and summarized in Table \\ref{graph_open}tbl-unit{graph_close}.
 
 ```{graph_open}python{graph_close}
@@ -394,7 +398,9 @@ The selected clusters were obtained using the following settings:
 
 cluster_results = f"""
 ## Cluster Results {graph_open}#sec-cluster-results{graph_close}
-In this section, the results of the cluster analysis are presented. In @fig-cluster-map-all, the territorial units are colored according to their assigned cluster. Note that `Z` is used to indicate the *outliers* that were not assigned to any cluster and it should not be considered a cluster group per se.
+In this section, the results of the cluster analysis are presented.
+Each cluster represents a distinct profile of ecological and socio-economic status and disparities.
+In @fig-cluster-map-all, the territorial units are colored according to their assigned cluster. Note that `Z` is used to indicate the *outliers* that were not assigned to any cluster and it should not be considered a cluster group per se.
 
 ```{graph_open}python{graph_close}
 #| fig-align: center
@@ -495,7 +501,7 @@ qmd.plot_clst_boxplot_loop(
 """
 
 cluster_results_char = f"""
-- **Unit Characteristics**
+- **Unit Characteristics about other (in)justice components**
 
 ```{graph_open}python{graph_close}
 #| fig-align: center
@@ -546,7 +552,7 @@ qmd.plot_clst_boxplot_loop(
 
 cluster_results_overview = f"""
 ### Overview Clusters
-To provide an overview of the cluster characteristics, we consider cluster mean and median values. In @fig-mean-heatmap, cluster mean values are reported for each variable and colored according to the difference with the average mean among clusters (`Avg mean`). In @fig-cluster-comp-mean, the cluster mean value is standardized with respect to the average mean among clusters for each variable). That is,
+To provide an overview of the cluster characteristics, we consider cluster mean and median values. In @fig-mean-heatmap, cluster mean values are reported for each variable and colored according to the difference with the average mean among clusters (`Avg mean`). In @fig-cluster-comp-mean, the cluster mean value is standardized with respect to the average mean among clusters for each variable. That is,
 
 $$
 \\frac{graph_open}\\bar{graph_open}x{graph_close}_i - \\bar{graph_open}x{graph_close}_{graph_open}Avg{graph_close}{graph_close}{graph_open}\\bar{graph_open}x{graph_close}_{graph_open}Std{graph_close}{graph_close}
@@ -657,9 +663,9 @@ for cluster_lab in cluster_labels.values():
 
     graph_par_open = "{graph_open}"
     graph_par_close = "{graph_close}"
-    print(f'\\subsubsection{graph_open}graph_par_open{graph_close}Cluster {graph_open}cluster_lab{graph_close} (n = {graph_open}selected.loc[mask].shape[0]{graph_close}){graph_open}graph_par_close{graph_close}')
+    display(Latex(f'\\subsubsection{graph_open}graph_par_open{graph_close}Cluster {graph_open}cluster_lab{graph_close} (n = {graph_open}selected.loc[mask].shape[0]{graph_close}){graph_open}graph_par_close{graph_close}'))
 
-    print('\\center')
+    display(Latex('\\center'))
     qmd.plot_map_single_cluster(
         data_plot=selected,
         mask = mask,
@@ -667,7 +673,7 @@ for cluster_lab in cluster_labels.values():
         cluster_colors=cluster_colors
     )
 
-    print('\\\\')
+    display(Latex('\\\\'))
     qmd.plot_cluster_comp_single(
         dict_stat=dict_stat,
@@ -677,7 +683,7 @@ for cluster_lab in cluster_labels.values():
         sorted = True
     )
 
-    print('\\clearpage')
+    display(Latex('\\clearpage'))
 
     qmd.plot_boxplot_comp_cluster_loop(
         data_cluster = data_cluster,
@@ -690,7 +696,7 @@ for cluster_lab in cluster_labels.values():
         figsize = (20,25)
     )
 
-    print('\\clearpage')
+    display(Latex('\\clearpage'))
 
 ```
@@ -837,16 +843,10 @@ def table_latex(
 
 def cluster_color_dict(cluster_labels):
     n_clusters = len(cluster_labels)
-    if 'Z' not in cluster_labels.values():
-        n_clusters = n_clusters + 1
-
-
-    palette = list(sns.color_palette("colorblind", n_colors=max(8, n_clusters)))
-    gray_color = palette[7]
-    palette.remove(gray_color)
-    palette = palette[0: n_clusters-1] + [gray_color]
+    palette = list(sns.color_palette("Paired", n_colors= n_clusters))
+    gray_color = (0.5803921568627451, 0.5803921568627451, 0.5803921568627451)
     res = {
-        label:rgb for label, rgb in zip(cluster_labels.values(), palette)
+        label:(rgb if label != 'Z' else gray_color) for label, rgb in zip(cluster_labels.values(), palette)
     }
     return res