Skip to content
Snippets Groups Projects
Commit 4b972517 authored by Claudio Zandonella's avatar Claudio Zandonella
Browse files

change standardization for plot report using std and iqr

parent 0f867da4
Branches main
No related tags found
No related merge requests found
......@@ -549,10 +549,10 @@ qmd.plot_clst_boxplot_loop(
To provide an overview of the cluster characteristics, we consider cluster mean and median values. In @fig-mean-heatmap, cluster mean values are reported for each variable and colored according to the difference with the average mean among clusters (`Avg mean`). In @fig-cluster-comp-mean, the cluster mean value is standardized with respect to the average mean among clusters for each variable. That is,
$$
\\frac{graph_open}\\bar{graph_open}x{graph_close}_i - \\bar{graph_open}x{graph_close}_{graph_open}Avg{graph_close}{graph_close}{graph_open}\\bar{graph_open}x{graph_close}_{graph_open}Avg{graph_close}{graph_close}
\\frac{graph_open}\\bar{graph_open}x{graph_close}_i - \\bar{graph_open}x{graph_close}_{graph_open}Avg{graph_close}{graph_close}{graph_open}\\bar{graph_open}x{graph_close}_{graph_open}Std{graph_close}{graph_close}
$$
where $\\bar{graph_open}x{graph_close}_i$ is the mean value of a given variable for cluster $i$ and $\\bar{graph_open}x{graph_close}_{graph_open}Avg{graph_close}$ is the average mean among clusters (i.e, $\\frac{graph_open}\\sum_i{graph_open}\\bar{graph_open}x{graph_close}_i{graph_close}{graph_close}{graph_open}\\#i{graph_close}$).
where $\\bar{graph_open}x{graph_close}_i$ is the mean value of a given variable for cluster $i$, $\\bar{graph_open}x{graph_close}_{graph_open}Avg{graph_close}$ is the average mean among clusters (i.e., $\\frac{graph_open}\\sum_i{graph_open}\\bar{graph_open}x{graph_close}_i{graph_close}{graph_close}{graph_open}\\#i{graph_close}$), and $\\bar{graph_open}x{graph_close}_{graph_open}Std{graph_close}$ is the standard deviation of the cluster means (i.e., $Std(\\bar{graph_open}x{graph_close}_i)$).
```{graph_open}python{graph_close}
#| fig-align: center
......@@ -594,10 +594,10 @@ qmd.plot_cluster_comp_loop(
In @fig-median-heatmap, cluster median values are reported for each variable and colored according to the difference with the average median among clusters (`Avg median`). In @fig-cluster-comp-median, the cluster median value is standardized with respect to the average median among clusters for each variable. That is,
$$
\\frac{graph_open}\\tilde{graph_open}x{graph_close}_i - \\tilde{graph_open}x{graph_close}_{graph_open}Avg{graph_close}{graph_close}{graph_open}\\tilde{graph_open}x{graph_close}_{graph_open}Avg{graph_close}{graph_close}
\\frac{graph_open}\\tilde{graph_open}x{graph_close}_i - \\tilde{graph_open}x{graph_close}_{graph_open}Avg{graph_close}{graph_close}{graph_open}\\tilde{graph_open}x{graph_close}_{graph_open}IQR{graph_close}{graph_close}
$$
where $\\tilde{graph_open}x{graph_close}_i$ is the median value of a given variable for cluster $i$ and $\\tilde{graph_open}x{graph_close}_{graph_open}Avg{graph_close}$ is the average median among clusters (i.e, $\\frac{graph_open}\\sum_i{graph_open}\\tilde{graph_open}x{graph_close}_i{graph_close}{graph_close}{graph_open}\\#i{graph_close}$).
where $\\tilde{graph_open}x{graph_close}_i$ is the median value of a given variable for cluster $i$, $\\tilde{graph_open}x{graph_close}_{graph_open}Avg{graph_close}$ is the average median among clusters (i.e., $\\frac{graph_open}\\sum_i{graph_open}\\tilde{graph_open}x{graph_close}_i{graph_close}{graph_close}{graph_open}\\#i{graph_close}$), and $\\tilde{graph_open}x{graph_close}_{graph_open}IQR{graph_close}$ is the interquartile range of the cluster medians (i.e., $IQR(\\tilde{graph_open}x{graph_close}_i)$).
```{graph_open}python{graph_close}
......@@ -776,19 +776,25 @@ def get_dict_stat(
selected_grouped = data.groupby(['cluster_lab'])
cluster_means = selected_grouped.mean()
cluster_medians = selected_grouped.median()
overall_mean = cluster_means.mean()
ovarall_sd = cluster_means.std()
overall_median = cluster_medians.mean()
ovarall_iqr = cluster_medians.apply( # interqunaitle range
lambda x: np.round(np.percentile(x, 75) - np.percentile(x, 25),8),
axis=0)
res = {
'cluster_means':cluster_means.T,
'cluster_means_cent':cluster_means.T\
.sub(overall_mean, axis = 0)\
.divide(np.abs(overall_mean), axis = 0),
.divide(ovarall_sd, axis = 0),
'cluster_medians':cluster_medians.T,
'cluster_medians_cent':cluster_medians.T\
.sub(overall_median, axis = 0)\
.divide(np.abs(overall_median), axis = 0)
.divide(ovarall_iqr, axis = 0)
}
return res
......@@ -1374,6 +1380,7 @@ def plot_cluster_comp_single(
xmax=xmax,
)
axes[0].set(title='Mean')
axes[0].set(xlabel=r'$\frac{\bar{x}_i - \bar{x}_{Avg}}{\bar{x}_{std}}$')
plot_cluster_comp(
serie_plot=data_plot_median,
......@@ -1385,6 +1392,7 @@ def plot_cluster_comp_single(
xmax=xmax,
)
axes[1].set(title='Median')
axes[1].set(xlabel=r'$\frac{\tilde{x}_i - \tilde{x}_{Avg}}{\tilde{x}_{IQR}}$')
plt.show()
#---- plot_map_single_cluster ----
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment