Skip to content
Snippets Groups Projects
Commit 90744cad authored by Claudio Zandonella's avatar Claudio Zandonella
Browse files

small fixes

parent 5b4aacef
Branches main
No related tags found
No related merge requests found
......@@ -27,11 +27,6 @@ raw = read_data()
selected = raw.loc[:, cols].dropna()
selected.rename(columns=conv, inplace=True)
# %%
# Remove error observation
mask = selected.index == 8888888
selected = selected[~ mask]
# %%
# Explore clustering pre-processing
......@@ -77,7 +72,7 @@ pre_key = (
"s:Normalizer()|"
"w:true|"
"d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', n_jobs=-1)"
"d:DictionaryLearning(alpha=0.1, fit_algorithm='cd')"
# "s:QuantileTransformer()|w:false|d:DictionaryLearning(alpha=0.1, fit_algorithm='cd', n_jobs=-1)"
)
......@@ -102,6 +97,7 @@ if my_paths.clfile.exists():
fi = pd.read_excel(my_paths.fimpfile, header=0, index_col=0)
else:
print(f"File: {my_paths.clfile} does not exist. Start computing the clusters")
print(f"Preprocessing: {pre_key}")
geo_out = raw.loc[:, sez_cols]
models = get_algclusters(preprocs=pre)
......
......@@ -7,14 +7,22 @@ import justclust.paths.paths as paths
my_paths = paths.define_paths(city = 'bolzano')
# %%
# Columns selected for inclusion in the outputs
sez_cols = [
"SEZ", "geometry",
"SEZ2011", "COD_REG", "COD_ISTAT", "PRO_COM"
]
id_col = ['SEZ']
#---- Air Quality ----
# Define available files
aq_scr_shp = my_paths.rawdata_dir / "Air quality" / "street_canyon_risk.shp"
prisk_scr_shp = my_paths.rawdata_dir / "Air quality" / "street_canyon_risk.shp"
# Define columns and other constant values
aq_raw_cols = ["Area_r1", "Area_r2", "Area_r3", "Area_r4", "Area_r6", "Area_r9"]
aq_nor_cols = [
prisk_raw_cols = ["Area_r1", "Area_r2", "Area_r3", "Area_r4", "Area_r6", "Area_r9"]
prisk_nor_cols = [
"P_Area_r1",
"P_Area_r2",
"P_Area_r3",
......@@ -39,9 +47,6 @@ ems = ["gasolio_em", "gpl_em", "ee_em", "metano_em", "legno_em", "tot_em", "c_ee
co2_em_raw_cols = ["finale"]
co2_em_nor_cols = ["finale/m²"]
# columns selected for the columns of the outputs
sez_cols = ["geometry", "SEZ", "SEZ2011", "COD_REG", "COD_ISTAT", "PRO_COM"]
#---- FFH ----
# Define available files
......@@ -116,13 +121,13 @@ socio_perc_cols = [
cols_dict = {
# Air quality
'air_q':{
"P_Area_r1": "area very-low P-risk [%]",
"P_Area_r2": "area low P-risk [%]",
"P_Area_r3": "area medium-low P-risk [%]",
"P_Area_r4": "area medium-high P-risk [%]",
"P_Area_r6": "area high P-risk [%]",
"P_Area_r9": "area very-high P-risk [%]"
'prisk':{
"P_Area_r1": "Area very-low P-risk [%]",
"P_Area_r2": "Area low P-risk [%]",
"P_Area_r3": "Area medium-low P-risk [%]",
"P_Area_r4": "Area medium-high P-risk [%]",
"P_Area_r6": "Area high P-risk [%]",
"P_Area_r9": "Area very-high P-risk [%]"
},
# Carbon
'carbon':{
......@@ -165,7 +170,7 @@ conv = {k:v for values in cols_dict.values() for k,v in values.items()}
# define subset of columns according to categories
selected_col = {
'col_aq': list(cols_dict.get('air_q').values()),
'col_prisk': list(cols_dict.get('prisk').values()),
'col_carbon':list(cols_dict.get('carbon').values()),
'col_char':\
list(cols_dict.get('ffh').values())+\
......@@ -181,7 +186,7 @@ selected_col = {
def read_data():
aq_scr = gpd.read_file(aq_scr_shp)
prisk_scr = gpd.read_file(prisk_scr_shp)
co2_ab = gpd.read_file(co2_ab_shp)
co2_ab["SEZ"] = co2_ab["SEZ"].astype(int)
co2_ab["kgCO2/m²"] = co2_ab["CO2"] / co2_ab.area
......@@ -207,7 +212,7 @@ def read_data():
raw = pd.concat(
[
co2_ab.loc[:, sez_cols],
aq_scr.loc[:, aq_raw_cols + aq_nor_cols],
prisk_scr.loc[:, prisk_raw_cols + prisk_nor_cols],
co2_ab.loc[:, co2_raw_cols + co2_nor_cols],
co2_em.loc[:, co2_em_raw_cols + co2_em_nor_cols + ems],
ffh_pa.loc[:, ffh_raw_cols + ffh_nor_cols],
......@@ -221,4 +226,9 @@ def read_data():
)
raw.index = co2_ab["SEZ"]
raw.index.name = None
# Remove the erroneous observation (index 8888888)
mask = raw.index == 8888888
raw = raw[~ mask]
return raw
......@@ -32,7 +32,7 @@ def get_preprocs(selected=None, apply_weight=None):
pre.StandardScaler(with_mean=True, with_std=True),
pre.Normalizer(norm="l2"),
pre.Normalizer(norm="l1"),
pre.Normalizer(norm="max"),
pre.Normalizer(norm="max"), # creates sparse matrices that lead to an error in FastICA, see https://github.com/scikit-learn/scikit-learn/issues/2089#issuecomment-1126183560
pre.QuantileTransformer(
n_quantiles=1000,
output_distribution="uniform",
......@@ -81,11 +81,11 @@ def get_preprocs(selected=None, apply_weight=None):
dec.PCA(),
dec.PCA(n_components="mle", svd_solver="full"),
dec.PCA(svd_solver="randomized", whiten=True),
dec.FastICA(whiten="warn"),
dec.FastICA(whiten="unit-variance"),
dec.FactorAnalysis(max_iter=200),
dec.DictionaryLearning(fit_algorithm="lars", alpha=0.1, n_jobs=-1),
dec.DictionaryLearning(fit_algorithm="cd", alpha=0.1, n_jobs=-1),
dec.FastICA(whiten="warn", max_iter= 500),
dec.FastICA(whiten="unit-variance", max_iter= 500),
dec.FactorAnalysis(max_iter=500),
dec.DictionaryLearning(fit_algorithm="lars", alpha=0.1, n_jobs=None),
dec.DictionaryLearning(fit_algorithm="cd", alpha=0.1, n_jobs=None),
]
preprocs = {
f"s:{s}|w:{'true' if w else 'false'}|d:{d}": (s, w, d)
......
......@@ -311,7 +311,11 @@ def plot_profiles2(
row = floor(c / ncols)
col = floor(c - (row * ncols))
print(f"cl: {cl}, row: {row}, col: {col}")
ax1 = axes[row][col]
if nrows > 1:
ax1 = axes[row][col]
else:
ax1 = axes[col]
data = [clf[vname][cl] for vname in vnames]
......@@ -530,7 +534,12 @@ def plot_stats(
for c, cl in enumerate(cl_cat):
irow = floor(c / ncols)
icol = floor(c - (irow * ncols))
ax = axes[irow][icol]
if nrows > 1:
ax = axes[irow][icol]
else:
ax = axes[icol]
light_bg = is_light(*cmap[cl], hsp_threshold=hsp_threshold)
sdf = stdfs[cl]
if idx_order is None:
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment