
Commit

small fixes
GiacomoAru committed Nov 12, 2023
1 parent eb55cd6 commit e4feafa
Showing 3 changed files with 36 additions and 4 deletions.
31 changes: 31 additions & 0 deletions TASK_1/incidents_understanding_preparation.py
@@ -1957,6 +1957,31 @@ def max_min_value(attribute): # FIXME: convert to float, exclude <= 122 and >
# %%
from TASK_1.data_preparation_utils import set_tags_consistent_data

CASTING = {'latitude':'Float64',
'longitude':'Float64',
'participant_age1':'Int64',
'min_age_participants':'Int64',
'avg_age_participants':'Int64',
'max_age_participants':'Int64',
'n_participants_child':'Int64',
'n_participants_teen':'Int64',
'n_participants_adult':'Int64',
'n_males':'Int64',
'n_females':'Int64',
'n_killed':'Int64',
'n_injured':'Int64',
'n_arrested':'Int64',
'n_unharmed':'Int64',
'n_participants':'Int64',
'year':'Int64',
'month':'Int64',
'day':'Int64',
'day_of_week':'Int64',
'location_importance':'Float64',
'state_house_district':'Int64',
'state_senate_district':'Int64',
'congressional_district':'Int64'
}
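
The nullable dtypes ('Int64', 'Float64') keep missing values as pd.NA instead of silently coercing integer columns to float. A minimal sketch of how such a mapping is typically applied; the actual call may sit in the collapsed lines of this hunk:

# hypothetical use of CASTING; the collapsed part of this diff may differ
incidents_df = incidents_df.astype(CASTING)
# or, when re-reading a checkpoint CSV:
incidents_df = pd.read_csv('checkpoints/checkpoint_6.csv', dtype=CASTING)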
if LOAD_DATA_FROM_CHECKPOINT:
with zipfile.ZipFile('checkpoints/checkpoint_6.csv.zip', 'r') as zip_ref:
zip_ref.extractall('checkpoints/')
@@ -1970,6 +1995,12 @@ def max_min_value(attribute): # FIXME: convert to float, exclude <= 122 and >
incidents_df = incidents_df.apply(lambda row: check_characteristics_consistency(row), axis=1)
save_checkpoint(incidents_df, 'checkpoint_6')
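
save_checkpoint is a project helper whose body is not part of this diff; a hypothetical sketch of what it plausibly does, mirroring the zipped-CSV checkpoint that the loading branch above extracts:

def save_checkpoint(df, name):
    # hypothetical helper: persist the frame as a zipped CSV under checkpoints/
    df.to_csv(f'checkpoints/{name}.csv.zip', index=False, compression='zip')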

# %%
incidents_df.sample(10, random_state=1)

# %%
pd.DataFrame(data=incidents_df.dtypes).T

# %%
incidents_df['tag_consistency'].value_counts()

7 changes: 4 additions & 3 deletions TASK_1/indicators.py
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
# %%
import pandas as pd
import numpy as np
@@ -7,7 +8,7 @@
import seaborn as sns
sys.path.append(os.path.abspath('..'))
from plot_utils import *
%matplotlib inline
# %matplotlib inline

# %%
incidents_df = pd.read_csv('../data/incidents_cleaned.csv')
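
Commenting out the %matplotlib inline magic (above) makes this script importable and runnable under a plain Python interpreter, where an IPython line magic is a SyntaxError. If inline plotting is still wanted in notebook sessions, a guarded call is a common pattern; a sketch, not part of the commit:

try:
    # get_ipython() is defined only inside IPython/Jupyter sessions
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass  # plain interpreter: nothing to do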
@@ -171,7 +172,7 @@ def log_normalization(df, new_df, columns):

# %% [markdown]
# The logarithmic transformation is meant to make the data less sparse, and in this case it is used for the opposite purpose...
#
#
# We cannot turn barely meaningful data into meaningful data this way; be careful, and I would advise against using the logarithm on values in [0,1]
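
To make the warning concrete: on [0,1] the plain logarithm is negative everywhere, diverges at 0, and stretches that range instead of compressing it. np.log1p is a common safer alternative for such values (an assumption here, not necessarily what log_normalization does):

import numpy as np

x = np.array([0.0, 0.01, 0.5, 1.0])
np.log(x)    # [-inf, -4.61, -0.69, 0.0]: blows up near 0
np.log1p(x)  # [0.0, 0.00995, 0.405, 0.693]: finite and monotone on [0, 1]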

# %%
@@ -431,7 +432,7 @@ def update_legend_marker_size(handle, orig):
# - killed, injured, etc. relative to the mean, with log normalization
# - ratio of killed/total or injured/total for the incident (perhaps as replacements)
# - whimsical entropies (over all tags or tag combinations); see the sketch below
#
#
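
A minimal sketch of two of the indicators proposed above, assuming the column names from the first file of this diff (n_killed, n_participants) and per-incident tag counts; not the project's final implementation:

import numpy as np

def death_ratio(df):
    # killed-to-total ratio per incident; NA-safe with the nullable dtypes above
    return df['n_killed'] / df['n_participants']

def shannon_entropy(p):
    # entropy of a probability vector, e.g. normalized tag counts of one incident
    p = np.asarray(p, dtype=float)
    p = p[p > 0]
    return -(p * np.log2(p)).sum()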

# %%

2 changes: 1 addition & 1 deletion html/incidents_per_day.html

Large diffs are not rendered by default.
