
Commit

small fixes
GiacomoAru committed Nov 12, 2023
1 parent eb55cd6 commit e4feafa
Showing 3 changed files with 36 additions and 4 deletions.
31 changes: 31 additions & 0 deletions TASK_1/incidents_understanding_preparation.py
@@ -1957,6 +1957,31 @@ def max_min_value(attribute): # FIXME: convert to float, exclude <= 122 and >
# %%
from TASK_1.data_preparation_utils import set_tags_consistent_data

CASTING = {'latitude':'Float64',
'longitude':'Float64',
'participant_age1':'Int64',
'min_age_participants':'Int64',
'avg_age_participants':'Int64',
'max_age_participants':'Int64',
'n_participants_child':'Int64',
'n_participants_teen':'Int64',
'n_participants_adult':'Int64',
'n_males':'Int64',
'n_females':'Int64',
'n_killed':'Int64',
'n_injured':'Int64',
'n_arrested':'Int64',
'n_unharmed':'Int64',
'n_participants':'Int64',
'year':'Int64',
'month':'Int64',
'day':'Int64',
'day_of_week':'Int64',
'location_importance':'Float64',
'state_house_district':'Int64',
'state_senate_district':'Int64',
'congressional_district':'Int64'
}
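
The nullable dtypes ('Int64', 'Float64') keep missing values as pd.NA instead of silently coercing integer columns to float. A minimal sketch of how such a mapping is typically applied; the actual call may sit in the collapsed lines of this hunk:

# hypothetical use of CASTING; the collapsed part of this diff may differ
incidents_df = incidents_df.astype(CASTING)
# or, when re-reading a checkpoint CSV:
incidents_df = pd.read_csv('checkpoints/checkpoint_6.csv', dtype=CASTING)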
if LOAD_DATA_FROM_CHECKPOINT:
with zipfile.ZipFile('checkpoints/checkpoint_6.csv.zip', 'r') as zip_ref:
zip_ref.extractall('checkpoints/')
@@ -1970,6 +1995,12 @@ def max_min_value(attribute): # FIXME: convert to float, exclude <= 122 and >
incidents_df = incidents_df.apply(lambda row: check_characteristics_consistency(row), axis=1)
save_checkpoint(incidents_df, 'checkpoint_6')
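
save_checkpoint is a project helper whose body is not part of this diff; a hypothetical sketch of what it plausibly does, mirroring the zipped-CSV checkpoint that the loading branch above extracts:

def save_checkpoint(df, name):
    # hypothetical helper: persist the frame as a zipped CSV under checkpoints/
    df.to_csv(f'checkpoints/{name}.csv.zip', index=False, compression='zip')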

# %%
incidents_df.sample(10, random_state=1)

# %%
pd.DataFrame(data=incidents_df.dtypes).T

# %%
incidents_df['tag_consistency'].value_counts()

7 changes: 4 additions & 3 deletions TASK_1/indicators.py
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
# %%
import pandas as pd
import numpy as np
@@ -7,7 +8,7 @@
import seaborn as sns
sys.path.append(os.path.abspath('..'))
from plot_utils import *
%matplotlib inline
# %matplotlib inline

# %%
incidents_df = pd.read_csv('../data/incidents_cleaned.csv')
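
Commenting out the %matplotlib inline magic (above) makes this script importable and runnable under a plain Python interpreter, where an IPython line magic is a SyntaxError. If inline plotting is still wanted in notebook sessions, a guarded call is a common pattern; a sketch, not part of the commit:

try:
    # get_ipython() is defined only inside IPython/Jupyter sessions
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass  # plain interpreter: nothing to do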
@@ -171,7 +172,7 @@ def log_normalization(df, new_df, columns):

# %% [markdown]
# The logarithmic transformation is meant to make the data less sparse, and in this case it is used for the opposite purpose...
#
#
# We cannot turn barely meaningful data into meaningful data this way; be careful, and I would advise against using the logarithm on values in [0,1]
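
To make the warning concrete: on [0,1] the plain logarithm is negative everywhere, diverges at 0, and stretches that range instead of compressing it. np.log1p is a common safer alternative for such values (an assumption here, not necessarily what log_normalization does):

import numpy as np

x = np.array([0.0, 0.01, 0.5, 1.0])
np.log(x)    # [-inf, -4.61, -0.69, 0.0]: blows up near 0
np.log1p(x)  # [0.0, 0.00995, 0.405, 0.693]: finite and monotone on [0, 1]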

# %%
@@ -431,7 +432,7 @@ def update_legend_marker_size(handle, orig):
# - killed, injured, etc. relative to the mean, with log normalization
# - ratio of killed/total or injured/total for the incident (perhaps as replacements)
# - whimsical entropies (over all tags or tag combinations); see the sketch below
#
#
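
A minimal sketch of two of the indicators proposed above, assuming the column names from the first file of this diff (n_killed, n_participants) and per-incident tag counts; not the project's final implementation:

import numpy as np

def death_ratio(df):
    # killed-to-total ratio per incident; NA-safe with the nullable dtypes above
    return df['n_killed'] / df['n_participants']

def shannon_entropy(p):
    # entropy of a probability vector, e.g. normalized tag counts of one incident
    p = np.asarray(p, dtype=float)
    p = p[p > 0]
    return -(p * np.log2(p)).sum()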

# %%

2 changes: 1 addition & 1 deletion html/incidents_per_day.html

Large diffs are not rendered by default.
