diff --git a/plots/distance_per_operator_stacked.png b/plots/distance_per_operator_stacked.png index b8ee987..27ede72 100644 Binary files a/plots/distance_per_operator_stacked.png and b/plots/distance_per_operator_stacked.png differ diff --git a/plots/duration_per_operator_stacked.png b/plots/duration_per_operator_stacked.png index e073007..804e9c4 100644 Binary files a/plots/duration_per_operator_stacked.png and b/plots/duration_per_operator_stacked.png differ diff --git a/src/plotsCodes/distance_per_operator_stacked.py b/src/plotsCodes/distance_per_operator_stacked.py index 901a82d..d5419c4 100644 --- a/src/plotsCodes/distance_per_operator_stacked.py +++ b/src/plotsCodes/distance_per_operator_stacked.py @@ -10,6 +10,7 @@ from datetime import datetime import matplotlib.pyplot as plt +import pandas as pd import numpy as np @@ -22,7 +23,21 @@ def plot_distance_per_operator_stacked(trips): bottom = np.zeros(len(years)) operators = past_trips["Operator"].copy() - operators_sorted = operators.value_counts().index.tolist() + all_operators = operators.unique().tolist() + operators_distance = [] + for operator in all_operators: + operators_distance.append( + past_trips.loc[operators == operator]["Distance (km)"].sum() + ) + + operators_distance = np.array(operators_distance) + operators_distance_df = pd.DataFrame( + {"Operator": all_operators, "Distance": operators_distance} + ) + + operators_sorted = operators_distance_df.sort_values( + by="Distance", ascending=False + ).Operator.tolist() operators_selected = operators_sorted[0:7] operators.loc[~operators.isin(operators_selected)] = "Others" operators_selected.append("Others") diff --git a/src/plotsCodes/duration_per_operator_stacked.py b/src/plotsCodes/duration_per_operator_stacked.py index 8d36da7..bcb07bf 100644 --- a/src/plotsCodes/duration_per_operator_stacked.py +++ b/src/plotsCodes/duration_per_operator_stacked.py @@ -11,6 +11,7 @@ import matplotlib.pyplot as plt import numpy as np +import pandas as pd # Plot of km travelled by train operator @@ -22,7 +23,21 @@ def plot_duration_per_operator_stacked(trips): bottom = np.zeros(len(years)) operators = past_trips["Operator"].copy() - operators_sorted = operators.value_counts().index.tolist() + all_operators = operators.unique().tolist() + operators_duration = [] + for operator in all_operators: + operators_duration.append( + past_trips.loc[operators == operator]["Duration"].sum().total_seconds() + ) + + operators_distance = np.array(operators_duration) + operators_distance_df = pd.DataFrame( + {"Operator": all_operators, "Distance": operators_distance} + ) + + operators_sorted = operators_distance_df.sort_values( + by="Distance", ascending=False + ).Operator.tolist() operators_selected = operators_sorted[0:7] operators.loc[~operators.isin(operators_selected)] = "Others" operators_selected.append("Others")