From 0d0d2bd48e3eedbf66b0ac9d2825f1ccd0ba302d Mon Sep 17 00:00:00 2001
From: atmorling
Date: Thu, 1 Aug 2024 13:14:53 +0200
Subject: [PATCH] adding bar_chart (#222)

---
 ecoscope/plotting/__init__.py | 11 +-----
 ecoscope/plotting/plot.py     | 50 +++++++++++++++++++++++++
 tests/test_ecoplot.py         | 69 ++++++++++++++++++++++++++++++++++-
 3 files changed, 120 insertions(+), 10 deletions(-)

diff --git a/ecoscope/plotting/__init__.py b/ecoscope/plotting/__init__.py
index 23d6dd37..490b8d73 100644
--- a/ecoscope/plotting/__init__.py
+++ b/ecoscope/plotting/__init__.py
@@ -6,14 +6,7 @@
     nsd,
     plot_seasonal_dist,
     speed,
+    stacked_bar_chart,
 )
 
-__all__ = [
-    "EcoPlotData",
-    "add_seasons",
-    "ecoplot",
-    "mcp",
-    "nsd",
-    "speed",
-    "plot_seasonal_dist",
-]
+__all__ = ["EcoPlotData", "add_seasons", "ecoplot", "mcp", "nsd", "speed", "plot_seasonal_dist", "stacked_bar_chart"]
diff --git a/ecoscope/plotting/plot.py b/ecoscope/plotting/plot.py
index 15632036..c3fb780a 100644
--- a/ecoscope/plotting/plot.py
+++ b/ecoscope/plotting/plot.py
@@ -268,3 +268,53 @@ def plot_seasonal_dist(ndvi_vals, cuts, bandwidth=0.05, output_file=None):
     if output_file:
         fig.write_image(output_file)
     return fig
+
+
+def stacked_bar_chart(data: EcoPlotData, agg_function: str, stack_column: str, layout_kwargs: dict = None):
+    """
+    Creates a stacked bar chart from the provided EcoPlotData object
+    Parameters
+    ----------
+    data: ecoscope.plotting.EcoPlotData
+        The data to plot; data.grouped[data.y_col] is aggregated per group. data.style is left unmodified
+    agg_function: str
+        The pandas.DataFrame.aggregate() function to run, e.g. 'count', 'sum'
+    stack_column: str
+        The name of the column in the data to build stacks from, should be categorical
+    layout_kwargs: dict
+        Additional kwargs passed to plotly.go.Figure(layout)
+    Returns
+    -------
+    fig : plotly.graph_objects.Figure
+        The plotly bar chart
+    """
+    # TODO cleanup EPD defaults; go.Bar rejects "mode", so strip it from a copy and leave data.style intact
+    style = {key: value for key, value in data.style.items() if key != "mode"}
+
+    fig = go.Figure(layout=layout_kwargs)
+
+    x_axis_name = data.x_col
+    y_axis_name = data.y_col
+
+    agg = (
+        data.grouped[y_axis_name]
+        .agg(agg_function)
+        .to_frame(agg_function)
+        .unstack(fill_value=0)
+        .stack(future_stack=True)
+        .reset_index()
+    )
+
+    x = agg[x_axis_name].unique()
+    for category in agg[stack_column].unique():
+        fig.add_trace(
+            go.Bar(
+                x=x,
+                y=list(agg[agg[stack_column] == category][agg_function]),
+                name=str(category),
+                **{**style, **data.groupby_style.get(category, {})},
+            )
+        )
+
+    fig.update_layout(barmode="stack")
+    return fig
diff --git a/tests/test_ecoplot.py b/tests/test_ecoplot.py
index 16f3e42e..af3ccf6e 100644
--- a/tests/test_ecoplot.py
+++ b/tests/test_ecoplot.py
@@ -1,5 +1,6 @@
 import numpy as np
-from ecoscope.plotting.plot import EcoPlotData, ecoplot, mcp, nsd, speed
+import pandas as pd
+from ecoscope.plotting.plot import EcoPlotData, ecoplot, mcp, nsd, speed, stacked_bar_chart
 from ecoscope.base import Trajectory
 
 
@@ -45,3 +46,69 @@ def test_speed(movebank_relocations):
     assert len(figure.data) == 1
     len(figure.data[0].x) == len(traj) * 4
     len(figure.data[0].y) == len(traj) * 4
+
+
+def test_stacked_bar_chart_categorical():
+    df = pd.DataFrame(
+        {
+            "id": [1, 2, 3, 4],
+            "category": ["A", "B", "B", "B"],
+            "time": ["2024-07-22", "2024-07-22", "2024-07-22", "2024-07-21"],
+        }
+    )
+    df.set_index("id", inplace=True)
+
+    groupby_style = {"A": {"marker_color": "red"}, "B": {"marker_color": "blue"}}
+    style = {"marker_line_color": "black", "xperiodalignment": "middle"}
+    layout_kwargs = {"plot_bgcolor": "gray", "xaxis_dtick": 86400000}
+
+    gb = df.groupby(["time", "category"])
+    epd = EcoPlotData(gb, "time", "category", groupby_style=groupby_style, **style)
+    chart = stacked_bar_chart(epd, agg_function="count", stack_column="category", layout_kwargs=layout_kwargs)
+
+    # we should have 2 categorical buckets
+    assert len(chart.data) == 2
+    assert chart.data[0].name == "A"
+    assert chart.data[1].name == "B"
+    # Should be the count of A and B for our 2 dates
+    assert chart.data[0].y == (0, 1)
+    assert chart.data[1].y == (1, 2)
+    # validate style kwargs
+    assert chart.layout.plot_bgcolor == "gray"
+    assert chart.data[0].xperiodalignment == chart.data[1].xperiodalignment == "middle"
+    assert chart.data[0].marker.line.color == chart.data[1].marker.line.color == "black"
+    assert chart.data[0].marker.color == "red"
+    assert chart.data[1].marker.color == "blue"
+
+
+def test_stacked_bar_chart_numerical():
+    df = pd.DataFrame(
+        {
+            "id": [1, 2, 3, 4],
+            "category": ["A", "B", "B", "B"],
+            "value": [25, 40, 65, 150],
+            "time": ["2024-07-22", "2024-07-22", "2024-07-22", "2024-07-21"],
+        }
+    )
+    df.set_index("id", inplace=True)
+
+    groupby_style = {"A": {"marker_color": "yellow"}, "B": {"marker_color": "green"}}
+    style = {"marker_line_color": "black", "xperiodalignment": "middle"}
+    layout_kwargs = {"plot_bgcolor": "gray", "xaxis_dtick": 86400000}
+
+    gb = df.groupby(["time", "category"])
+    epd = EcoPlotData(gb, "time", "value", groupby_style=groupby_style, **style)
+    chart = stacked_bar_chart(epd, agg_function="sum", stack_column="category", layout_kwargs=layout_kwargs)
+
+    # we should have 2 categorical buckets
+    assert len(chart.data) == 2
+    assert chart.data[0].name == "A"
+    assert chart.data[1].name == "B"
+    # Should be the sum of values by A and B over time
+    assert chart.data[0].y == (0, 25)
+    assert chart.data[1].y == (150, 105)
+    # validate style kwargs
+    assert chart.layout.plot_bgcolor == "gray"
+    assert chart.data[0].xperiodalignment == chart.data[1].xperiodalignment == "middle"
+    assert chart.data[0].marker.line.color == chart.data[1].marker.line.color == "black"
+    assert chart.data[1].marker.color == "green"