Skip to content

Commit

Permalink
adding bar_chart (#222)
Browse files Browse the repository at this point in the history
  • Loading branch information
atmorling authored Aug 1, 2024
1 parent 791b163 commit 0d0d2bd
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 10 deletions.
11 changes: 2 additions & 9 deletions ecoscope/plotting/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,7 @@
nsd,
plot_seasonal_dist,
speed,
stacked_bar_chart,
)

__all__ = [
"EcoPlotData",
"add_seasons",
"ecoplot",
"mcp",
"nsd",
"speed",
"plot_seasonal_dist",
]
__all__ = ["EcoPlotData", "add_seasons", "ecoplot", "mcp", "nsd", "speed", "plot_seasonal_dist", "stacked_bar_chart"]
50 changes: 50 additions & 0 deletions ecoscope/plotting/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,3 +268,53 @@ def plot_seasonal_dist(ndvi_vals, cuts, bandwidth=0.05, output_file=None):
if output_file:
fig.write_image(output_file)
return fig


def stacked_bar_chart(data: EcoPlotData, agg_function: str, stack_column: str, layout_kwargs: dict = None):
    """
    Creates a stacked bar chart from the provided EcoPlotData object

    Parameters
    ----------
    data: ecoscope.plotting.EcoPlotData
        The grouped data to plot; data.y_col is aggregated per group and
        plotted against data.x_col. The object is not modified.
    agg_function: str
        The pandas aggregation to run on data.y_col ie; 'count', 'sum'
    stack_column: str
        The name of the column in the data to build stacks from, should be categorical
    layout_kwargs: dict
        Additional kwargs passed to plotly.go.Figure(layout)

    Returns
    -------
    fig : plotly.graph_objects.Figure
        The plotly bar chart
    """
    # TODO cleanup EPD defaults
    # "mode" must not be forwarded to go.Bar. Filter it out of a copy instead of
    # popping it from data.style, so the caller's EcoPlotData is left intact and
    # calling this function twice (or with a style lacking "mode") does not raise.
    base_style = {key: value for key, value in data.style.items() if key != "mode"}

    fig = go.Figure(layout=layout_kwargs)

    x_axis_name = data.x_col
    y_axis_name = data.y_col

    # unstack(fill_value=0) followed by stack() yields one row for every
    # combination of the group keys, with 0 for combinations that were absent
    # from the data, so each category has a value for every x position.
    agg = (
        data.grouped[y_axis_name]
        .agg(agg_function)
        .to_frame(agg_function)
        .unstack(fill_value=0)
        .stack(future_stack=True)
        .reset_index()
    )

    x = agg[x_axis_name].unique()
    for category in agg[stack_column].unique():
        # Per-category style overrides the shared style; a category without an
        # entry in groupby_style simply falls back to the shared style.
        trace_style = {**base_style, **data.groupby_style.get(category, {})}
        fig.add_trace(
            go.Bar(
                x=x,
                y=agg.loc[agg[stack_column] == category, agg_function].tolist(),
                name=str(category),
                **trace_style,
            )
        )

    fig.update_layout(barmode="stack")
    return fig
69 changes: 68 additions & 1 deletion tests/test_ecoplot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
from ecoscope.plotting.plot import EcoPlotData, ecoplot, mcp, nsd, speed
import pandas as pd
from ecoscope.plotting.plot import EcoPlotData, ecoplot, mcp, nsd, speed, stacked_bar_chart
from ecoscope.base import Trajectory


Expand Down Expand Up @@ -45,3 +46,69 @@ def test_speed(movebank_relocations):
assert len(figure.data) == 1
len(figure.data[0].x) == len(traj) * 4
len(figure.data[0].y) == len(traj) * 4


def test_stacked_bar_chart_categorical():
    """Counting a categorical column gives one styled bar trace per category."""
    records = pd.DataFrame(
        {
            "id": [1, 2, 3, 4],
            "category": ["A", "B", "B", "B"],
            "time": ["2024-07-22", "2024-07-22", "2024-07-22", "2024-07-21"],
        }
    ).set_index("id")

    shared_style = {"marker_line_color": "black", "xperiodalignment": "middle"}
    per_category_style = {"A": {"marker_color": "red"}, "B": {"marker_color": "blue"}}
    figure_layout = {"plot_bgcolor": "gray", "xaxis_dtick": 86400000}

    plot_data = EcoPlotData(
        records.groupby(["time", "category"]),
        "time",
        "category",
        groupby_style=per_category_style,
        **shared_style,
    )
    chart = stacked_bar_chart(
        plot_data,
        agg_function="count",
        stack_column="category",
        layout_kwargs=figure_layout,
    )

    # one trace per category
    assert len(chart.data) == 2
    trace_a, trace_b = chart.data
    assert trace_a.name == "A"
    assert trace_b.name == "B"

    # per-date counts for each category (dates sorted ascending)
    assert trace_a.y == (0, 1)
    assert trace_b.y == (1, 2)

    # layout and style kwargs must have been applied
    assert chart.layout.plot_bgcolor == "gray"
    for trace in (trace_a, trace_b):
        assert trace.xperiodalignment == "middle"
        assert trace.marker.line.color == "black"
    assert trace_a.marker.color == "red"
    assert trace_b.marker.color == "blue"


def test_stacked_bar_chart_numerical():
    """Summing a numeric column gives one styled bar trace per category."""
    df = pd.DataFrame(
        {
            "id": [1, 2, 3, 4],
            "category": ["A", "B", "B", "B"],
            "value": [25, 40, 65, 150],
            "time": ["2024-07-22", "2024-07-22", "2024-07-22", "2024-07-21"],
        }
    )
    df.set_index("id", inplace=True)

    groupby_style = {"A": {"marker_color": "yellow"}, "B": {"marker_color": "green"}}
    style = {"marker_line_color": "black", "xperiodalignment": "middle"}
    layout_kwargs = {"plot_bgcolor": "gray", "xaxis_dtick": 86400000}

    gb = df.groupby(["time", "category"])
    epd = EcoPlotData(gb, "time", "value", groupby_style=groupby_style, **style)
    chart = stacked_bar_chart(epd, agg_function="sum", stack_column="category", layout_kwargs=layout_kwargs)

    # we should have 2 categorical buckets
    assert len(chart.data) == 2
    assert chart.data[0].name == "A"
    assert chart.data[1].name == "B"
    # Should be the sum of values by A and B over time
    assert chart.data[0].y == (0, 25)
    assert chart.data[1].y == (150, 105)
    # validate style kwargs
    assert chart.layout.plot_bgcolor == "gray"
    assert chart.data[0].xperiodalignment == chart.data[1].xperiodalignment == "middle"
    assert chart.data[0].marker.line.color == chart.data[1].marker.line.color == "black"
    # both per-category styles must be applied, not just the last one
    assert chart.data[0].marker.color == "yellow"
    assert chart.data[1].marker.color == "green"

0 comments on commit 0d0d2bd

Please sign in to comment.