Skip to content

Commit

Permalink
Allow classifier to label ranges (#269)
Browse files Browse the repository at this point in the history
  • Loading branch information
atmorling authored Oct 31, 2024
1 parent daf746f commit 0058c6e
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 5 deletions.
36 changes: 33 additions & 3 deletions ecoscope/analysis/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,36 @@

# pass in a dataframe; the classification is written to a column and the same dataframe is returned
def apply_classification(
dataframe, input_column_name, output_column_name=None, labels=None, scheme="natural_breaks", **kwargs
dataframe,
input_column_name,
output_column_name=None,
labels=None,
scheme="natural_breaks",
label_prefix="",
label_suffix="",
label_ranges=False,
label_decimals=1,
**kwargs,
):
"""
Classifies the data in a GeoDataFrame column using specified classification scheme.
Classifies the data in a DataFrame column using specified classification scheme.
Args:
dataframe (pd.DataFrame): The data.
input_column_name (str): The dataframe column to classify.
output_column_name (str): The dataframe column that will contain the classification.
output_column_name (str): The dataframe column that will contain the classification.
Defaults to "<input_column_name>_classified"
labels (list[str]): labels of bins, use bin edges if labels==None.
scheme (str): Classification scheme to use [equal_interval, natural_breaks, quantile, std_mean, max_breaks,
fisher_jenks]
label_prefix (str): Prepends provided string to each label
label_suffix (str): Appends provided string to each label
label_ranges (bool): Applicable only when 'labels' is not set
If True, generated labels will be the range between bin edges,
rather than the bin edges themselves.
label_decimals (int): Applicable only when 'labels' is not set
Specifies the number of decimal places in the label
**kwargs:
Additional keyword arguments specific to the classification scheme, passed to mapclassify.
Expand Down Expand Up @@ -75,7 +92,20 @@ def apply_classification(
classifier = classifier_class(dataframe[input_column_name].to_numpy(), **kwargs)
if labels is None:
labels = classifier.bins

if label_ranges:
# We could do this using mapclassify.get_legend_classes, but this generates a cleaner label
ranges = [f"{dataframe[input_column_name].min():.{label_decimals}f} - {labels[0]:.{label_decimals}f}"]
ranges.extend(
[f"{labels[i]:.{label_decimals}f} - {labels[i + 1]:.{label_decimals}f}" for i in range(len(labels) - 1)]
)
labels = ranges
else:
labels = [round(label, label_decimals) for label in labels]

assert len(labels) == len(classifier.bins)
if label_prefix or label_suffix:
labels = [f"{label_prefix}{label}{label_suffix}" for label in labels]
dataframe[output_column_name] = [labels[i] for i in classifier.yb]
return dataframe

Expand Down
26 changes: 25 additions & 1 deletion tests/test_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def sample_df():
(
"std_mean",
{"multiples": [-2, -1, 1, 2]},
[1.4188611699158102, 4.58113883008419, 4.58113883008419, 4.58113883008419, 6.16227766016838],
[1.4, 4.6, 4.6, 4.6, 6.2],
),
("max_breaks", {"k": 4}, [2.5, 2.5, 3.5, 4.5, 5.0]),
("fisher_jenks", {"k": 5}, [1.0, 2.0, 3.0, 4.0, 5.0]),
Expand All @@ -38,6 +38,19 @@ def test_classify_with_labels(sample_df):
assert result["value_classified"].values.tolist() == ["1", "1", "1", "2", "2"]


def test_classify_with_labels_prefix_suffix(sample_df):
    """Check that label_prefix and label_suffix wrap each user-supplied label."""
    classified = apply_classification(
        sample_df,
        scheme="equal_interval",
        k=2,
        input_column_name="value",
        labels=["1", "2"],
        label_prefix="_",
        label_suffix="_",
    )
    observed = classified["value_classified"].values.tolist()
    # Both the prefix and the suffix are "_", so every label is wrapped on each side.
    assert observed == ["_1_", "_1_", "_1_", "_2_", "_2_"]


def test_classify_with_invalid_labels(sample_df):
with pytest.raises(AssertionError):
apply_classification(sample_df, input_column_name="value", labels=[0], scheme="std_mean")
Expand Down Expand Up @@ -104,3 +117,14 @@ def test_apply_colormap_cmap_user_defined_bad(movebank_relocations):

with pytest.raises(AssertionError):
apply_color_map(classified, "speed_bins", cmap)


def test_classify_with_ranges(sample_df):
    """Check that label_ranges=True yields "low - high" range labels for each row."""
    expected_labels = [
        "1.0 - 1.8",
        "1.8 - 2.6",
        "2.6 - 3.4",
        "3.4 - 4.2",
        "4.2 - 5.0",
    ]
    classified = apply_classification(
        sample_df,
        input_column_name="value",
        scheme="equal_interval",
        label_ranges=True,
        k=5,
    )
    assert classified["value_classified"].values.tolist() == expected_labels
9 changes: 8 additions & 1 deletion tests/test_ecomap.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,13 +279,20 @@ def test_add_polyline_with_color(movebank_relocations):
trajectory = ecoscope.base.Trajectory.from_relocations(movebank_relocations)
# this is effectively a reimplementation of SpeedDataFrame
apply_classification(
trajectory, input_column_name="speed_kmhr", output_column_name="speed_bins", scheme="equal_interval", k=6
trajectory,
input_column_name="speed_kmhr",
output_column_name="speed_bins",
scheme="equal_interval",
label_suffix=" km/h",
label_ranges=True,
k=6,
)
cmap = ["#1a9850", "#91cf60", "#d9ef8b", "#fee08b", "#fc8d59", "#d73027"]
apply_color_map(trajectory, "speed_bins", cmap=cmap, output_column_name="speed_colors")

m = EcoMap()
m.add_layer(m.polyline_layer(trajectory, color_column="speed_colors", get_width=2000))
m.add_legend(labels=trajectory["speed_bins"], colors=trajectory["speed_colors"])

assert len(m.layers) == 2
assert isinstance(m.layers[1], PathLayer)
Expand Down

0 comments on commit 0058c6e

Please sign in to comment.