Skip to content

Commit

Permalink
new feature label_largest
Browse files Browse the repository at this point in the history
  • Loading branch information
wspr committed Jan 16, 2025
1 parent d839854 commit 4934e18
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 3 deletions.
18 changes: 16 additions & 2 deletions ausankey/ausankey.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ class Sankey:
Finally, a separate string can be provided for each flow.
label_largest: bool
When True, each label is only printed on its largest-valued node across all stages.
label_duplicate : bool
When set False, will only print a middle label if that label didn't
appear in the previous stage. This minimises chart clutter but might
Expand Down Expand Up @@ -355,6 +358,7 @@ def __init__(
label_loc=("left", "none", "right"),
label_font=None,
label_duplicate=None,
label_largest=None,
label_values=None,
label_thresh=0,
label_thresh_ofsum=0,
Expand Down Expand Up @@ -416,6 +420,7 @@ def __init__(
self.label_thresh_ofsum = label_thresh_ofsum
self.label_thresh_ofmax = label_thresh_ofmax
self.label_duplicate = True if label_duplicate is None else label_duplicate
self.label_largest = False if label_largest is None else label_largest
self.label_values = False if label_values is None else label_values
self.node_lw = node_lw
self.node_width = node_width
Expand Down Expand Up @@ -581,6 +586,7 @@ def weight_labels(self):
self.weight_sum = np.empty(self.num_stages)

self.node_indiv_heights = {}
self.nodes_largest = {}

for ii in range(self.num_stages):
self.nodes_uniq[ii] = pd.Series(self.data[2 * ii]).dropna().unique()
Expand Down Expand Up @@ -616,6 +622,7 @@ def weight_labels(self):
self.node_indiv_heights[ii][0][lbl] = weight_cont + weight_only + weight_stop
self.node_indiv_heights[ii - 1][1][lbl] = weight_cont + weight_only + weight_strt
self.node_sizes[ii][lbl] = weight_cont + weight_only + max(weight_stop, weight_strt)
self.nodes_largest[lbl] = self.node_sizes[ii][lbl] if self.node_sizes[ii][lbl] > self.nodes_largest.get(lbl,0) else self.nodes_largest.get(lbl,0)

self.weight_sum[ii] = pd.Series(self.node_sizes[ii].values()).sum()

Expand Down Expand Up @@ -724,11 +731,18 @@ def subplot(self, ii):

for label in self.node_sizes[ii + lr]:
val = self.node_sizes[ii + lr][label]
if val is not None and (
if (val is None) or (val == 0):
continue

check_not_largest = self.label_largest and (
val < self.nodes_largest[label]
)
check_less_thresh = (
val < self.label_thresh
or val < self.label_thresh_ofsum * self.weight_sum[ii + lr]
or val < self.label_thresh_ofmax * self.plot_height_nom
):
)
if check_less_thresh or check_not_largest:
continue

if loc in ("left", "both"):
Expand Down
4 changes: 4 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog for ausankey

## 2025-01-16 v1.7

* Add parameter `label_largest`.

## 2025-01-11 v1.6

* Remove redundant `_val` suffix in various `...thresh_val` parameters.
Expand Down
11 changes: 11 additions & 0 deletions docs/interface.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,17 @@ sky.sankey(
```
![Image with options](iface_frame3_labels_dup.png)

Sometimes you only need to label once. If the values fluctuate significantly, one approach to successful labelling is to label only the largest-valued node across all stages:

```
sky.sankey(
data,
label_largest = True,
)
```

The position of the label will still be inferred from the `label_loc` setting.

By default the node label only includes the textual string.
To include the numerical value as well, set `label_values` to true:

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "hatchling.build"

[project]
name = "ausankey"
version = "1.6"
version = "1.7"

description = "Sankey diagrams simply"
readme = "README.md"
Expand Down

0 comments on commit 4934e18

Please sign in to comment.