From e3ea365725dc62e205bd67c2832d4c9f91e2f55f Mon Sep 17 00:00:00 2001
From: Ant White <antwhite@users.noreply.github.com>
Date: Thu, 31 Oct 2024 20:52:00 +0000
Subject: [PATCH] core: use friendlier names for duplicated nodes in mermaid
 output (#27747)

Thank you for contributing to LangChain!

- [x] **PR title**: "core: use friendlier names for duplicated nodes in
mermaid output"

- **Description:** When generating the Mermaid visualization of a chain,
if the chain had multiple nodes with the same name, the `reid` function
would identify each of them by its UUID `node_id` instead of its name.
This made the generated graph difficult to understand. This change
disambiguates same-named nodes by appending a 1-based index to their
names (e.g. `fake_llm_1`, `fake_llm_2`).
- **Issue:** None
- **Discussion:**
https://github.com/langchain-ai/langchain/discussions/27714
- **Dependencies:** None

- [x] **Add tests and docs**:
- Adds a snapshot unit test, `test_graph_mermaid_duplicate_nodes`, that
covers the Mermaid output for a chain with duplicated node names.


- [x] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

Additional guidelines:
- Make sure optional dependencies are imported within a function.
- Please do not add dependencies to pyproject.toml files (even optional
ones) unless they are required for unit tests.
- Most PRs should not touch more than one package.
- Changes should be backwards compatible.
- If you are adding something to community, do not re-import it in
langchain.

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.

# Example Code:
```python
from langchain_core.runnables import RunnablePassthrough

def fake_llm(prompt: str) -> str: # Fake LLM for the example
    return "completion"

runnable = {
    'llm1':  fake_llm,
    'llm2':  fake_llm,
} | RunnablePassthrough.assign(
    total_chars=lambda inputs: len(inputs['llm1'] + inputs['llm2'])
)

print(runnable.get_graph().draw_mermaid(with_styles=False))
```

# Before
```mermaid
graph TD;
	Parallel_llm1_llm2_Input --> 0b01139db5ed4587ad37964e3a40c0ec;
	0b01139db5ed4587ad37964e3a40c0ec --> Parallel_llm1_llm2_Output;
	Parallel_llm1_llm2_Input --> a98d4b56bd294156a651230b9293347f;
	a98d4b56bd294156a651230b9293347f --> Parallel_llm1_llm2_Output;
	Parallel_total_chars_Input --> Lambda;
	Lambda --> Parallel_total_chars_Output;
	Parallel_total_chars_Input --> Passthrough;
	Passthrough --> Parallel_total_chars_Output;
	Parallel_llm1_llm2_Output --> Parallel_total_chars_Input;
```

# After
```mermaid
graph TD;
	Parallel_llm1_llm2_Input --> fake_llm_1;
	fake_llm_1 --> Parallel_llm1_llm2_Output;
	Parallel_llm1_llm2_Input --> fake_llm_2;
	fake_llm_2 --> Parallel_llm1_llm2_Output;
	Parallel_total_chars_Input --> Lambda;
	Lambda --> Parallel_total_chars_Output;
	Parallel_total_chars_Input --> Passthrough;
	Passthrough --> Parallel_total_chars_Output;
	Parallel_llm1_llm2_Output --> Parallel_total_chars_Input;
```
---
 libs/core/langchain_core/runnables/graph.py     | 17 ++++++++++++-----
 .../runnables/__snapshots__/test_graph.ambr     | 14 ++++++++++++++
 .../tests/unit_tests/runnables/test_graph.py    | 14 ++++++++++++++
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/libs/core/langchain_core/runnables/graph.py b/libs/core/langchain_core/runnables/graph.py
index fd2c8adb98b23..2cb57c4e0fec5 100644
--- a/libs/core/langchain_core/runnables/graph.py
+++ b/libs/core/langchain_core/runnables/graph.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import inspect
-from collections import Counter
+from collections import defaultdict
 from collections.abc import Sequence
 from dataclasses import dataclass, field
 from enum import Enum
@@ -423,12 +423,19 @@ def prefixed(id: str) -> str:
     def reid(self) -> Graph:
         """Return a new graph with all nodes re-identified,
         using their unique, readable names where possible."""
-        node_labels = {node.id: node.name for node in self.nodes.values()}
-        node_label_counts = Counter(node_labels.values())
+        node_name_to_ids = defaultdict(list)
+        for node in self.nodes.values():
+            node_name_to_ids[node.name].append(node.id)
+
+        unique_labels = {
+            node_id: node_name if len(node_ids) == 1 else f"{node_name}_{i + 1}"
+            for node_name, node_ids in node_name_to_ids.items()
+            for i, node_id in enumerate(node_ids)
+        }
 
         def _get_node_id(node_id: str) -> str:
-            label = node_labels[node_id]
-            if is_uuid(node_id) and node_label_counts[label] == 1:
+            label = unique_labels[node_id]
+            if is_uuid(node_id):
                 return label
             else:
                 return node_id
diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr
index ba9d742b37407..208bdbac45e7f 100644
--- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr
+++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr
@@ -26,6 +26,20 @@
   
   '''
 # ---
+# name: test_graph_mermaid_duplicate_nodes[mermaid]
+  '''
+  graph TD;
+  	PromptInput --> PromptTemplate_1;
+  	Parallel_llm1_llm2_Input --> FakeListLLM_1;
+  	FakeListLLM_1 --> Parallel_llm1_llm2_Output;
+  	Parallel_llm1_llm2_Input --> FakeListLLM_2;
+  	FakeListLLM_2 --> Parallel_llm1_llm2_Output;
+  	PromptTemplate_1 --> Parallel_llm1_llm2_Input;
+  	PromptTemplate_2 --> PromptTemplateOutput;
+  	Parallel_llm1_llm2_Output --> PromptTemplate_2;
+  
+  '''
+# ---
 # name: test_graph_sequence[ascii]
   '''
               +-------------+              
diff --git a/libs/core/tests/unit_tests/runnables/test_graph.py b/libs/core/tests/unit_tests/runnables/test_graph.py
index 8898b64c01fae..39f2f2871a800 100644
--- a/libs/core/tests/unit_tests/runnables/test_graph.py
+++ b/libs/core/tests/unit_tests/runnables/test_graph.py
@@ -405,3 +405,17 @@ def test_graph_mermaid_escape_node_label() -> None:
     assert _escape_node_label("foo-bar") == "foo-bar"
     assert _escape_node_label("foo_1") == "foo_1"
     assert _escape_node_label("#foo*&!") == "_foo___"
+
+
+def test_graph_mermaid_duplicate_nodes(snapshot: SnapshotAssertion) -> None:
+    fake_llm = FakeListLLM(responses=["foo", "bar"])
+    sequence: Runnable = (
+        PromptTemplate.from_template("Hello, {input}")
+        | {
+            "llm1": fake_llm,
+            "llm2": fake_llm,
+        }
+        | PromptTemplate.from_template("{llm1} {llm2}")
+    )
+    graph = sequence.get_graph()
+    assert graph.draw_mermaid(with_styles=False) == snapshot(name="mermaid")