-
Notifications
You must be signed in to change notification settings - Fork 34
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feat/swc loaderfile Adding SWC loader to load SWC file #384
Changes from 21 commits
46c5da2
31bc91d
68385fa
703de88
ba883c3
c62cdb5
c0ebcec
5265094
8093370
f389344
debf533
8eb5a9a
e36a532
ac46824
779f19c
8b4bded
e9a6742
e5dbc88
b4d0b69
01914db
8fa1b05
ab9a8eb
bbac284
67f7319
d930f76
046dbda
c98a0f7
bf31f28
6056b2f
b118eea
763ce34
a1f1f76
374e029
3f4e9e4
f2225c7
c39b7e5
6d07eeb
d9ccfd5
877042c
b4df5d1
35b820e
416f0d5
4d934ea
aa291d8
9641853
5093b2a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,292 @@ | ||
import re | ||
|
||
import networkx as nx | ||
|
||
|
||
class SWCNode: | ||
""" | ||
Represents a single node in an SWC (Standardized Morphology Data Format) file. | ||
|
||
The SWC format is a widely used standard for representing neuronal morphology data. | ||
It consists of a series of lines, each representing a single node or sample point | ||
along the neuronal structure. For more information on the SWC format, see: | ||
https://swc-specification.readthedocs.io/en/latest/swc.html | ||
|
||
Attributes: | ||
UNDEFINED (int): ID representing an undefined node type. | ||
SOMA (int): ID representing a soma node. | ||
AXON (int): ID representing an axon node. | ||
BASAL_DENDRITE (int): ID representing a basal dendrite node. | ||
APICAL_DENDRITE (int): ID representing an apical dendrite node. | ||
CUSTOM (int): ID representing a custom node type. | ||
UNSPECIFIED_NEURITE (int): ID representing an unspecified neurite node. | ||
GLIA_PROCESSES (int): ID representing a glia process node. | ||
TYPE_NAMES (dict): A mapping of node type IDs to their string representations. | ||
""" | ||
|
||
UNDEFINED = 0 | ||
SOMA = 1 | ||
AXON = 2 | ||
BASAL_DENDRITE = 3 | ||
APICAL_DENDRITE = 4 | ||
CUSTOM = 5 | ||
UNSPECIFIED_NEURITE = 6 | ||
GLIA_PROCESSES = 7 | ||
|
||
TYPE_NAMES = { | ||
UNDEFINED: "Undefined", | ||
SOMA: "Soma", | ||
AXON: "Axon", | ||
BASAL_DENDRITE: "Basal Dendrite", | ||
APICAL_DENDRITE: "Apical Dendrite", | ||
CUSTOM: "Custom", | ||
UNSPECIFIED_NEURITE: "Unspecified Neurite", | ||
GLIA_PROCESSES: "Glia Processes", | ||
} | ||
|
||
def __init__(self, node_id, type_id, x, y, z, radius, parent_id): | ||
try: | ||
self.id = int(node_id) | ||
self.type = int(type_id) | ||
self.x = float(x) | ||
self.y = float(y) | ||
self.z = float(z) | ||
self.radius = float(radius) | ||
self.parent_id = int(parent_id) | ||
self.children = [] | ||
self.fraction_along = 0.0 | ||
except (ValueError, TypeError) as e: | ||
raise ValueError(f"Invalid data types in SWC line: {e}") | ||
|
||
def __repr__(self): | ||
type_name = self.TYPE_NAMES.get(self.type, f"Custom_{self.type}") | ||
return f"SWCNode(id={self.id}, type={type_name}, x={self.x:.2f}, y={self.y:.2f}, z={self.z:.2f}, radius={self.radius:.2f}, parent_id={self.parent_id})" | ||
|
||
|
||
class SWCGraph: | ||
AdityaPandeyCN marked this conversation as resolved.
Show resolved
Hide resolved
|
||
HEADER_FIELDS = [ | ||
"ORIGINAL_SOURCE", | ||
"CREATURE", | ||
"REGION", | ||
"FIELD/LAYER", | ||
"TYPE", | ||
"CONTRIBUTOR", | ||
"REFERENCE", | ||
"RAW", | ||
"EXTRAS", | ||
"SOMA_AREA", | ||
"SHRINKAGE_CORRECTION", | ||
"VERSION_NUMBER", | ||
"VERSION_DATE", | ||
"SCALE", | ||
] | ||
|
||
def __init__(self): | ||
self.nodes = {} | ||
self.root = None # New attribute to store the root node | ||
self.metadata = {} | ||
|
||
def add_node(self, node): | ||
""" | ||
Add a node to the SWC graph. | ||
|
||
Args: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. needs to be modified to our docstring format |
||
node (SWCNode): The node to be added. | ||
|
||
Raises: | ||
ValueError: If a node with the same ID already exists in the graph. | ||
""" | ||
if node.id in self.nodes: | ||
raise ValueError(f"Duplicate node ID: {node.id}") | ||
self.nodes[node.id] = node | ||
if node.parent_id == -1: | ||
if self.root is not None: | ||
raise ValueError("Multiple root nodes found") | ||
self.root = node | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There should only be one root node, so should we check here if one already exists? The I'd also add logging bits to all of these operations like this:
It'll make debugging easier later when we start working with large SWC files and running into errors. See the other pyneuroml files on how to set up logging. Here's the general python documentation: |
||
elif node.parent_id in self.nodes: | ||
parent = self.nodes[node.parent_id] | ||
parent.children.append(node.id) | ||
else: | ||
print(f"Warning: Parent {node.parent_id} not found for node {node.id}") | ||
|
||
def add_metadata(self, key, value): | ||
""" | ||
Add metadata to the SWC graph. | ||
|
||
Args: | ||
key (str): The key for the metadata. | ||
value (str): The value for the metadata. | ||
|
||
Note: | ||
Only valid header fields (as defined in HEADER_FIELDS) are added as metadata. | ||
""" | ||
if key in self.HEADER_FIELDS: | ||
self.metadata[key] = value | ||
|
||
AdityaPandeyCN marked this conversation as resolved.
Show resolved
Hide resolved
|
||
def get_segment_adjacency_list(self): | ||
""" | ||
Get the adjacency list of all segments (nodes) in the SWC graph. | ||
|
||
Returns a dict where each key is a parent node, and the value is the | ||
list of its child nodes. | ||
|
||
Nodes without children (leaf nodes) are not included as parents in the | ||
adjacency list. | ||
|
||
This method also stores the computed adjacency list in | ||
`self.adjacency_list` for future use by other methods. | ||
|
||
`self.adjacency_list` is populated each time this method is run, to | ||
ensure that users can regenerate it after making modifications to the | ||
SWC graph. If the graph has not changed, one only needs to | ||
populate it once and then re-use it as required. | ||
|
||
Returns: | ||
dict: A dictionary containing the adjacency list representation of the SWC graph. | ||
Keys are parent node objects, and values are lists of child node objects. | ||
""" | ||
child_lists = {} | ||
for node in self.nodes.values(): | ||
if node.parent_id != -1: | ||
if node.parent_id not in child_lists: | ||
child_lists[node.parent_id] = [] | ||
child_lists[node.parent_id].append(node) | ||
|
||
self.adjacency_list = child_lists | ||
return child_lists | ||
|
||
def get_graph(self): | ||
""" | ||
Get a networkx DiGraph representation of the SWC graph. | ||
|
||
The graph represents the neuronal morphology, where nodes correspond to | ||
points in the SWC data, and edges represent connections between parent | ||
and child nodes. The weight of each edge is the distance from the | ||
proximal point of the parent segment to the point where the child segment | ||
connects. | ||
|
||
This method uses the adjacency list computed by `get_segment_adjacency_list` | ||
to construct the graph. | ||
|
||
This method also stores the computed graph in the `self.graph` attribute | ||
for future use. | ||
|
||
For more information on networkx routines and operations on graphs, see: | ||
https://networkx.org/documentation/stable/reference/ | ||
|
||
Returns: | ||
networkx.DiGraph: A directed graph representing the SWC data. | ||
""" | ||
graph = nx.DiGraph() | ||
|
||
# Populate the graph with nodes | ||
for node in self.nodes.values(): | ||
graph.add_node( | ||
node.id, | ||
type=node.type, | ||
x=node.x, | ||
y=node.y, | ||
z=node.z, | ||
radius=node.radius, | ||
) | ||
|
||
# Populate the graph with edges using the adjacency list | ||
adjacency_list = self.get_segment_adjacency_list() | ||
for parent_id, children in adjacency_list.items(): | ||
parent_length = self.get_segment_length(parent_id) | ||
|
||
for child in children: | ||
fraction_along = child.fraction_along | ||
distance_to_proximal = parent_length * fraction_along | ||
graph.add_edge(parent_id, child.id, weight=distance_to_proximal) | ||
|
||
self.graph = graph | ||
return graph | ||
|
||
def get_parent(self, node_id): | ||
"""Get the parent of a given node.""" | ||
node = self.nodes.get(node_id) | ||
if node is None: | ||
raise ValueError(f"Node {node_id} not found") | ||
return self.nodes.get(node.parent_id) if node.parent_id != -1 else None | ||
|
||
def get_descendants(self, node_id): | ||
"""Get all descendants of a given node.""" | ||
node = self.nodes.get(node_id) | ||
if node is None: | ||
raise ValueError(f"Node {node_id} not found") | ||
|
||
descendants = [] | ||
queue = node.children.copy() | ||
while queue: | ||
child_id = queue.pop(0) | ||
child = self.nodes[child_id] | ||
descendants.append(child) | ||
queue.extend(child.children) | ||
return descendants | ||
|
||
def get_nodes_with_multiple_children(self, type_id=None): | ||
"""Get all nodes that have more than one child, optionally filtering by type.""" | ||
nodes = [] | ||
for node in self.nodes.values(): | ||
if len(node.children) > 1: | ||
if type_id is None or node.type == type_id: | ||
nodes.append(node) | ||
return nodes | ||
|
||
def get_nodes_by_type(self, type_id): | ||
"""Get all nodes of a specific type.""" | ||
return [node for node in self.nodes.values() if node.type == type_id] | ||
|
||
def get_branch_points(self, *types): | ||
"""Get all branch points (nodes with multiple children) of the given types.""" | ||
nodes = [] | ||
for type_id in types: | ||
nodes.extend(self.get_nodes_with_multiple_children(type_id)) | ||
return nodes | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we probably want to return a dict with the key being the type and the value being the list of nodes of that type? The use case here is "give me branch points in axons and dendrites" and we'll probably want to treat them differently. If not, the user can always iterate over all the values. |
||
|
||
def has_soma_node(self): | ||
""" | ||
Check if the graph contains at least one soma node (type 1). | ||
""" | ||
return any(node.type == SWCNode.SOMA for node in self.nodes.values()) | ||
|
||
|
||
def parse_header(line): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. don't forget the type hints and doc string :) |
||
for field in SWCGraph.HEADER_FIELDS: | ||
match = re.match(rf"{field}\s+(.+)", line, re.IGNORECASE) | ||
if match: | ||
return field, match.group(1).strip() | ||
return None, None | ||
|
||
|
||
def load_swc(filename): | ||
tree = SWCGraph() | ||
try: | ||
with open(filename, "r") as file: | ||
for line in file: | ||
line = line.strip() | ||
if not line: | ||
continue | ||
if line.startswith("#"): | ||
key, value = parse_header(line[1:].strip()) | ||
if key: | ||
tree.add_metadata(key, value) | ||
continue | ||
|
||
parts = line.split() | ||
if len(parts) != 7: | ||
print(f"Warning: Skipping invalid line: {line}") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd make this a logger.error or logger.warning perhaps, and we need to decide if this should stop the parsing of the file. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this still needs to be updated @AdityaBITMESRA |
||
continue | ||
|
||
node_id, type_id, x, y, z, radius, parent_id = parts | ||
try: | ||
node = SWCNode(node_id, type_id, x, y, z, radius, parent_id) | ||
tree.add_node(node) | ||
except ValueError as e: | ||
print(f"Warning: {e} in line: {line}") | ||
|
||
except (FileNotFoundError, IOError) as e: | ||
print(f"Error reading file {filename}: {e}") | ||
|
||
return tree |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import unittest | ||
|
||
import networkx as nx | ||
|
||
from pyneuroml.swc.LoadSWC import SWCGraph, SWCNode | ||
|
||
|
||
class TestSWCNode(unittest.TestCase): | ||
def test_init(self): | ||
node = SWCNode(1, 1, 0.0, 0.0, 0.0, 1.0, -1) | ||
self.assertEqual(node.id, 1) | ||
self.assertEqual(node.type, 1) | ||
self.assertEqual(node.x, 0.0) | ||
self.assertEqual(node.y, 0.0) | ||
self.assertEqual(node.z, 0.0) | ||
self.assertEqual(node.radius, 1.0) | ||
self.assertEqual(node.parent_id, -1) | ||
|
||
def test_invalid_init(self): | ||
with self.assertRaises(ValueError): | ||
SWCNode("a", 1, 0.0, 0.0, 0.0, 1.0, -1) | ||
|
||
|
||
class TestSWCGraph(unittest.TestCase): | ||
def setUp(self): | ||
self.tree = SWCGraph() | ||
self.node1 = SWCNode(1, 1, 0.0, 0.0, 0.0, 1.0, -1) | ||
self.node2 = SWCNode(2, 3, 1.0, 0.0, 0.0, 0.5, 1) | ||
self.node2.fraction_along = 1.0 | ||
self.node3 = SWCNode(3, 3, 2.0, 0.0, 0.0, 0.5, 2) | ||
self.node3.fraction_along = 0.5 | ||
self.tree.add_node(self.node1) | ||
self.tree.add_node(self.node2) | ||
self.tree.add_node(self.node3) | ||
|
||
def test_duplicate_node(self): | ||
with self.assertRaises(ValueError): | ||
self.tree.add_node(SWCNode(1, 1, 0.0, 0.0, 0.0, 1.0, -1)) | ||
|
||
def test_add_metadata(self): | ||
self.tree.add_metadata("ORIGINAL_SOURCE", "file.swc") | ||
self.assertEqual(self.tree.metadata["ORIGINAL_SOURCE"], "file.swc") | ||
|
||
def test_invalid_metadata(self): | ||
self.tree.add_metadata("INVALID_FIELD", "value") | ||
self.assertEqual(self.tree.metadata, {}) | ||
|
||
def test_get_segment_adjacency_list(self): | ||
adjacency_list = self.tree.get_segment_adjacency_list() | ||
self.assertEqual(adjacency_list, {1: [self.node2], 2: [self.node3]}) | ||
|
||
def test_get_graph(self): | ||
graph = self.tree.get_graph() | ||
self.assertIsInstance(graph, nx.DiGraph) | ||
self.assertEqual(len(graph.nodes), 3) | ||
self.assertEqual(len(graph.edges), 2) | ||
self.assertEqual(graph.nodes[self.node1.id]["type"], self.node1.type) | ||
self.assertEqual(graph.nodes[self.node2.id]["x"], self.node2.x) | ||
self.assertEqual(graph.nodes[self.node3.id]["radius"], self.node3.radius) | ||
self.assertEqual(graph.edges[(self.node1.id, self.node2.id)]["weight"], 0.0) | ||
self.assertEqual(graph.edges[(self.node2.id, self.node3.id)]["weight"], 0.5) | ||
|
||
def test_get_parent(self): | ||
self.assertIsNone(self.tree.get_parent(self.node1.id)) | ||
self.assertEqual(self.tree.get_parent(self.node2.id), self.node1) | ||
self.assertEqual(self.tree.get_parent(self.node3.id), self.node2) | ||
with self.assertRaises(ValueError): | ||
self.tree.get_parent(4) | ||
|
||
def test_get_descendants(self): | ||
self.assertEqual( | ||
self.tree.get_descendants(self.node1.id), [self.node2, self.node3] | ||
) | ||
self.assertEqual(self.tree.get_descendants(self.node2.id), [self.node3]) | ||
self.assertEqual(self.tree.get_descendants(self.node3.id), []) | ||
with self.assertRaises(ValueError): | ||
self.tree.get_descendants(4) | ||
|
||
def test_get_nodes_with_multiple_children(self): | ||
self.assertEqual(self.tree.get_nodes_with_multiple_children(), []) | ||
self.assertEqual(self.tree.get_nodes_with_multiple_children(SWCNode.SOMA), []) | ||
self.assertEqual( | ||
self.tree.get_nodes_with_multiple_children(SWCNode.BASAL_DENDRITE), [] | ||
) | ||
|
||
def test_get_nodes_by_type(self): | ||
self.assertEqual(len(self.tree.get_nodes_by_type(SWCNode.SOMA)), 1) | ||
self.assertEqual(len(self.tree.get_nodes_by_type(SWCNode.BASAL_DENDRITE)), 2) | ||
self.assertEqual(len(self.tree.get_nodes_by_type(SWCNode.AXON)), 0) | ||
|
||
def test_get_branch_points(self): | ||
self.assertEqual(self.tree.get_branch_points(SWCNode.SOMA), []) | ||
self.assertEqual(self.tree.get_branch_points(SWCNode.BASAL_DENDRITE), []) | ||
self.assertEqual( | ||
self.tree.get_branch_points(SWCNode.SOMA, SWCNode.BASAL_DENDRITE), [] | ||
) | ||
|
||
def test_has_soma_node(self): | ||
self.assertTrue(self.tree.has_soma_node()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These aren't showing up in the docs because they're not in the format we're using. You'll have to use
and so on. See the other code bits. The sphinx documentation is here:
https://www.sphinx-doc.org/en/master/usage/domains/python.html#info-field-lists
This is the "default" format, which is what we use.