-
Notifications
You must be signed in to change notification settings - Fork 34
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feat/swc loaderfile Adding SWC loader to load SWC file #384
Changes from 38 commits
46c5da2
31bc91d
68385fa
703de88
ba883c3
c62cdb5
c0ebcec
5265094
8093370
f389344
debf533
8eb5a9a
e36a532
ac46824
779f19c
8b4bded
e9a6742
e5dbc88
b4d0b69
01914db
8fa1b05
ab9a8eb
bbac284
67f7319
d930f76
046dbda
c98a0f7
bf31f28
6056b2f
b118eea
763ce34
a1f1f76
374e029
3f4e9e4
f2225c7
c39b7e5
6d07eeb
d9ccfd5
877042c
b4df5d1
35b820e
416f0d5
4d934ea
aa291d8
9641853
5093b2a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,266 @@ | ||
import logging
import re
import typing
|
||
|
||
class SWCNode:
    """
    Represents a single node in an SWC (Standardized Morphology Data Format) file.

    The SWC format is a widely used standard for representing neuronal
    morphology data. It consists of a series of lines, each representing a
    single node or sample point along the neuronal structure. For more
    information on the SWC format, see:
    https://swc-specification.readthedocs.io/en/latest/swc.html

    :cvar UNDEFINED: ID representing an undefined node type.
    :cvar SOMA: ID representing a soma node.
    :cvar AXON: ID representing an axon node.
    :cvar BASAL_DENDRITE: ID representing a basal dendrite node.
    :cvar APICAL_DENDRITE: ID representing an apical dendrite node.
    :cvar CUSTOM: ID representing a custom node type.
    :cvar UNSPECIFIED_NEURITE: ID representing an unspecified neurite node.
    :cvar GLIA_PROCESSES: ID representing a glia process node.
    :cvar TYPE_NAMES: mapping of node type IDs to their string representations.
    """

    UNDEFINED = 0
    SOMA = 1
    AXON = 2
    BASAL_DENDRITE = 3
    APICAL_DENDRITE = 4
    CUSTOM = 5
    UNSPECIFIED_NEURITE = 6
    GLIA_PROCESSES = 7

    TYPE_NAMES = {
        UNDEFINED: "Undefined",
        SOMA: "Soma",
        AXON: "Axon",
        BASAL_DENDRITE: "Basal Dendrite",
        APICAL_DENDRITE: "Apical Dendrite",
        CUSTOM: "Custom",
        UNSPECIFIED_NEURITE: "Unspecified Neurite",
        GLIA_PROCESSES: "Glia Processes",
    }

    def __init__(self, node_id, type_id, x, y, z, radius, parent_id) -> None:
        """
        Build a node from the seven fields of an SWC sample line.

        Every argument may be given as a number or as its string form (as read
        from a file); it is converted with ``int`` or ``float`` as appropriate.

        :param node_id: identifier of this node
        :type node_id: int or str
        :param type_id: structure type identifier (see the class constants)
        :type type_id: int or str
        :param x: x coordinate
        :type x: float or str
        :param y: y coordinate
        :type y: float or str
        :param z: z coordinate
        :type z: float or str
        :param radius: radius at this sample point
        :type radius: float or str
        :param parent_id: identifier of the parent node
        :type parent_id: int or str
        :raises ValueError: if any field cannot be converted to its numeric type
        """
        try:
            self.id = int(node_id)
            self.type = int(type_id)
            self.x = float(x)
            self.y = float(y)
            self.z = float(z)
            self.radius = float(radius)
            self.parent_id = int(parent_id)
        except (ValueError, TypeError) as e:
            # Chain the original error so the failing conversion stays visible
            # in the traceback.
            raise ValueError(f"Invalid data types in SWC line: {e}") from e
        # List of SWCNode objects that name this node as their parent.
        self.children = []

    def to_string(self) -> str:
        """
        Returns a human-readable string representation of the node.

        Type IDs that are not in ``TYPE_NAMES`` are shown as ``Custom_<id>``.

        :returns: one-line description of the node
        :rtype: str
        """
        type_name = self.TYPE_NAMES.get(self.type, f"Custom_{self.type}")
        return (
            f"Node ID: {self.id}, Type: {type_name}, "
            f"Coordinates: ({self.x:.2f}, {self.y:.2f}, {self.z:.2f}), "
            f"Radius: {self.radius:.2f}, Parent ID: {self.parent_id}"
        )

    def __str__(self) -> str:
        """
        Return the same text as :meth:`to_string` so ``print(node)`` is useful.

        :returns: one-line description of the node
        :rtype: str
        """
        return self.to_string()

    def __repr__(self) -> str:
        """
        Return a compact representation listing the node's own fields.

        ``children`` is left out on purpose to keep the output short.

        :returns: constructor-like description of the node
        :rtype: str
        """
        return (
            f"{type(self).__name__}(id={self.id}, type={self.type}, "
            f"x={self.x}, y={self.y}, z={self.z}, "
            f"radius={self.radius}, parent_id={self.parent_id})"
        )
|
||
|
||
class SWCGraph:
    """
    In-memory representation of the morphology described by an SWC file.

    The graph keeps its nodes in insertion order in ``nodes``, remembers the
    root node (the node whose ``parent_id`` is ``-1``) in ``root`` and stores
    recognised header entries in ``metadata``.

    Nodes must be added through :meth:`add_node`; the ID lookups rely on an
    internal index that is only maintained there.

    :cvar HEADER_FIELDS: header keys accepted by :meth:`add_metadata`.
    """

    HEADER_FIELDS = [
        "ORIGINAL_SOURCE",
        "CREATURE",
        "REGION",
        "FIELD/LAYER",
        "TYPE",
        "CONTRIBUTOR",
        "REFERENCE",
        "RAW",
        "EXTRAS",
        "SOMA_AREA",
        "SHRINKAGE_CORRECTION",
        "VERSION_NUMBER",
        "VERSION_DATE",
        "SCALE",
    ]

    _logger = logging.getLogger(__name__)

    def __init__(self) -> None:
        """Create an empty graph with no nodes, no root and no metadata."""
        self.nodes: typing.List["SWCNode"] = []
        self.root: typing.Optional["SWCNode"] = None
        self.metadata: typing.Dict[str, str] = {}
        # Node ID -> node, so duplicate and parent lookups do not scan `nodes`.
        self._nodes_by_id: typing.Dict[int, "SWCNode"] = {}
        # Parent ID -> nodes that were added before that parent existed.
        self._orphans: typing.Dict[int, typing.List["SWCNode"]] = {}

    def get_node(self, node_id: int) -> "SWCNode":
        """
        Get the node with the given ID.

        :param node_id: ID of the node to retrieve
        :type node_id: int
        :returns: the matching node
        :rtype: SWCNode
        :raises ValueError: if no node with this ID is in the graph
        """
        try:
            return self._nodes_by_id[node_id]
        except (KeyError, TypeError):
            # TypeError covers unhashable IDs, which cannot match any node.
            raise ValueError(f"Node {node_id} not found") from None

    def add_node(self, node: "SWCNode") -> None:
        """
        Add a node to the SWC graph.

        The node is linked to its parent if the parent is already present. If
        the parent is added later, the link is made at that point, so the
        order in which nodes are added does not matter. A node whose
        ``parent_id`` is ``-1`` becomes the root; if a root was already set it
        is replaced and a warning is logged.

        :param node: the node to be added
        :type node: SWCNode
        :raises ValueError: if a node with the same ID already exists in the graph
        """
        if node.id in self._nodes_by_id:
            raise ValueError(f"Duplicate node ID: {node.id}")

        self.nodes.append(node)
        self._nodes_by_id[node.id] = node
        self._logger.debug("Added node %s (parent %s)", node.id, node.parent_id)

        # Adopt children that arrived before this node did.
        waiting = self._orphans.pop(node.id, None)
        if waiting:
            node.children.extend(waiting)

        if node.parent_id == -1:
            if self.root is not None:
                self._logger.warning(
                    "Root node %s is being replaced by node %s",
                    self.root.id,
                    node.id,
                )
            self.root = node
            return

        parent = self._nodes_by_id.get(node.parent_id)
        if parent is not None:
            parent.children.append(node)
        else:
            # Parent not seen yet: remember the node until the parent is added.
            self._orphans.setdefault(node.parent_id, []).append(node)

    def add_metadata(self, key: str, value: str) -> None:
        """
        Add metadata to the SWC graph.

        Only keys listed in ``HEADER_FIELDS`` are stored; any other key is
        ignored.

        :param key: the key for the metadata
        :type key: str
        :param value: the value for the metadata
        :type value: str
        """
        if key in self.HEADER_FIELDS:
            self.metadata[key] = value
        else:
            self._logger.debug("Ignoring unknown metadata field: %s", key)

    def get_parent(self, node_id: int) -> typing.Optional["SWCNode"]:
        """
        Get the parent node of a given node in the SWC tree.

        :param node_id: the ID of the node for which to retrieve the parent
        :type node_id: int
        :returns: the parent node, or None if the node is a root or its parent
            is not in the graph
        :rtype: SWCNode or None
        :raises ValueError: if the specified node_id is not found in the SWC tree
        """
        node = self.get_node(node_id)
        if node.parent_id == -1:
            return None
        return self._nodes_by_id.get(node.parent_id)

    def get_children(self, node_id: int) -> typing.List["SWCNode"]:
        """
        Get a list of child nodes for a given node.

        The node's ``children`` attribute is refreshed with the returned list.

        :param node_id: the ID of the node for which to get the children
        :type node_id: int
        :returns: the nodes whose ``parent_id`` equals ``node_id`` (possibly empty)
        :rtype: list(SWCNode)
        :raises ValueError: if the provided node_id is not found in the graph
        """
        parent_node = self.get_node(node_id)
        children = [node for node in self.nodes if node.parent_id == node_id]
        parent_node.children = children
        return children

    def get_nodes_with_multiple_children(
        self, type_id: typing.Optional[int] = None
    ) -> typing.List["SWCNode"]:
        """
        Get all nodes that have more than one child.

        As a side effect every node's ``children`` attribute is refreshed.

        :param type_id: if given, only nodes of this type are returned; a type
            that no node has simply yields an empty list
        :type type_id: int or None
        :returns: nodes with more than one child, in insertion order
        :rtype: list(SWCNode)
        """
        # Group children by parent in one pass rather than rescanning the
        # whole node list for every node.
        children_by_parent: typing.Dict[int, typing.List["SWCNode"]] = {}
        for node in self.nodes:
            children_by_parent.setdefault(node.parent_id, []).append(node)

        nodes = []
        for node in self.nodes:
            children = children_by_parent.get(node.id, [])
            node.children = children
            if len(children) > 1 and (type_id is None or node.type == type_id):
                nodes.append(node)

        if type_id is not None:
            self._logger.debug(
                "Found %d nodes of type %s with multiple children.",
                len(nodes),
                type_id,
            )

        return nodes

    def get_nodes_by_type(self, type_id: int) -> typing.List["SWCNode"]:
        """
        Get a list of nodes of a specific type.

        :param type_id: the type ID of the nodes to retrieve
        :type type_id: int
        :returns: nodes that have the specified type ID; empty if no node has it
        :rtype: list(SWCNode)
        """
        return [node for node in self.nodes if node.type == type_id]

    def get_branch_points(self, *types: int) -> typing.List["SWCNode"]:
        """
        Get all branch points (nodes with multiple children) of the given types.

        :param types: one or more node type IDs to filter the branch points by;
            if no types are provided, all branch points in the graph are returned
        :type types: int
        :returns: branch points of the requested types, grouped in the order the
            types were given
        :rtype: list(SWCNode)
        """
        if not types:
            return self.get_nodes_with_multiple_children()

        nodes = []
        for type_id in types:
            nodes.extend(self.get_nodes_with_multiple_children(type_id))
        return nodes
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We probably want to return a dict with the key being the type and the value being the list of nodes of that type. The use case here is "give me branch points in axons and dendrites", and we'll probably want to treat them differently. If not, the user can always iterate over all the values. |
||
|
||
|
||
def parse_header(
    line: str,
) -> typing.Tuple[typing.Optional[str], typing.Optional[str]]:
    """
    Extract a known header field and its value from an SWC comment line.

    The line must start with one of ``SWCGraph.HEADER_FIELDS`` (matched
    case-insensitively), followed by whitespace and a non-empty value.

    :param line: header text with the leading ``#`` already removed
    :type line: str
    :returns: ``(field, value)`` where ``field`` is spelled as in
        ``SWCGraph.HEADER_FIELDS`` and ``value`` is stripped of surrounding
        whitespace, or ``(None, None)`` if no known field matches
    :rtype: tuple(str or None, str or None)
    """
    for field in SWCGraph.HEADER_FIELDS:
        # Escape the field name so it is always matched literally.
        match = re.match(rf"{re.escape(field)}\s+(.+)", line, re.IGNORECASE)
        if match:
            return field, match.group(1).strip()
    return None, None
|
||
|
||
def load_swc(filename: str) -> "SWCGraph":
    """
    Load an SWC file into an :class:`SWCGraph`.

    Blank lines are skipped. Lines starting with ``#`` are treated as header
    lines and stored as metadata when they name a known header field. Every
    other line must consist of exactly seven whitespace-separated fields
    describing one node.

    Loading is best effort: malformed lines and nodes that cannot be added
    are logged as warnings and skipped, and a file that cannot be opened or
    read is logged as an error. In each case the graph built so far
    (possibly empty) is returned.

    :param filename: path of the SWC file to read
    :type filename: str
    :returns: graph holding the nodes and metadata that could be read
    :rtype: SWCGraph
    """
    logger = logging.getLogger(__name__)
    tree = SWCGraph()
    try:
        with open(filename, "r") as file:
            for line_number, raw_line in enumerate(file, start=1):
                line = raw_line.strip()
                if not line:
                    continue

                if line.startswith("#"):
                    key, value = parse_header(line[1:].strip())
                    if key:
                        tree.add_metadata(key, value)
                    continue

                parts = line.split()
                if len(parts) != 7:
                    logger.warning(
                        "Skipping invalid line %d in %s: %s",
                        line_number,
                        filename,
                        line,
                    )
                    continue

                try:
                    tree.add_node(SWCNode(*parts))
                except ValueError as e:
                    logger.warning(
                        "%s in line %d of %s: %s", e, line_number, filename, line
                    )

    except OSError as e:
        # OSError covers FileNotFoundError and IOError.
        logger.error("Error reading file %s: %s", filename, e)

    return tree
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import unittest | ||
|
||
from pyneuroml.swc.LoadSWC import SWCGraph, SWCNode | ||
|
||
|
||
class TestSWCNode(unittest.TestCase):
    """Unit tests for constructing SWCNode objects."""

    def test_init(self):
        """Constructor arguments are stored in the matching attributes."""
        node = SWCNode(1, 1, 0.0, 0.0, 0.0, 1.0, -1)
        expected = {
            "id": 1,
            "type": 1,
            "x": 0.0,
            "y": 0.0,
            "z": 0.0,
            "radius": 1.0,
            "parent_id": -1,
        }
        for attribute, value in expected.items():
            self.assertEqual(getattr(node, attribute), value)

    def test_invalid_init(self):
        """A non-numeric node ID is rejected with ValueError."""
        self.assertRaises(ValueError, SWCNode, "a", 1, 0.0, 0.0, 0.0, 1.0, -1)
|
||
|
||
class TestSWCGraph(unittest.TestCase):
    """Unit tests for SWCGraph built from a three-node chain 1 -> 2 -> 3."""

    def setUp(self):
        """Create a graph with a soma root (1) and two dendrite nodes (2, 3)."""
        self.tree = SWCGraph()
        self.node1 = SWCNode(1, 1, 0.0, 0.0, 0.0, 1.0, -1)
        self.node2 = SWCNode(2, 3, 1.0, 0.0, 0.0, 0.5, 1)
        self.node3 = SWCNode(3, 3, 2.0, 0.0, 0.0, 0.5, 2)
        self.tree.add_node(self.node1)
        self.tree.add_node(self.node2)
        self.tree.add_node(self.node3)

    def test_duplicate_node(self):
        """Adding a node whose ID is already present raises ValueError."""
        with self.assertRaises(ValueError):
            self.tree.add_node(SWCNode(1, 1, 0.0, 0.0, 0.0, 1.0, -1))

    def test_add_metadata(self):
        """A known header field is stored."""
        self.tree.add_metadata("ORIGINAL_SOURCE", "file.swc")
        self.assertEqual(self.tree.metadata["ORIGINAL_SOURCE"], "file.swc")

    def test_invalid_metadata(self):
        """An unknown header field is ignored."""
        self.tree.add_metadata("INVALID_FIELD", "value")
        self.assertEqual(self.tree.metadata, {})

    def test_get_parent(self):
        """Parents are resolved; the root has none; unknown IDs raise."""
        self.assertIsNone(self.tree.get_parent(self.node1.id))
        self.assertEqual(self.tree.get_parent(self.node2.id), self.node1)
        self.assertEqual(self.tree.get_parent(self.node3.id), self.node2)
        with self.assertRaises(ValueError):
            self.tree.get_parent(4)

    def test_get_children(self):
        """Children are resolved; unknown IDs raise."""
        self.assertEqual(self.tree.get_children(self.node1.id), [self.node2])
        self.assertEqual(self.tree.get_children(self.node2.id), [self.node3])
        # Exercise get_children itself for the missing-node case.
        with self.assertRaises(ValueError):
            self.tree.get_children(4)

    def test_get_nodes_with_multiple_children(self):
        """A node becomes a branch point once it has two children."""
        node4 = SWCNode(4, 3, 3.0, 0.0, 0.0, 0.5, 2)
        self.tree.add_node(node4)
        self.assertEqual(self.tree.get_nodes_with_multiple_children(), [self.node2])

    def test_get_nodes_by_type(self):
        """Nodes are filtered by type; an unused type gives an empty list."""
        self.assertEqual(self.tree.get_nodes_by_type(1), [self.node1])
        self.assertEqual(self.tree.get_nodes_by_type(3), [self.node2, self.node3])
        self.assertEqual(self.tree.get_nodes_by_type(99), [])
|
||
|
||
if __name__ == "__main__":
    # Run the test cases when this module is executed as a script.
    unittest.main()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These aren't showing up in the docs because they're not in the format we're using. You'll have to use Sphinx info field lists (`:param name:`, `:type name:`, `:returns:`, `:rtype:`) and so on. See the other code bits. The sphinx documentation is here:
https://www.sphinx-doc.org/en/master/usage/domains/python.html#info-field-lists
This is the "default" format, which is what we use.