Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/swc loaderfile Adding SWC loader to load SWC file #384

Merged
merged 46 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
46c5da2
Merge branch 'development' of https://github.com/NeuroML/pyNeuroML in…
AdityaPandeyCN Jun 3, 2024
31bc91d
Merge branch 'development' of https://github.com/NeuroML/pyNeuroML in…
AdityaPandeyCN Jun 5, 2024
68385fa
added SWCnode class
AdityaPandeyCN Jun 5, 2024
703de88
Include type and parent attribute
AdityaPandeyCN Jun 5, 2024
ba883c3
added SWCgraph class
AdityaPandeyCN Jun 5, 2024
c62cdb5
restructured to SWCtree
AdityaPandeyCN Jun 5, 2024
c0ebcec
extended SWCTree class and added load_swc
AdityaPandeyCN Jun 5, 2024
5265094
removed unused imports
AdityaPandeyCN Jun 5, 2024
8093370
added docstrings
AdityaPandeyCN Jun 7, 2024
f389344
replaced with SWCGraph class
AdityaPandeyCN Jun 9, 2024
debf533
added validation check
AdityaPandeyCN Jun 9, 2024
8eb5a9a
added validation check
AdityaPandeyCN Jun 9, 2024
e36a532
added load_swc function
AdityaPandeyCN Jun 9, 2024
ac46824
added unit test file & TestSWCNode class
AdityaPandeyCN Jun 10, 2024
779f19c
removed unused errors
AdityaPandeyCN Jun 10, 2024
8b4bded
removed unused variable
AdityaPandeyCN Jun 10, 2024
e9a6742
added TestSWCGraph class
AdityaPandeyCN Jun 10, 2024
e5dbc88
added __init__.py file
AdityaPandeyCN Jun 10, 2024
b4d0b69
added test_get_graph function
AdityaPandeyCN Jun 10, 2024
01914db
added test_get_parent
AdityaPandeyCN Jun 10, 2024
8fa1b05
added more tests
AdityaPandeyCN Jun 11, 2024
ab9a8eb
replaced the test file for updated LoadSWC.py func
AdityaPandeyCN Jun 18, 2024
bbac284
updated the LoadSWC.py function
AdityaPandeyCN Jun 18, 2024
67f7319
Merge branch 'development' of https://github.com/NeuroML/pyNeuroML in…
AdityaPandeyCN Jun 18, 2024
d930f76
Merge branch 'development' into feat/SWCLoaderfile
AdityaPandeyCN Jun 18, 2024
046dbda
updated the docstring
AdityaPandeyCN Jun 18, 2024
c98a0f7
updated
AdityaPandeyCN Jun 18, 2024
bf31f28
removed get_descendants
AdityaPandeyCN Jun 18, 2024
6056b2f
removed test_get_descendants
AdityaPandeyCN Jun 18, 2024
b118eea
changed the doctrings
AdityaPandeyCN Jun 19, 2024
763ce34
removed unused imports
AdityaPandeyCN Jun 19, 2024
a1f1f76
changes making the function correct
AdityaPandeyCN Jun 20, 2024
374e029
changes making the function correct
AdityaPandeyCN Jun 20, 2024
3f4e9e4
added more test
AdityaPandeyCN Jun 20, 2024
f2225c7
added get_nodes_by_type test
AdityaPandeyCN Jun 20, 2024
c39b7e5
removed unused function
AdityaPandeyCN Jun 21, 2024
6d07eeb
changes
AdityaPandeyCN Jun 21, 2024
d9ccfd5
changes
AdityaPandeyCN Jun 21, 2024
877042c
added docstrings
AdityaPandeyCN Jun 25, 2024
b4df5d1
added docstrings in parse header function
AdityaPandeyCN Jun 25, 2024
35b820e
added docstrings and refactored the functions
AdityaPandeyCN Jun 25, 2024
416f0d5
changed the docstring format
AdityaPandeyCN Jun 25, 2024
4d934ea
changed the docstring format
AdityaPandeyCN Jun 26, 2024
aa291d8
changed the string represenation function
AdityaPandeyCN Jun 26, 2024
9641853
Merge branch 'development' into feat/SWCLoaderfile
sanjayankur31 Jun 26, 2024
5093b2a
Merge branch 'development' into feat/SWCLoaderfile
sanjayankur31 Jun 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
292 changes: 292 additions & 0 deletions pyneuroml/swc/LoadSWC.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,292 @@
import re

import networkx as nx


class SWCNode:
"""
Represents a single node in an SWC (Standardized Morphology Data Format) file.

The SWC format is a widely used standard for representing neuronal morphology data.
It consists of a series of lines, each representing a single node or sample point
along the neuronal structure. For more information on the SWC format, see:
https://swc-specification.readthedocs.io/en/latest/swc.html

Attributes:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These aren't showing up in the docs because they're not in the format we're using. You'll have to use

:param UNDEFINED: ...
:type UNDEFINED: ...

and so on. See the other code bits. The sphinx documentation is here:

https://www.sphinx-doc.org/en/master/usage/domains/python.html#info-field-lists

This is the "default" format, which is what we use.

UNDEFINED (int): ID representing an undefined node type.
SOMA (int): ID representing a soma node.
AXON (int): ID representing an axon node.
BASAL_DENDRITE (int): ID representing a basal dendrite node.
APICAL_DENDRITE (int): ID representing an apical dendrite node.
CUSTOM (int): ID representing a custom node type.
UNSPECIFIED_NEURITE (int): ID representing an unspecified neurite node.
GLIA_PROCESSES (int): ID representing a glia process node.
TYPE_NAMES (dict): A mapping of node type IDs to their string representations.
"""

UNDEFINED = 0
SOMA = 1
AXON = 2
BASAL_DENDRITE = 3
APICAL_DENDRITE = 4
CUSTOM = 5
UNSPECIFIED_NEURITE = 6
GLIA_PROCESSES = 7

TYPE_NAMES = {
UNDEFINED: "Undefined",
SOMA: "Soma",
AXON: "Axon",
BASAL_DENDRITE: "Basal Dendrite",
APICAL_DENDRITE: "Apical Dendrite",
CUSTOM: "Custom",
UNSPECIFIED_NEURITE: "Unspecified Neurite",
GLIA_PROCESSES: "Glia Processes",
}

def __init__(self, node_id, type_id, x, y, z, radius, parent_id):
try:
self.id = int(node_id)
self.type = int(type_id)
self.x = float(x)
self.y = float(y)
self.z = float(z)
self.radius = float(radius)
self.parent_id = int(parent_id)
self.children = []
self.fraction_along = 0.0
except (ValueError, TypeError) as e:
raise ValueError(f"Invalid data types in SWC line: {e}")

def __repr__(self):
type_name = self.TYPE_NAMES.get(self.type, f"Custom_{self.type}")
return f"SWCNode(id={self.id}, type={type_name}, x={self.x:.2f}, y={self.y:.2f}, z={self.z:.2f}, radius={self.radius:.2f}, parent_id={self.parent_id})"


class SWCGraph:
AdityaPandeyCN marked this conversation as resolved.
Show resolved Hide resolved
HEADER_FIELDS = [
"ORIGINAL_SOURCE",
"CREATURE",
"REGION",
"FIELD/LAYER",
"TYPE",
"CONTRIBUTOR",
"REFERENCE",
"RAW",
"EXTRAS",
"SOMA_AREA",
"SHRINKAGE_CORRECTION",
"VERSION_NUMBER",
"VERSION_DATE",
"SCALE",
]

def __init__(self):
self.nodes = {}
self.root = None # New attribute to store the root node
self.metadata = {}

def add_node(self, node):
"""
Add a node to the SWC graph.

Args:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

needs to be modified to our docstring format

node (SWCNode): The node to be added.

Raises:
ValueError: If a node with the same ID already exists in the graph.
"""
if node.id in self.nodes:
raise ValueError(f"Duplicate node ID: {node.id}")
self.nodes[node.id] = node
if node.parent_id == -1:
if self.root is not None:
raise ValueError("Multiple root nodes found")
self.root = node
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There should only be one root node, so should we check here if one already exists?

The self.nodes.append bit should probably happen right at the end of the method after this new check.

I'd also add logging bits to all of these operations like this:

logger.debug(f"New node added: {node}")

It'll make debugging easier later when we start working with large SWC files and running into errors. See the other pyneuroml files on how to set up logging. Here's the general python documentation:

https://docs.python.org/3/howto/logging.html#loggers

elif node.parent_id in self.nodes:
parent = self.nodes[node.parent_id]
parent.children.append(node.id)
else:
print(f"Warning: Parent {node.parent_id} not found for node {node.id}")

def add_metadata(self, key, value):
"""
Add metadata to the SWC graph.

Args:
key (str): The key for the metadata.
value (str): The value for the metadata.

Note:
Only valid header fields (as defined in HEADER_FIELDS) are added as metadata.
"""
if key in self.HEADER_FIELDS:
self.metadata[key] = value

AdityaPandeyCN marked this conversation as resolved.
Show resolved Hide resolved
def get_segment_adjacency_list(self):
"""
Get the adjacency list of all segments (nodes) in the SWC graph.

Returns a dict where each key is a parent node, and the value is the
list of its child nodes.

Nodes without children (leaf nodes) are not included as parents in the
adjacency list.

This method also stores the computed adjacency list in
`self.adjacency_list` for future use by other methods.

`self.adjacency_list` is populated each time this method is run, to
ensure that users can regenerate it after making modifications to the
SWC graph. If the graph has not changed, one only needs to
populate it once and then re-use it as required.

Returns:
dict: A dictionary containing the adjacency list representation of the SWC graph.
Keys are parent node objects, and values are lists of child node objects.
"""
child_lists = {}
for node in self.nodes.values():
if node.parent_id != -1:
if node.parent_id not in child_lists:
child_lists[node.parent_id] = []
child_lists[node.parent_id].append(node)

self.adjacency_list = child_lists
return child_lists

def get_graph(self):
"""
Get a networkx DiGraph representation of the SWC graph.

The graph represents the neuronal morphology, where nodes correspond to
points in the SWC data, and edges represent connections between parent
and child nodes. The weight of each edge is the distance from the
proximal point of the parent segment to the point where the child segment
connects.

This method uses the adjacency list computed by `get_segment_adjacency_list`
to construct the graph.

This method also stores the computed graph in the `self.graph` attribute
for future use.

For more information on networkx routines and operations on graphs, see:
https://networkx.org/documentation/stable/reference/

Returns:
networkx.DiGraph: A directed graph representing the SWC data.
"""
graph = nx.DiGraph()

# Populate the graph with nodes
for node in self.nodes.values():
graph.add_node(
node.id,
type=node.type,
x=node.x,
y=node.y,
z=node.z,
radius=node.radius,
)

# Populate the graph with edges using the adjacency list
adjacency_list = self.get_segment_adjacency_list()
for parent_id, children in adjacency_list.items():
parent_length = self.get_segment_length(parent_id)

for child in children:
fraction_along = child.fraction_along
distance_to_proximal = parent_length * fraction_along
graph.add_edge(parent_id, child.id, weight=distance_to_proximal)

self.graph = graph
return graph

def get_parent(self, node_id):
"""Get the parent of a given node."""
node = self.nodes.get(node_id)
if node is None:
raise ValueError(f"Node {node_id} not found")
return self.nodes.get(node.parent_id) if node.parent_id != -1 else None

def get_descendants(self, node_id):
"""Get all descendants of a given node."""
node = self.nodes.get(node_id)
if node is None:
raise ValueError(f"Node {node_id} not found")

descendants = []
queue = node.children.copy()
while queue:
child_id = queue.pop(0)
child = self.nodes[child_id]
descendants.append(child)
queue.extend(child.children)
return descendants

def get_nodes_with_multiple_children(self, type_id=None):
"""Get all nodes that have more than one child, optionally filtering by type."""
nodes = []
for node in self.nodes.values():
if len(node.children) > 1:
if type_id is None or node.type == type_id:
nodes.append(node)
return nodes

def get_nodes_by_type(self, type_id):
"""Get all nodes of a specific type."""
return [node for node in self.nodes.values() if node.type == type_id]

def get_branch_points(self, *types):
"""Get all branch points (nodes with multiple children) of the given types."""
nodes = []
for type_id in types:
nodes.extend(self.get_nodes_with_multiple_children(type_id))
return nodes
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we probably want to return a dict with the key being the type and the value being the list of nodes of that type?

The use case here is "give me branch points in axons and dendrites" and we'll probably want to treat them differently. If not, the user can always iterate over all the values.


def has_soma_node(self):
"""
Check if the graph contains at least one soma node (type 1).
"""
return any(node.type == SWCNode.SOMA for node in self.nodes.values())


def parse_header(line):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't forget the type hints and doc string :)

for field in SWCGraph.HEADER_FIELDS:
match = re.match(rf"{field}\s+(.+)", line, re.IGNORECASE)
if match:
return field, match.group(1).strip()
return None, None


def load_swc(filename):
tree = SWCGraph()
try:
with open(filename, "r") as file:
for line in file:
line = line.strip()
if not line:
continue
if line.startswith("#"):
key, value = parse_header(line[1:].strip())
if key:
tree.add_metadata(key, value)
continue

parts = line.split()
if len(parts) != 7:
print(f"Warning: Skipping invalid line: {line}")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd make this a logger.error or logger.warning perhaps, and we need to decide if this should stop the parsing of the file.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this still needs to be updated @AdityaBITMESRA

continue

node_id, type_id, x, y, z, radius, parent_id = parts
try:
node = SWCNode(node_id, type_id, x, y, z, radius, parent_id)
tree.add_node(node)
except ValueError as e:
print(f"Warning: {e} in line: {line}")

except (FileNotFoundError, IOError) as e:
print(f"Error reading file {filename}: {e}")

return tree
Empty file added tests/swc/__init__.py
Empty file.
99 changes: 99 additions & 0 deletions tests/swc/test_LoadSWC.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import unittest

import networkx as nx

from pyneuroml.swc.LoadSWC import SWCGraph, SWCNode


class TestSWCNode(unittest.TestCase):
def test_init(self):
node = SWCNode(1, 1, 0.0, 0.0, 0.0, 1.0, -1)
self.assertEqual(node.id, 1)
self.assertEqual(node.type, 1)
self.assertEqual(node.x, 0.0)
self.assertEqual(node.y, 0.0)
self.assertEqual(node.z, 0.0)
self.assertEqual(node.radius, 1.0)
self.assertEqual(node.parent_id, -1)

def test_invalid_init(self):
with self.assertRaises(ValueError):
SWCNode("a", 1, 0.0, 0.0, 0.0, 1.0, -1)


class TestSWCGraph(unittest.TestCase):
def setUp(self):
self.tree = SWCGraph()
self.node1 = SWCNode(1, 1, 0.0, 0.0, 0.0, 1.0, -1)
self.node2 = SWCNode(2, 3, 1.0, 0.0, 0.0, 0.5, 1)
self.node2.fraction_along = 1.0
self.node3 = SWCNode(3, 3, 2.0, 0.0, 0.0, 0.5, 2)
self.node3.fraction_along = 0.5
self.tree.add_node(self.node1)
self.tree.add_node(self.node2)
self.tree.add_node(self.node3)

def test_duplicate_node(self):
with self.assertRaises(ValueError):
self.tree.add_node(SWCNode(1, 1, 0.0, 0.0, 0.0, 1.0, -1))

def test_add_metadata(self):
self.tree.add_metadata("ORIGINAL_SOURCE", "file.swc")
self.assertEqual(self.tree.metadata["ORIGINAL_SOURCE"], "file.swc")

def test_invalid_metadata(self):
self.tree.add_metadata("INVALID_FIELD", "value")
self.assertEqual(self.tree.metadata, {})

def test_get_segment_adjacency_list(self):
adjacency_list = self.tree.get_segment_adjacency_list()
self.assertEqual(adjacency_list, {1: [self.node2], 2: [self.node3]})

def test_get_graph(self):
graph = self.tree.get_graph()
self.assertIsInstance(graph, nx.DiGraph)
self.assertEqual(len(graph.nodes), 3)
self.assertEqual(len(graph.edges), 2)
self.assertEqual(graph.nodes[self.node1.id]["type"], self.node1.type)
self.assertEqual(graph.nodes[self.node2.id]["x"], self.node2.x)
self.assertEqual(graph.nodes[self.node3.id]["radius"], self.node3.radius)
self.assertEqual(graph.edges[(self.node1.id, self.node2.id)]["weight"], 0.0)
self.assertEqual(graph.edges[(self.node2.id, self.node3.id)]["weight"], 0.5)

def test_get_parent(self):
self.assertIsNone(self.tree.get_parent(self.node1.id))
self.assertEqual(self.tree.get_parent(self.node2.id), self.node1)
self.assertEqual(self.tree.get_parent(self.node3.id), self.node2)
with self.assertRaises(ValueError):
self.tree.get_parent(4)

def test_get_descendants(self):
self.assertEqual(
self.tree.get_descendants(self.node1.id), [self.node2, self.node3]
)
self.assertEqual(self.tree.get_descendants(self.node2.id), [self.node3])
self.assertEqual(self.tree.get_descendants(self.node3.id), [])
with self.assertRaises(ValueError):
self.tree.get_descendants(4)

def test_get_nodes_with_multiple_children(self):
self.assertEqual(self.tree.get_nodes_with_multiple_children(), [])
self.assertEqual(self.tree.get_nodes_with_multiple_children(SWCNode.SOMA), [])
self.assertEqual(
self.tree.get_nodes_with_multiple_children(SWCNode.BASAL_DENDRITE), []
)

def test_get_nodes_by_type(self):
self.assertEqual(len(self.tree.get_nodes_by_type(SWCNode.SOMA)), 1)
self.assertEqual(len(self.tree.get_nodes_by_type(SWCNode.BASAL_DENDRITE)), 2)
self.assertEqual(len(self.tree.get_nodes_by_type(SWCNode.AXON)), 0)

def test_get_branch_points(self):
self.assertEqual(self.tree.get_branch_points(SWCNode.SOMA), [])
self.assertEqual(self.tree.get_branch_points(SWCNode.BASAL_DENDRITE), [])
self.assertEqual(
self.tree.get_branch_points(SWCNode.SOMA, SWCNode.BASAL_DENDRITE), []
)

def test_has_soma_node(self):
self.assertTrue(self.tree.has_soma_node())
Loading