Skip to content

Commit

Permalink
Added constant props to pandas loaders (#1165)
Browse files Browse the repository at this point in the history
* Added vertex type arguments to the pandas loader and tests for layers/types in pandas loader

* Changed to static props

* Added functions for adding constant props only

* fmt

* added tests for static props

* Finalised tests
  • Loading branch information
miratepuffin authored Aug 9, 2023
1 parent 47283e2 commit a105baf
Show file tree
Hide file tree
Showing 4 changed files with 425 additions and 38 deletions.
97 changes: 80 additions & 17 deletions python/tests/test_graphdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -1504,8 +1504,7 @@ def test_load_from_pandas():
(5, 6, 5.0, "purple")]


def test_load_from_pandas_vertices():
import pandas as pd
def test_load_from_pandas_into_existing_graph():
edges_df = pd.DataFrame({
"src": [1, 2, 3, 4, 5],
"dst": [2, 3, 4, 5, 6],
Expand All @@ -1520,8 +1519,11 @@ def test_load_from_pandas_vertices():
"time": [1, 2, 3, 4, 5, 6],
})

g = Graph.load_from_pandas(edges_df, src="src", dst="dst", time="time", props=["weight", "marbles"],
vertex_df=vertices_df, vertex_col="id", vertex_time_col="time", vertex_props=["name"])
g = Graph()

g.load_vertices_from_pandas(vertices_df, "id", "time", ["name"])

g.load_edges_from_pandas(edges_df, "src", "dst", "time", ["weight", "marbles"])

assert g.vertices().id().collect() == [1, 2, 3, 4, 5, 6]
edges = []
Expand All @@ -1541,13 +1543,7 @@ def test_load_from_pandas_vertices():
assert vertices == [(1, "Alice"), (2, "Bob"), (3, "Carol"), (4, "Dave"), (5, "Eve"), (6, "Frank")]


def test_hits_algorithm():
    """HITS on the bundled LOTR graph yields the pinned score pair for 'Aldor'."""
    lotr = graph_loader.lotr_graph()
    expected_scores = (0.0035840950440615416, 0.007476256228983402)
    assert algorithms.hits(lotr).get('Aldor') == expected_scores


def load_from_pandas_into_existing_graph():
import pandas as pd
def test_load_from_pandas_vertices():
edges_df = pd.DataFrame({
"src": [1, 2, 3, 4, 5],
"dst": [2, 3, 4, 5, 6],
Expand All @@ -1562,11 +1558,8 @@ def load_from_pandas_into_existing_graph():
"time": [1, 2, 3, 4, 5, 6],
})

g = Graph()

g.load_vertices_from_pandas(vertices_df, "id", "time", ["name"])

g.load_edges_frompandas(edges_df, "src", "dst", "time", ["weight", "marbles"])
g = Graph.load_from_pandas(edges_df, src="src", dst="dst", time="time", props=["weight", "marbles"],
vertex_df=vertices_df, vertex_col="id", vertex_time_col="time", vertex_props=["name"])

assert g.vertices().id().collect() == [1, 2, 3, 4, 5, 6]
edges = []
Expand All @@ -1586,10 +1579,80 @@ def load_from_pandas_into_existing_graph():
assert vertices == [(1, "Alice"), (2, "Bob"), (3, "Carol"), (4, "Dave"), (5, "Eve"), (6, "Frank")]


def test_load_from_pandas_with_types():
    """Check constant properties, shared constant properties and layers in the pandas loaders.

    Exercises ``load_vertices_from_pandas``, ``load_edges_from_pandas``,
    ``Graph.load_from_pandas`` and the property-only loaders
    ``load_vertex_props_from_pandas`` / ``load_edge_props_from_pandas``.
    """
    marble_colours = ["red", "blue", "green", "yellow", "purple"]
    layer_names = ["layer 1", "layer 2", "layer 3", "layer 4", "layer 5"]
    person_types = ["Person 1", "Person 2", "Person 3", "Person 4", "Person 5", "Person 6"]

    edges_df = pd.DataFrame(
        {
            "src": [1, 2, 3, 4, 5],
            "dst": [2, 3, 4, 5, 6],
            "time": [1, 2, 3, 4, 5],
            "weight": [1.0, 2.0, 3.0, 4.0, 5.0],
            "marbles": list(marble_colours),
            "marbles_const": list(marble_colours),
            "layers": list(layer_names),
        }
    )
    vertices_df = pd.DataFrame(
        {
            "id": [1, 2, 3, 4, 5, 6],
            "name": ["Alice", "Bob", "Carol", "Dave", "Eve", "Frank"],
            "time": [1, 2, 3, 4, 5, 6],
            "type": list(person_types),
        }
    )

    def vertex_const(graph, key):
        # Constant property `key` for every vertex, as a plain list.
        return graph.vertices().properties.constant.get(key).collect()

    def edge_const(graph, key):
        # Constant property `key` for every edge, as a plain list.
        return graph.edges().properties.constant.get(key).collect()

    def edge_sources(graph, layers):
        # Source ids of the edges visible through the given layers.
        return graph.layers(layers).edges().src().id().collect()

    def assert_layers_from_column(graph):
        # Each row of edges_df sits in its own layer, named by the "layers" column.
        assert edge_sources(graph, ["layer 1"]) == [1]
        assert edge_sources(graph, ["layer 1", "layer 2"]) == [1, 2]
        assert edge_sources(graph, ["layer 1", "layer 2", "layer 3"]) == [1, 2, 3]
        assert edge_sources(graph, ["layer 1", "layer 4", "layer 5"]) == [1, 4, 5]

    # Shared constant properties: the same value is attached to every vertex.
    g = Graph()
    g.load_vertices_from_pandas(
        vertices_df, "id", "time", ["name"],
        shared_const_props={"type": "Person", "tag": "test_tag"},
    )
    assert vertex_const(g, "type") == ["Person"] * 6
    assert vertex_const(g, "tag") == ["test_tag"] * 6

    # Per-row constant properties: values are read from the "type" column.
    g = Graph()
    g.load_vertices_from_pandas(vertices_df, "id", "time", ["name"], const_props=["type"])
    assert vertex_const(g, "type") == person_types

    # Edges with per-row and shared constant properties inside one named layer.
    g = Graph()
    g.load_edges_from_pandas(
        edges_df, "src", "dst", "time", ["weight", "marbles"],
        const_props=["marbles_const"],
        shared_const_props={"type": "Edge", "tag": "test_tag"},
        layer="test_layer",
    )
    assert edge_sources(g, ["test_layer"]) == [1, 2, 3, 4, 5]
    assert edge_const(g, "type") == [{"test_layer": "Edge"} for _ in range(5)]
    assert edge_const(g, "tag") == [{"test_layer": "test_tag"} for _ in range(5)]
    assert edge_const(g, "marbles_const") == [{"test_layer": colour} for colour in marble_colours]

    # Layer name taken from a dataframe column.
    g = Graph()
    g.load_edges_from_pandas(edges_df, "src", "dst", "time", ["weight", "marbles"], layer_in_df="layers")
    assert_layers_from_column(g)

    # One-shot loader: fixed layer plus shared vertex constant properties.
    g = Graph.load_from_pandas(
        edges_df, "src", "dst", "time",
        layer="test_layer",
        vertex_df=vertices_df, vertex_col="id", vertex_time_col="time", vertex_props=["name"],
        vertex_shared_const_props={"type": "Person"},
    )
    assert vertex_const(g, "type") == ["Person"] * 6
    assert edge_sources(g, ["test_layer"]) == [1, 2, 3, 4, 5]

    # One-shot loader: layer column plus per-row vertex constant properties.
    g = Graph.load_from_pandas(
        edges_df, "src", "dst", "time",
        layer_in_df="layers",
        vertex_df=vertices_df, vertex_col="id", vertex_time_col="time", vertex_props=["name"],
        vertex_const_props=["type"],
    )
    assert vertex_const(g, "type") == person_types
    assert_layers_from_column(g)

    # Property-only loaders applied to a graph that has already been populated.
    g = Graph.load_from_pandas(
        edges_df, src="src", dst="dst", time="time", props=["weight", "marbles"],
        vertex_df=vertices_df, vertex_col="id", vertex_time_col="time", vertex_props=["name"],
        layer_in_df="layers",
    )

    g.load_vertex_props_from_pandas(vertices_df, "id", const_props=["type"], shared_const_props={"tag": "test_tag"})
    assert vertex_const(g, "type") == person_types
    assert vertex_const(g, "tag") == ["test_tag"] * 6

    g.load_edge_props_from_pandas(
        edges_df, "src", "dst",
        const_props=["marbles_const"],
        shared_const_props={"tag": "test_tag"},
        layer_in_df="layers",
    )
    first_three_layers = g.layers(["layer 1", "layer 2", "layer 3"])
    assert first_three_layers.edges().properties.constant.get("marbles_const").collect() == [
        {"layer 1": "red"},
        {"layer 2": "blue"},
        {"layer 3": "green"},
    ]
    assert edge_const(g, "tag") == [{layer: "test_tag"} for layer in layer_names]


def test_edge_layer():
    """Constant edge properties added per layer are reported keyed by that layer's name."""
    g = Graph()
    g.add_edge(1, 1, 2, layer="layer 1")
    g.add_edge(1, 2, 3, layer="layer 2")
    # Attach a constant property to each edge within the layer it was created in.
    g.add_edge_properties(1, 2, {"test_prop": "test_val"}, layer="layer 1")
    g.add_edge_properties(2, 3, {"test_prop": "test_val 2"}, layer="layer 2")
    # Asserted once: the original repeated this exact assertion on two consecutive lines.
    assert g.edges().properties.constant.get("test_prop") == [{'layer 1': 'test_val'}, {'layer 2': 'test_val 2'}]


def test_hits_algorithm():
    """HITS on the bundled LOTR graph yields the pinned score pair for 'Aldor'."""
    lotr = graph_loader.lotr_graph()
    expected_scores = (0.0035840950440615416, 0.007476256228983402)
    assert algorithms.hits(lotr).get('Aldor') == expected_scores

6 changes: 6 additions & 0 deletions raphtory/src/core/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,12 @@ impl From<Vec<Prop>> for Prop {
}
}

impl From<&Prop> for Prop {
fn from(value: &Prop) -> Self {
value.clone()
}
}

/// Conversion of a value into a single [`Prop`].
///
/// NOTE(review): judging by the name, implementors presumably produce a
/// map-valued `Prop` — confirm against the implementations.
pub trait IntoPropMap {
/// Consumes `self` and returns the resulting [`Prop`].
fn into_prop_map(self) -> Prop;
}
Expand Down
103 changes: 94 additions & 9 deletions raphtory/src/python/graph/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@ use crate::{
};
use pyo3::prelude::*;

use crate::db::{
api::view::internal::{DynamicGraph, IntoDynamic},
graph::{edge::EdgeView, vertex::VertexView},
use crate::{
db::{
api::view::internal::{DynamicGraph, IntoDynamic},
graph::{edge::EdgeView, vertex::VertexView},
},
python::graph::pandas::{load_edges_props_from_df, load_vertex_props_from_df},
};
use std::{
collections::HashMap,
Expand Down Expand Up @@ -237,24 +240,38 @@ impl PyGraph {
}

#[staticmethod]
#[pyo3(signature = (edges_df, src = "source", dst = "destination", time = "time", props = None, layer = None, layer_in_df = None, vertex_df = None, vertex_col = None, vertex_time_col = None, vertex_props = None))]
#[pyo3(signature = (edges_df, src = "source", dst = "destination", time = "time", props = None, const_props=None,shared_const_props=None,layer = None, layer_in_df = None, vertex_df = None, vertex_col = None, vertex_time_col = None, vertex_props = None, vertex_const_props = None, vertex_shared_const_props = None))]
fn load_from_pandas(
edges_df: &PyAny,
src: &str,
dst: &str,
time: &str,
props: Option<Vec<&str>>,
const_props: Option<Vec<&str>>,
shared_const_props: Option<HashMap<String, Prop>>,
layer: Option<&str>,
layer_in_df: Option<&str>,
vertex_df: Option<&PyAny>,
vertex_col: Option<&str>,
vertex_time_col: Option<&str>,
vertex_props: Option<Vec<&str>>,
vertex_const_props: Option<Vec<&str>>,
vertex_shared_const_props: Option<HashMap<String, Prop>>,
) -> Result<Graph, GraphError> {
let graph = PyGraph {
graph: Graph::new(),
};
graph.load_edges_from_pandas(edges_df, src, dst, time, props, layer, layer_in_df)?;
graph.load_edges_from_pandas(
edges_df,
src,
dst,
time,
props,
const_props,
shared_const_props,
layer,
layer_in_df,
)?;
if let (Some(vertex_df), Some(vertex_col), Some(vertex_time_col)) =
(vertex_df, vertex_col, vertex_time_col)
{
Expand All @@ -263,39 +280,53 @@ impl PyGraph {
vertex_col,
vertex_time_col,
vertex_props,
vertex_const_props,
vertex_shared_const_props,
)?;
}
Ok(graph.graph)
}

/// Loads vertices from a pandas DataFrame into this graph.
///
/// Arguments:
///     vertices_df: the pandas DataFrame holding one vertex update per row.
///     vertex_col: name of the column with the vertex ids (defaults to `"id"`).
///     time_col: name of the column with the update times (defaults to `"time"`).
///     props: optional column names forwarded to the loader as properties.
///     const_props: optional column names forwarded to the loader as
///         constant properties (one value per row).
///     shared_const_props: optional name -> value map forwarded to the loader
///         as constant properties shared by all loaded vertices.
///
/// Errors:
///     Any failure while converting the DataFrame or loading it is returned
///     as `GraphError::LoadFailure`.
#[pyo3(signature = (vertices_df, vertex_col = "id", time_col = "time", props = None, const_props = None, shared_const_props = None))]
fn load_vertices_from_pandas(
    &self,
    vertices_df: &PyAny,
    vertex_col: &str,
    time_col: &str,
    props: Option<Vec<&str>>,
    const_props: Option<Vec<&str>>,
    shared_const_props: Option<HashMap<String, Prop>>,
) -> Result<(), GraphError> {
    let graph = &self.graph;
    Python::with_gil(|py| {
        let df = process_pandas_py_df(vertices_df, py)?;
        // Single call using the full argument list; the loader error is
        // rendered with `Debug` and raised as a `GraphLoadException`.
        load_vertices_from_df(
            &df,
            vertex_col,
            time_col,
            props,
            const_props,
            shared_const_props,
            graph,
        )
        .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?;

        Ok::<(), PyErr>(())
    })
    .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?;
    Ok(())
}

#[pyo3(signature = (edge_df, src_col = "source", dst_col = "destination", time_col = "time", props = None, layer=None,layer_in_df=None))]
#[pyo3(signature = (edge_df, src_col = "source", dst_col = "destination", time_col = "time", props = None, const_props=None,shared_const_props=None,layer=None,layer_in_df=None))]
fn load_edges_from_pandas(
&self,
edge_df: &PyAny,
src_col: &str,
dst_col: &str,
time_col: &str,
props: Option<Vec<&str>>,
const_props: Option<Vec<&str>>,
shared_const_props: Option<HashMap<String, Prop>>,
layer: Option<&str>,
layer_in_df: Option<&str>,
) -> Result<(), GraphError> {
Expand All @@ -308,6 +339,60 @@ impl PyGraph {
dst_col,
time_col,
props,
const_props,
shared_const_props,
layer,
layer_in_df,
graph,
)
.map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?;

Ok::<(), PyErr>(())
})
.map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?;
Ok(())
}

/// Loads constant vertex properties from a pandas DataFrame into this graph.
///
/// Arguments:
///     vertices_df: the pandas DataFrame holding one vertex per row.
///     vertex_col: name of the column with the vertex ids (defaults to `"id"`).
///     const_props: optional column names forwarded to the loader as
///         constant properties (one value per row).
///     shared_const_props: optional name -> value map forwarded to the loader
///         as constant properties shared by all listed vertices.
///
/// Errors:
///     Any failure while converting the DataFrame or loading it is returned
///     as `GraphError::LoadFailure`.
#[pyo3(signature = (vertices_df, vertex_col = "id", const_props = None, shared_const_props = None))]
fn load_vertex_props_from_pandas(
    &self,
    vertices_df: &PyAny,
    vertex_col: &str,
    const_props: Option<Vec<&str>>,
    shared_const_props: Option<HashMap<String, Prop>>,
) -> Result<(), GraphError> {
    let outcome = Python::with_gil(|py| -> Result<(), PyErr> {
        let frame = process_pandas_py_df(vertices_df, py)?;
        let loaded = load_vertex_props_from_df(
            &frame,
            vertex_col,
            const_props,
            shared_const_props,
            &self.graph,
        );
        match loaded {
            Ok(_) => Ok(()),
            // Loader errors are rendered with `Debug` and raised as a GraphLoadException.
            Err(e) => Err(GraphLoadException::new_err(format!("{:?}", e))),
        }
    });
    outcome.map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))
}

#[pyo3(signature = (edge_df, src_col = "source", dst_col = "destination", const_props=None,shared_const_props=None,layer=None,layer_in_df=None))]
fn load_edge_props_from_pandas(
&self,
edge_df: &PyAny,
src_col: &str,
dst_col: &str,
const_props: Option<Vec<&str>>,
shared_const_props: Option<HashMap<String, Prop>>,
layer: Option<&str>,
layer_in_df: Option<&str>,
) -> Result<(), GraphError> {
let graph = &self.graph;
Python::with_gil(|py| {
let df = process_pandas_py_df(edge_df, py)?;
load_edges_props_from_df(
&df,
src_col,
dst_col,
const_props,
shared_const_props,
layer,
layer_in_df,
graph,
Expand Down
Loading

0 comments on commit a105baf

Please sign in to comment.