KatanaGraph · oshofmann · Oct 8, 2021 · Oct 11, 2021 · Oct 11, 2021 · Oct 11, 2021
diff --git a/libgalois/include/katana/PropertyGraph.h b/libgalois/include/katana/PropertyGraph.h
@@ -74,6 +74,9 @@ class KATANA_EXPORT PropertyGraph {
   Result<void> WriteView(
       const std::string& uri, const std::string& command_line);
 
+  // Recreate node/edge property indexes from the column lists stored in rdg_.
+  katana::Result<void> RecreatePropertyIndexes();
+
   tsuba::RDG rdg_;
   std::unique_ptr<tsuba::RDGFile> file_;
   GraphTopology topology_;
@@ -88,9 +91,11 @@ class KATANA_EXPORT PropertyGraph {
   /// The edge EntityTypeID for each edge's most specific type
   EntityTypeIDArray edge_entity_type_ids_;
 
-  // List of node and edge indexes on this graph.
+  // List of node indexes on this graph.
   std::vector<std::unique_ptr<PropertyIndex<GraphTopology::Node>>>
       node_indexes_;
+
+  // List of edge indexes on this graph.
   std::vector<std::unique_ptr<PropertyIndex<GraphTopology::Edge>>>
       edge_indexes_;
 

diff --git a/libgalois/src/PropertyGraph.cpp b/libgalois/src/PropertyGraph.cpp
@@ -219,6 +219,8 @@ katana::PropertyGraph::Make(
   katana::GraphTopology topo =
       KATANA_CHECKED(MapTopology(rdg.topology_file_storage()));
 
+  std::unique_ptr<katana::PropertyGraph> property_graph;
+
   if (rdg.IsEntityTypeIDsOutsideProperties()) {
     KATANA_LOG_DEBUG("loading EntityType data from outside properties");
 
@@ -236,24 +238,26 @@ katana::PropertyGraph::Make(
     EntityTypeManager edge_type_manager =
         KATANA_CHECKED(rdg.edge_entity_type_manager());
 
-    return std::make_unique<PropertyGraph>(
+    property_graph = std::make_unique<PropertyGraph>(
         std::move(rdg_file), std::move(rdg), std::move(topo),
         std::move(node_type_ids), std::move(edge_type_ids),
         std::move(node_type_manager), std::move(edge_type_manager));
 
   } else {
     // we must construct id_arrays and managers from properties
 
-    auto pg = std::make_unique<PropertyGraph>(
+    property_graph = std::make_unique<PropertyGraph>(
         std::move(rdg_file), std::move(rdg), std::move(topo),
         MakeDefaultEntityTypeIDArray(topo.num_nodes()),
         MakeDefaultEntityTypeIDArray(topo.num_edges()), EntityTypeManager{},
         EntityTypeManager{});
 
-    KATANA_CHECKED(pg->ConstructEntityTypeIDs());
-
-    return MakeResult(std::move(pg));
+    KATANA_CHECKED(property_graph->ConstructEntityTypeIDs());
   }
+
+  KATANA_CHECKED(property_graph->RecreatePropertyIndexes());
+
+  return MakeResult(std::move(property_graph));
 }
 
 katana::Result<std::unique_ptr<katana::PropertyGraph>>
@@ -468,6 +472,19 @@ katana::PropertyGraph::DoWrite(
           ? KATANA_CHECKED(WriteEntityTypeIDsArray(edge_entity_type_ids_))
           : nullptr;
 
+  // Update lists of node and edge index columns.
+  std::vector<std::string> node_index_columns(node_indexes_.size());
+  std::transform(
+      node_indexes_.begin(), node_indexes_.end(), node_index_columns.begin(),
+      [](const auto& index) { return index->column_name(); });
+  rdg_.set_node_property_index_columns(std::move(node_index_columns));
+
+  std::vector<std::string> edge_index_columns(edge_indexes_.size());
+  std::transform(
+      edge_indexes_.begin(), edge_indexes_.end(), edge_index_columns.begin(),
+      [](const auto& index) { return index->column_name(); });
+  rdg_.set_edge_property_index_columns(std::move(edge_index_columns));
+
   return rdg_.Store(
       handle, command_line, versioning_action, std::move(topology_res),
       std::move(node_entity_type_id_array_res),
@@ -1289,3 +1306,16 @@ katana::PropertyGraph::GetNodePropertyIndex(
   }
   return KATANA_ERROR(katana::ErrorCode::NotFound, "node index not found");
 }
+
+katana::Result<void>
+katana::PropertyGraph::RecreatePropertyIndexes() {
+  // Node side: one MakeNodeIndex call per column name recorded in rdg_.
+  for (const auto& node_column : rdg_.node_property_index_columns()) {
+    KATANA_CHECKED(MakeNodeIndex(node_column));
+  }
+  // Edge side: the same, driven by the edge column list.
+  for (const auto& edge_column : rdg_.edge_property_index_columns()) {
+    KATANA_CHECKED(MakeEdgeIndex(edge_column));
+  }
+  return katana::ResultSuccess();
+}
diff --git a/libgalois/test/property-index.cpp b/libgalois/test/property-index.cpp
@@ -1,6 +1,7 @@
 #include <arrow/api.h>
 #include <arrow/type.h>
 #include <arrow/type_traits.h>
+#include <boost/filesystem.hpp>
 
 #include "TestTypedPropertyGraph.h"
 #include "katana/Logging.h"
@@ -11,8 +12,12 @@ template <typename node_or_edge>
 struct NodeOrEdge {
   static katana::Result<katana::PropertyIndex<node_or_edge>*> MakeIndex(
       katana::PropertyGraph* pg, const std::string& column_name);
+  static katana::Result<katana::PropertyIndex<node_or_edge>*> GetIndex(
+      katana::PropertyGraph* pg, const std::string& column_name);
   static katana::Result<void> AddProperties(
       katana::PropertyGraph* pg, std::shared_ptr<arrow::Table> properties);
+  static std::shared_ptr<arrow::Array> GetProperty(
+      katana::PropertyGraph* pg, const std::string& column_name);
   static size_t num_entities(katana::PropertyGraph* pg);
 };
 
@@ -21,12 +26,7 @@ using Edge = NodeOrEdge<katana::GraphTopology::Edge>;
 
 template <>
 katana::Result<katana::PropertyIndex<katana::GraphTopology::Node>*>
-Node::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) {
-  auto result = pg->MakeNodeIndex(column_name);
-  if (!result) {
-    return result.error();
-  }
-
+Node::GetIndex(katana::PropertyGraph* pg, const std::string& column_name) {
   for (const auto& index : pg->node_indexes()) {
     if (index->column_name() == column_name) {
       return index.get();
@@ -37,13 +37,15 @@ Node::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) {
 }
 
 template <>
-katana::Result<katana::PropertyIndex<katana::GraphTopology::Edge>*>
-Edge::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) {
-  auto result = pg->MakeEdgeIndex(column_name);
-  if (!result) {
-    return result.error();
-  }
+katana::Result<katana::PropertyIndex<katana::GraphTopology::Node>*>
+Node::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) {
+  KATANA_CHECKED(pg->MakeNodeIndex(column_name));  // create the index on pg
+  return Node::GetIndex(pg, column_name);  // then look it up by column name
+}
 
+template <>
+katana::Result<katana::PropertyIndex<katana::GraphTopology::Edge>*>
+Edge::GetIndex(katana::PropertyGraph* pg, const std::string& column_name) {
   for (const auto& index : pg->edge_indexes()) {
     if (index->column_name() == column_name) {
       return index.get();
@@ -53,6 +55,13 @@ Edge::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) {
   return KATANA_ERROR(katana::ErrorCode::NotFound, "Created index not found");
 }
 
+template <>
+katana::Result<katana::PropertyIndex<katana::GraphTopology::Edge>*>
+Edge::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) {
+  KATANA_CHECKED(pg->MakeEdgeIndex(column_name));  // create the index on pg
+  return Edge::GetIndex(pg, column_name);  // then look it up by column name
+}
+
 template <>
 size_t
 Node::num_entities(katana::PropertyGraph* pg) {
@@ -79,6 +88,22 @@ Edge::AddProperties(
   return pg->AddEdgeProperties(properties);
 }
 
+template <>  // Test helper: first chunk of the node property `column_name`.
+std::shared_ptr<arrow::Array>
+Node::GetProperty(katana::PropertyGraph* pg, const std::string& column_name) {
+  auto prop_result = pg->GetNodeProperty(column_name);
+  KATANA_LOG_ASSERT(prop_result && prop_result.value()->num_chunks() > 0);
+  return prop_result.value()->chunk(0);  // in range: chunk count checked above
+}
+
+template <>  // Test helper: first chunk of the edge property `column_name`.
+std::shared_ptr<arrow::Array>
+Edge::GetProperty(katana::PropertyGraph* pg, const std::string& column_name) {
+  auto prop_result = pg->GetEdgeProperty(column_name);
+  KATANA_LOG_ASSERT(prop_result && prop_result.value()->num_chunks() > 0);
+  return prop_result.value()->chunk(0);  // in range: chunk count checked above
+}
+
 template <typename c_type>
 std::shared_ptr<arrow::Table>
 CreatePrimitiveProperty(
@@ -200,11 +225,8 @@ TestPrimitiveIndex(size_t num_nodes, size_t line_width) {
 }
 
 template <typename node_or_edge>
-void
-TestStringIndex(size_t num_nodes, size_t line_width) {
-  using IndexType = katana::StringPropertyIndex<node_or_edge>;
-  using ArrayType = arrow::LargeStringArray;
-
+std::unique_ptr<katana::PropertyGraph>
+MakeStringGraph(size_t num_nodes, size_t line_width) {
   LinePolicy policy{line_width};
 
   std::unique_ptr<katana::PropertyGraph> g =
@@ -230,6 +252,32 @@ TestStringIndex(size_t num_nodes, size_t line_width) {
       nonuniform_index_result, "Could not create index: {}",
       nonuniform_index_result.error());
 
+  return g;
+}
+
+template <typename node_or_edge>
+std::unique_ptr<katana::PropertyGraph>
+TestStringIndex(
+    std::unique_ptr<katana::PropertyGraph> g, size_t num_nodes,
+    size_t line_width) {
+  using IndexType = katana::StringPropertyIndex<node_or_edge>;
+  using ArrayType = arrow::LargeStringArray;
+
+  if (!g) {
+    g = MakeStringGraph<node_or_edge>(num_nodes, line_width);
+  }
+
+  auto uniform_index_result =
+      NodeOrEdge<node_or_edge>::GetIndex(g.get(), "uniform");
+  KATANA_LOG_VASSERT(
+      uniform_index_result, "Could not get index: {}",
+      uniform_index_result.error());
+  auto nonuniform_index_result =
+      NodeOrEdge<node_or_edge>::GetIndex(g.get(), "nonuniform");
+  KATANA_LOG_VASSERT(
+      nonuniform_index_result, "Could not get index: {}",
+      nonuniform_index_result.error());
+
   auto* uniform_index = static_cast<IndexType*>(uniform_index_result.value());
   auto* nonuniform_index =
       static_cast<IndexType*>(nonuniform_index_result.value());
@@ -253,8 +301,8 @@ TestStringIndex(size_t num_nodes, size_t line_width) {
   }
 
   // The non-uniform index starts at "aaaa" and increases by 2.
-  auto typed_prop =
-      std::static_pointer_cast<ArrayType>(nonuniform_prop->column(0)->chunk(0));
+  auto typed_prop = std::static_pointer_cast<ArrayType>(
+      NodeOrEdge<node_or_edge>::GetProperty(g.get(), "nonuniform"));
   it = nonuniform_index->Find("aaaj");
   KATANA_LOG_ASSERT(it == nonuniform_index->end());
   it = nonuniform_index->LowerBound("aaaj");
@@ -263,6 +311,31 @@ TestStringIndex(size_t num_nodes, size_t line_width) {
   it = nonuniform_index->UpperBound("aaak");
   KATANA_LOG_ASSERT(it != nonuniform_index->end());
   KATANA_LOG_ASSERT(typed_prop->GetView(*it) == "aaam");
+
+  return g;
+}
+
+std::unique_ptr<katana::PropertyGraph>
+ReloadGraph(std::unique_ptr<katana::PropertyGraph> g) {
+  // Scratch location for the on-disk copy, generated by Uri::MakeRand.
+  auto scratch_uri = katana::Uri::MakeRand("/tmp/propertyfilegraph");
+  KATANA_LOG_ASSERT(scratch_uri);
+  std::string rdg_dir(scratch_uri.value().path());
+  // Write the graph; on failure remove the directory, then log fatally.
+  if (auto write_result = g->Write(rdg_dir, "test command line");
+      !write_result) {
+    boost::filesystem::remove_all(rdg_dir);
+    KATANA_LOG_FATAL("writing result: {}", write_result.error());
+  }
+
+  // Load it back and delete the directory regardless of the load outcome.
+  auto make_result =
+      katana::PropertyGraph::Make(rdg_dir, tsuba::RDGLoadOptions());
+  boost::filesystem::remove_all(rdg_dir);
+  if (!make_result) {
+    KATANA_LOG_FATAL("making result: {}", make_result.error());
+  }
+  return std::move(make_result.value());
 }
 
 int
@@ -274,8 +347,14 @@ main() {
   TestPrimitiveIndex<katana::GraphTopology::Node, double_t>(10, 3);
   TestPrimitiveIndex<katana::GraphTopology::Edge, double_t>(10, 3);
 
-  TestStringIndex<katana::GraphTopology::Node>(10, 3);
-  TestStringIndex<katana::GraphTopology::Edge>(10, 3);
+  auto node_g = TestStringIndex<katana::GraphTopology::Node>(nullptr, 10, 3);
+  auto edge_g = TestStringIndex<katana::GraphTopology::Edge>(nullptr, 10, 3);
+
+  node_g = ReloadGraph(std::move(node_g));
+  edge_g = ReloadGraph(std::move(edge_g));
+
+  TestStringIndex<katana::GraphTopology::Node>(std::move(node_g), 10, 3);
+  TestStringIndex<katana::GraphTopology::Edge>(std::move(edge_g), 10, 3);
 
   return 0;
 }
diff --git a/libtsuba/include/tsuba/RDG.h b/libtsuba/include/tsuba/RDG.h
@@ -230,6 +230,17 @@ class KATANA_EXPORT RDG {
   /// Remove all edge properties
   void DropEdgeProperties();
 
+  // Set the names of the indexed node/edge property columns to persist.
+  // The vector passed in is consumed (moved from).
+  void set_node_property_index_columns(
+      std::vector<std::string>&& node_property_index_columns);
+  void set_edge_property_index_columns(
+      std::vector<std::string>&& edge_property_index_columns);
+
+  // Return the names of the node/edge property columns recorded as indexed.
+  const std::vector<std::string>& node_property_index_columns();
+  const std::vector<std::string>& edge_property_index_columns();
+
   /// Remove topology data
   katana::Result<void> DropTopology();
 

diff --git a/libtsuba/src/RDG.cpp b/libtsuba/src/RDG.cpp
@@ -215,6 +215,30 @@ tsuba::RDG::WritePartArrays(const katana::Uri& dir, tsuba::WriteGroup* desc) {
   return next_properties;
 }
 
+void
+tsuba::RDG::set_node_property_index_columns(
+    std::vector<std::string>&& node_property_index_columns) {
+  core_->part_header().set_node_property_index_columns(
+      std::move(node_property_index_columns));  // forwarded to the part header
+}
+
+void
+tsuba::RDG::set_edge_property_index_columns(
+    std::vector<std::string>&& edge_property_index_columns) {
+  core_->part_header().set_edge_property_index_columns(
+      std::move(edge_property_index_columns));  // forwarded to the part header
+}
+
+const std::vector<std::string>&
+tsuba::RDG::node_property_index_columns() {  // reads the part header's list
+  return core_->part_header().node_property_index_columns();
+}
+
+const std::vector<std::string>&
+tsuba::RDG::edge_property_index_columns() {  // reads the part header's list
+  return core_->part_header().edge_property_index_columns();
+}
+
 katana::Result<void>
 tsuba::RDG::DoStoreTopology(
     RDGHandle handle, std::unique_ptr<FileFrame> topology_ff,

diff --git a/libtsuba/src/RDGPartHeader.cpp b/libtsuba/src/RDGPartHeader.cpp
@@ -33,6 +33,9 @@ const char* kEdgeEntityTypeIDDictionaryKey =
 const char* kNodeEntityTypeIDNameKey = "kg.v1.node_entity_type_id_name";
 // Name maps from Atomic Edge Entity Type ID to set of string names for the Edge Entity Type ID
 const char* kEdgeEntityTypeIDNameKey = "kg.v1.edge_entity_type_id_name";
+// List of node and edge indexed columns
+const char* kNodePropertyIndexColumnsKey = "kg.v1.node_property_index_columns";
+const char* kEdgePropertyIndexColumnsKey = "kg.v1.edge_property_index_columns";
 
 //
 //constexpr std::string_view  mirror_nodes_prop_name = "mirror_nodes";
@@ -288,6 +291,8 @@ tsuba::to_json(json& j, const tsuba::RDGPartHeader& header) {
       {kEdgeEntityTypeIDDictionaryKey, header.edge_entity_type_id_dictionary_},
       {kNodeEntityTypeIDNameKey, header.node_entity_type_id_name_},
       {kEdgeEntityTypeIDNameKey, header.edge_entity_type_id_name_},
+      {kNodePropertyIndexColumnsKey, header.node_property_index_columns_},
+      {kEdgePropertyIndexColumnsKey, header.edge_property_index_columns_},
   };
 }
 
@@ -319,6 +324,16 @@ tsuba::from_json(const json& j, tsuba::RDGPartHeader& header) {
     j.at(kNodeEntityTypeIDNameKey).get_to(header.node_entity_type_id_name_);
     j.at(kEdgeEntityTypeIDNameKey).get_to(header.edge_entity_type_id_name_);
   }
+
+  header.node_property_index_columns_ = {};
+  if (auto it = j.find(kNodePropertyIndexColumnsKey); it != j.end()) {
+    it->get_to(header.node_property_index_columns_);
+  }
+
+  header.edge_property_index_columns_ = {};
+  if (auto it = j.find(kEdgePropertyIndexColumnsKey); it != j.end()) {
+    it->get_to(header.edge_property_index_columns_);
+  }
 }
 
 void