From 187a3286abfdf7eed4499ec810606ad8e7ddafcc Mon Sep 17 00:00:00 2001 From: Anthony Mahanna <43019056+aMahanna@users.noreply.github.com> Date: Fri, 31 Dec 2021 14:02:59 -0500 Subject: [PATCH] new: blog post preparation --- examples/ArangoDB_DGL_Adapter.ipynb | 285 ++-- .../outputs/ArangoDB_DGL_Adapter_output.ipynb | 1449 +++++++++++++++++ 2 files changed, 1612 insertions(+), 122 deletions(-) create mode 100644 examples/outputs/ArangoDB_DGL_Adapter_output.ipynb diff --git a/examples/ArangoDB_DGL_Adapter.ipynb b/examples/ArangoDB_DGL_Adapter.ipynb index 742b3e7..53eb7c5 100644 --- a/examples/ArangoDB_DGL_Adapter.ipynb +++ b/examples/ArangoDB_DGL_Adapter.ipynb @@ -34,7 +34,7 @@ "id": "bpvZS-1aeG89" }, "source": [ - "Version: 1.0.0\n", + "Version: 1.0.2\n", "\n", "Objective: Export Graphs from [ArangoDB](https://www.arangodb.com/), a multi-model Graph Database, to [Deep Graph Library](https://www.dgl.ai/) (DGL), a python package for graph neural networks, and vice-versa." ] @@ -58,10 +58,9 @@ "source": [ "%%capture\n", "!git clone -b oasis_connector --single-branch https://github.com/arangodb/interactive_tutorials.git\n", - "!git clone -b 1.0.1 --single-branch https://github.com/arangoml/dgl-adapter.git\n", - "!rsync -av dgl-adapter/examples/ ./ --exclude=.git\n", + "!git clone -b 1.0.2 --single-branch https://github.com/arangoml/dgl-adapter.git\n", "!rsync -av interactive_tutorials/ ./ --exclude=.git\n", - "!pip3 install adbdgl_adapter==1.0.1\n", + "!pip3 install adbdgl_adapter==1.0.2\n", "!pip3 install matplotlib\n", "!pip3 install pyArango\n", "!pip3 install networkx ## For drawing purposes " @@ -75,7 +74,7 @@ "base_uri": "https://localhost:8080/" }, "id": "RpqvL4COeG8-", - "outputId": "2df55e4e-03fa-47ed-c2c9-baf9f597e1d8" + "outputId": "4e453af8-33d9-4834-fb4e-250032695e01" }, "outputs": [], "source": [ @@ -129,7 +128,7 @@ "base_uri": "https://localhost:8080/" }, "id": "vf0350qvj8up", - "outputId": "a65f00d2-cd6e-4583-94d8-2c9884e2e2e2" + "outputId": 
"eadc8e26-4edd-4859-e074-88db180dab84" }, "outputs": [], "source": [ @@ -162,7 +161,7 @@ "base_uri": "https://localhost:8080/" }, "id": "oOS3AVAnkQEV", - "outputId": "4609cdef-25ce-4f00-94b5-482c76274f88" + "outputId": "96640c60-adfd-4785-98bb-26a38e40507b" }, "outputs": [], "source": [ @@ -198,7 +197,7 @@ "base_uri": "https://localhost:8080/" }, "id": "meLon-KgkU4h", - "outputId": "976680a4-eadd-43f2-da17-e6a574fad8a7" + "outputId": "c9bcbd68-9c41-4f65-eab9-83d2c8069d0a" }, "outputs": [], "source": [ @@ -236,7 +235,7 @@ "base_uri": "https://localhost:8080/" }, "id": "zTebQ0LOlsGA", - "outputId": "9c84cb84-f7ce-42b3-9174-01f38295c5dd" + "outputId": "3be2ec4d-bd08-4f11-b626-1312e931480b" }, "outputs": [], "source": [ @@ -279,7 +278,7 @@ "base_uri": "https://localhost:8080/" }, "id": "KsxNujb0mSqZ", - "outputId": "3f3fd2b1-e1d3-4b03-c6c4-43566672cbb5" + "outputId": "716e7226-dca0-42ae-e469-df5c975f852f" }, "outputs": [], "source": [ @@ -322,7 +321,7 @@ "base_uri": "https://localhost:8080/" }, "id": "2ekGwnJDeG8-", - "outputId": "92e9d288-0259-45cc-e73d-a8e9f629063a" + "outputId": "5cfdfdd4-ec46-4ec5-c69d-5864bb393077" }, "outputs": [], "source": [ @@ -330,16 +329,14 @@ "con = oasis.getTempCredentials()\n", "\n", "# Connect to the db via the python-arango driver\n", - "python_arango_db_driver = oasis.connect_python_arango(con)\n", + "db = oasis.connect_python_arango(con)\n", "\n", - "# (Alternative) Connect to the db via the pyArango driver\n", - "# pyarango_db_driver = oasis.connect(con)[con['dbName']]\n", - "\n", - "print()\n", + "print('\\n--------------------')\n", "print(\"https://{}:{}\".format(con[\"hostname\"], con[\"port\"]))\n", "print(\"Username: \" + con[\"username\"])\n", "print(\"Password: \" + con[\"password\"])\n", - "print(\"Database: \" + con[\"dbName\"])" + "print(\"Database: \" + con[\"dbName\"])\n", + "print('--------------------\\n')" ] }, { @@ -366,16 +363,7 @@ "id": "BM0iRYPDeG8_" }, "source": [ - "We will use an Fraud Detection example graph, 
explained in more detail in this [interactive notebook](https://colab.research.google.com/github/joerg84/Graph_Powered_ML_Workshop/blob/master/Fraud_Detection.ipynb)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1jWclaDdeG8_" - }, - "source": [ - "*Note the included arangorestore will only work on Linux system, if you want to run this notebook on a different OS please consider using the appropriate arangorestore from the [Download area](https://www.arangodb.com/download-major/).*" + "For demo purposes, we will be using the [ArangoDB Fraud Detection example graph](https://colab.research.google.com/github/joerg84/Graph_Powered_ML_Workshop/blob/master/Fraud_Detection.ipynb)." ] }, { @@ -388,76 +376,7 @@ "source": [ "%%capture\n", "!chmod -R 755 ./tools\n", - "!./tools/arangorestore -c none --server.endpoint http+ssl://{con[\"hostname\"]}:{con[\"port\"]} --server.username {con[\"username\"]} --server.database {con[\"dbName\"]} --server.password {con[\"password\"]} --replication-factor 3 --input-directory \"data/fraud_dump\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "227hLXnPeG8_" - }, - "source": [ - "# Create Graph" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "howeguvmeG8_" - }, - "source": [ - "The graph we will be using in the following looks as follows:" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WqRlqnJCeG8_" - }, - "source": [ - "![networkX](https://github.com/arangoml/networkx-adapter/blob/master/examples/assets/fraud_graph.jpeg?raw=1) " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "PybHP7jpeG8_", - "outputId": "ba3bfc7c-ef56-47e7-8e98-3763d4f34afe" - }, - "outputs": [], - "source": [ - "edge_definitions = [\n", - " {\n", - " \"edge_collection\": \"accountHolder\",\n", - " \"from_vertex_collections\": [\"customer\"],\n", - " \"to_vertex_collections\": [\"account\"],\n", - " 
},\n", - " {\n", - " \"edge_collection\": \"transaction\",\n", - " \"from_vertex_collections\": [\"account\"],\n", - " \"to_vertex_collections\": [\"account\"],\n", - " },\n", - "]\n", - "\n", - "name = \"fraud-detection\"\n", - "python_arango_db_driver.delete_graph(name, ignore_missing=True)\n", - "fraud_graph = python_arango_db_driver.create_graph(name, edge_definitions=edge_definitions)\n", - "\n", - "print(\"Graph Setup done.\")\n", - "print(fraud_graph)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ANrsn9GreG9A" - }, - "source": [ - "Feel free to visit the ArangoDB UI using the above link and login data and check the Graph!" + "!./tools/arangorestore -c none --server.endpoint http+ssl://{con[\"hostname\"]}:{con[\"port\"]} --server.username {con[\"username\"]} --server.database {con[\"dbName\"]} --server.password {con[\"password\"]} --replication-factor 3 --input-directory \"dgl-adapter/examples/data/fraud_dump\" --include-system-collections true" ] }, { @@ -466,7 +385,7 @@ "id": "QfE_tKxneG9A" }, "source": [ - "# Create Adapter" + "# Instantiate the Adapter" ] }, { @@ -475,7 +394,7 @@ "id": "kGfhzPT9eG9A" }, "source": [ - "Connect the ArangoDB_DGL_Adapter to our temp ArangoDB cluster:" + "Connect the ArangoDB-DGL Adapter to our temporary ArangoDB cluster:" ] }, { @@ -486,7 +405,7 @@ "base_uri": "https://localhost:8080/" }, "id": "oG496kBeeG9A", - "outputId": "50ecbdf5-c82f-4540-d345-14eb4a488f2c" + "outputId": "235419bb-52cc-429c-f497-b79b361689cc" }, "outputs": [], "source": [ @@ -512,6 +431,22 @@ "## Via ArangoDB Graph" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "H8nlvWCryPW0" + }, + "source": [ + "Data source\n", + "* ArangoDB Fraud-Detection Graph\n", + "\n", + "Package methods used\n", + "* [`adbdgl_adapter.adapter.arangodb_graph_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/adapter.py#L182-L197)\n", + "\n", + "Important notes\n", + "* The `name` parameter in this case must point to an 
existing ArangoDB graph in your ArangoDB instance. " + ] + }, { "cell_type": "code", "execution_count": null, @@ -520,7 +455,7 @@ "base_uri": "https://localhost:8080/" }, "id": "zZ-Hu3lLVHgd", - "outputId": "39f32c51-0753-45a8-a361-dcf46d4e6148" + "outputId": "02940624-1b41-488f-99fa-d081a065e1b2" }, "outputs": [], "source": [ @@ -535,6 +470,7 @@ "# See more here: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", "\n", "# Show graph data\n", + "print('\\n--------------------')\n", "print(dgl_g)\n", "print(dgl_g.ntypes)\n", "print(dgl_g.etypes)" @@ -549,6 +485,23 @@ "## Via ArangoDB Collections" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "bRcCmqWGy1Kf" + }, + "source": [ + "Data source\n", + "* ArangoDB Fraud-Detection Collections\n", + "\n", + "Package methods used\n", + "* [`adbdgl_adapter.adapter.arangodb_collections_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/adapter.py#L153-L180)\n", + "\n", + "Important notes\n", + "* The `name` parameter in this case is simply for naming your DGL graph.\n", + "* The `vertex_collections` & `edge_collections` parameters must point to existing ArangoDB collections within your ArangoDB instance." 
+ ] + }, { "cell_type": "code", "execution_count": null, @@ -557,7 +510,7 @@ "base_uri": "https://localhost:8080/" }, "id": "i4XOpdRLUNlJ", - "outputId": "b58e75d1-e935-4abd-9bdb-bc8935d9cdc8" + "outputId": "1cdd6aed-a084-4da8-b342-ee650efe0ccd" }, "outputs": [], "source": [ @@ -573,6 +526,7 @@ "# See more here: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", "\n", "# Show graph data\n", + "print('\\n--------------------')\n", "print(dgl_g)\n", "print(dgl_g.ntypes)\n", "print(dgl_g.etypes)" @@ -587,6 +541,23 @@ "## Via ArangoDB Metagraph" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "PipFzJ0HzTMA" + }, + "source": [ + "Data source\n", + "* ArangoDB Fraud-Detection Collections\n", + "\n", + "Package methods used\n", + "* [`adbdgl_adapter.adapter.arangodb_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/adapter.py#L58-L151)\n", + "\n", + "Important notes\n", + "* The `name` parameter in this case is simply for naming your DGL graph.\n", + "* The `metagraph` parameter should contain collections & associated document attributes names that exist within your ArangoDB instance." 
+ ] + }, { "cell_type": "code", "execution_count": null, @@ -595,7 +566,7 @@ "base_uri": "https://localhost:8080/" }, "id": "7Kz8lXXq23Yk", - "outputId": "1458aef6-14e5-48c0-98bf-77f21431bc73" + "outputId": "2ce0cff8-a4a5-4373-8297-17e313a16aae" }, "outputs": [], "source": [ @@ -638,6 +609,25 @@ "## Via ArangoDB Metagraph with a custom controller" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "PGkGh_KjzlYM" + }, + "source": [ + "Data source\n", + "* ArangoDB Fraud-Detection Collections\n", + "\n", + "Package methods used\n", + "* [`adbdgl_adapter.adapter.arangodb_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/adapter.py#L58-L151)\n", + "* [`adbdgl_adapter.controller._adb_attribute_to_dgl_feature()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/controller.py#L21-L47)\n", + "\n", + "Important notes\n", + "* The `name` parameter in this case is simply for naming your DGL graph.\n", + "* The `metagraph` parameter should contain collections & associated document attributes names that exist within your ArangoDB instance.\n", + "* We are creating a custom `ADBDGL_Controller` to specify *how* to convert our ArangoDB vertex/edge attributes into DGL node/edge features. View the default `ADBDGL_Controller` [here](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/controller.py#L11)." 
+ ] + }, { "cell_type": "code", "execution_count": null, @@ -646,7 +636,7 @@ "base_uri": "https://localhost:8080/" }, "id": "U4_vSdU_4AS4", - "outputId": "b719b0d4-c0a4-43a0-915f-8ee765e1ec86" + "outputId": "9d958954-ea94-4fa0-9778-255b2b02712e" }, "outputs": [], "source": [ @@ -764,16 +754,32 @@ "## Example 1: DGL Karate Graph" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "tx-tjPfx0U_h" + }, + "source": [ + "Data source\n", + "* [DGL Karate Graph](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#karate-club-dataset)\n", + "\n", + "Package methods used\n", + "* [`adbdgl_adapter.adapter.dgl_to_arangodb()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/adapter.py#L199-L297)\n", + "\n", + "Important notes\n", + "* The `name` parameter in this case is simply for naming your ArangoDB graph." + ] + }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 577 + "height": 0 }, "id": "eRVbiBy4ZdE4", - "outputId": "d44eb9d9-e046-443b-8ded-79654f004e02" + "outputId": "da33e345-87d7-442f-fbb7-5484b79889a5" }, "outputs": [], "source": [ @@ -783,7 +789,7 @@ "\n", "# Create the ArangoDB graph\n", "name = \"Karate\"\n", - "python_arango_db_driver.delete_graph(name, drop_collections=True, ignore_missing=True)\n", + "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", "adb_karate_graph = adbdgl_adapter.dgl_to_arangodb(name, dgl_karate_graph)\n", "\n", "print('\\n--------------------')\n", @@ -792,8 +798,8 @@ "print(\"Password: \" + con[\"password\"])\n", "print(\"Database: \" + con[\"dbName\"])\n", "print('--------------------\\n')\n", - "print(f\"\\nInspect the graph here: https://tutorials.arangodb.cloud:8529/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", - "print(f\"\\nView the original graph below:\")" + "print(f\"Inspect the graph here: 
https://tutorials.arangodb.cloud:8529/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", + "print(f\"View the original graph below:\")" ] }, { @@ -806,16 +812,32 @@ "## Example 2: DGL MiniGCDataset Graphs" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "KaExiE2x0-M6" + }, + "source": [ + "Data source\n", + "* [DGL Mini Graph Classification Dataset](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#mini-graph-classification-dataset)\n", + "\n", + "Package methods used\n", + "* [`adbdgl_adapter.adapter.dgl_to_arangodb()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/adapter.py#L199-L297)\n", + "\n", + "Important notes\n", + "* The `name` parameters in this case are simply for naming your ArangoDB graph." + ] + }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 1000 + "height": 0 }, "id": "dADiexlAioGH", - "outputId": "273988c8-1749-4fe0-85fe-51b0e1ab2058" + "outputId": "f7926863-61bb-4202-ba7e-e55413d20ca5" }, "outputs": [], "source": [ @@ -837,9 +859,9 @@ "hypercube = \"Hypercube\"\n", "clique = \"Clique\"\n", "\n", - "python_arango_db_driver.delete_graph(lollipop, drop_collections=True, ignore_missing=True)\n", - "python_arango_db_driver.delete_graph(hypercube, drop_collections=True, ignore_missing=True)\n", - "python_arango_db_driver.delete_graph(clique, drop_collections=True, ignore_missing=True)\n", + "db.delete_graph(lollipop, drop_collections=True, ignore_missing=True)\n", + "db.delete_graph(hypercube, drop_collections=True, ignore_missing=True)\n", + "db.delete_graph(clique, drop_collections=True, ignore_missing=True)\n", "\n", "adb_lollipop_graph = adbdgl_adapter.dgl_to_arangodb(lollipop, dgl_lollipop_graph)\n", "adb_hypercube_graph = adbdgl_adapter.dgl_to_arangodb(hypercube, dgl_hypercube_graph)\n", @@ -868,6 +890,24 @@ "## Example 3: DGL MiniGCDataset Graphs with a custom controller" ] }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "CZ1UX9YX1Zzo" + }, + "source": [ + "Data source\n", + "* [DGL Mini Graph Classification Dataset](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#mini-graph-classification-dataset)\n", + "\n", + "Package methods used\n", + "* [`adbdgl_adapter.adapter.dgl_to_arangodb()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/adapter.py#L199-L297)\n", + "* [`adbdgl_adapter.controller._dgl_feature_to_adb_attribute()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/controller.py#L49-L70)\n", + "\n", + "Important notes\n", + "* The `name` parameters in this case are simply for naming your ArangoDB graph.\n", + "* We are creating a custom `ADBDGL_Controller` to specify *how* to convert our DGL node/edge features into ArangoDB vertex/edge attributes. View the default `ADBDGL_Controller` [here](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/controller.py#L11)." + ] + }, { "cell_type": "code", "execution_count": null, @@ -876,7 +916,7 @@ "base_uri": "https://localhost:8080/" }, "id": "jbJsvMMaoJoT", - "outputId": "2ddca41f-9c8b-4db4-c0aa-c1b2cc124fa5" + "outputId": "7cc01a20-201b-42ea-85e7-c60d0d8d2fbb" }, "outputs": [], "source": [ @@ -961,9 +1001,9 @@ "hypercube = \"Hypercube_With_Attributes\"\n", "clique = \"Clique_With_Attributes\"\n", "\n", - "python_arango_db_driver.delete_graph(lollipop, drop_collections=True, ignore_missing=True)\n", - "python_arango_db_driver.delete_graph(hypercube, drop_collections=True, ignore_missing=True)\n", - "python_arango_db_driver.delete_graph(clique, drop_collections=True, ignore_missing=True)\n", + "db.delete_graph(lollipop, drop_collections=True, ignore_missing=True)\n", + "db.delete_graph(hypercube, drop_collections=True, ignore_missing=True)\n", + "db.delete_graph(clique, drop_collections=True, ignore_missing=True)\n", "\n", "adb_lollipop_graph = adbdgl_adapter.dgl_to_arangodb(lollipop, dgl_lollipop_graph)\n", "adb_hypercube_graph = 
adbdgl_adapter.dgl_to_arangodb(hypercube, dgl_hypercube_graph)\n", @@ -986,15 +1026,16 @@ "colab": { "collapsed_sections": [ "ot1oJqn7m78n", - "7y81WHO8eG8_", - "227hLXnPeG8_", "QfE_tKxneG9A", "ZrEDmtqCVD0W", + "RQ4CknYfUEuz", "qEH6OdSB23Ya", + "DqIKT1lO4ASw", "UafSB_3JZNwK", - "gshTlSX_ZZsS" + "gshTlSX_ZZsS", + "CNj1xKhwoJoL" ], - "name": "ArangoDB_DGL_Adapter_v1.0.0.ipynb", + "name": "ArangoDB_DGL_Adapter_v1.ipynb", "provenance": [] }, "kernelspec": { diff --git a/examples/outputs/ArangoDB_DGL_Adapter_output.ipynb b/examples/outputs/ArangoDB_DGL_Adapter_output.ipynb new file mode 100644 index 0000000..b6ea068 --- /dev/null +++ b/examples/outputs/ArangoDB_DGL_Adapter_output.ipynb @@ -0,0 +1,1449 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "q8KesL7xeG89" + }, + "source": [ + "# ArangoDB DGL Adapter Getting Started Guide " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U1d45V4OeG89" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Nx9aa3LAeG89" + }, + "source": [ + "![arangodb](https://raw.githubusercontent.com/arangoml/dgl-adapter/master/examples/assets/adb_logo.png)\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bpvZS-1aeG89" + }, + "source": [ + "Version: 1.0.2\n", + "\n", + "Objective: Export Graphs from [ArangoDB](https://www.arangodb.com/), a multi-model Graph Database, to [Deep Graph Library](https://www.dgl.ai/) (DGL), a python package for graph neural networks, and vice-versa." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KS9c-vE5eG89" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "fUnFAFAheG89" + }, + "outputs": [], + "source": [ + "%%capture\n", + "!git clone -b oasis_connector --single-branch https://github.com/arangodb/interactive_tutorials.git\n", + "!git clone -b 1.0.2 --single-branch https://github.com/arangoml/dgl-adapter.git\n", + "!rsync -av interactive_tutorials/ ./ --exclude=.git\n", + "!pip3 install adbdgl_adapter==1.0.2\n", + "!pip3 install matplotlib\n", + "!pip3 install pyArango\n", + "!pip3 install networkx ## For drawing purposes " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RpqvL4COeG8-", + "outputId": "4e453af8-33d9-4834-fb4e-250032695e01" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DGL backend not selected or invalid. Assuming PyTorch for now.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Setting the default backend to \"pytorch\". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable. 
Valid options are: pytorch, mxnet, tensorflow (all lowercase)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using backend: pytorch\n" + ] + } + ], + "source": [ + "import json\n", + "import oasis\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import dgl\n", + "import torch\n", + "import networkx as nx\n", + "\n", + "from dgl import remove_self_loop\n", + "from dgl.data import KarateClubDataset\n", + "from dgl.data import MiniGCDataset\n", + "\n", + "from adbdgl_adapter.adapter import ADBDGL_Adapter\n", + "from adbdgl_adapter.controller import ADBDGL_Controller\n", + "from adbdgl_adapter.typings import Json, ArangoMetagraph, DGLCanonicalEType, DGLDataDict" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ot1oJqn7m78n" + }, + "source": [ + "# Understanding DGL" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "N6Bx3hTjnGd7" + }, + "source": [ + "(referenced from [docs.dgl.ai](https://docs.dgl.ai/en/0.6.x/))\n", + "\n", + "\n", + "Deep Graph Library (DGL) is a Python package built for easy implementation of graph neural network model family, on top of existing DL frameworks (currently supporting **PyTorch**, **MXNet** and **TensorFlow**).\n", + "\n", + "DGL represents a directed graph as a `DGLGraph` object. You can construct a graph by specifying the number of nodes in the graph as well as the list of source and destination nodes. **Nodes in the graph have consecutive IDs starting from 0.**\n", + "\n", + "The following code constructs a directed \"star\" homogeneous graph with 6 nodes and 5 edges. 
\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vf0350qvj8up", + "outputId": "eadc8e26-4edd-4859-e074-88db180dab84" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Graph(num_nodes=6, num_edges=5,\n", + " ndata_schemes={}\n", + " edata_schemes={})\n", + "\n", + "Canonical Edge Types: [('_N', '_E', '_N')]\n" + ] + } + ], + "source": [ + "# A homogeneous graph with 6 nodes, and 5 edges\n", + "g = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5]))\n", + "print(g)\n", + "\n", + "# Print the graph's canonical edge types\n", + "print(\"\\nCanonical Edge Types: \", g.canonical_etypes)\n", + "# [('_N', '_E', '_N')]\n", + "# '_N' being the only Node type\n", + "# '_E' being the only Edge type" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wvJ0506mkMjz" + }, + "source": [ + "\n", + "In DGL, a heterogeneous graph (heterograph for short) is specified with a series of graphs as below, one per relation. Each relation is a string triplet `(source node type, edge type, destination node type)`. 
Since relations disambiguate the edge types, DGL calls them canonical edge types:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "oOS3AVAnkQEV", + "outputId": "96640c60-adfd-4785-98bb-26a38e40507b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Graph(num_nodes={'game': 4, 'user': 4},\n", + " num_edges={('user', 'follows', 'game'): 3, ('user', 'follows', 'user'): 2, ('user', 'plays', 'game'): 2},\n", + " metagraph=[('user', 'game', 'follows'), ('user', 'game', 'plays'), ('user', 'user', 'follows')])\n", + "\n", + "Canonical Edge Types: [('user', 'follows', 'game'), ('user', 'follows', 'user'), ('user', 'plays', 'game')]\n", + "\n", + "Node Types: ['game', 'user']\n", + "\n", + "Edge Types: ['follows', 'follows', 'plays']\n" + ] + } + ], + "source": [ + "# A heterogeneous graph with 8 nodes, and 7 edges\n", + "g = dgl.heterograph({\n", + " ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),\n", + " ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),\n", + " ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))\n", + "})\n", + "\n", + "print(g)\n", + "print(\"\\nCanonical Edge Types: \", g.canonical_etypes)\n", + "print(\"\\nNode Types: \", g.ntypes)\n", + "print(\"\\nEdge Types: \", g.etypes)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5C-R0tkkkS5N" + }, + "source": [ + "Many graph data contain attributes on nodes and edges. Although the types of node and edge attributes can be arbitrary in real world, **DGLGraph only accepts attributes stored in tensors** (with numerical contents). Consequently, an attribute of all the nodes or edges must have the same shape. In the context of deep learning, those attributes are often called features.\n", + "\n", + "You can assign and retrieve node and edge features via ndata and edata interface." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "meLon-KgkU4h", + "outputId": "c9bcbd68-9c41-4f65-eab9-83d2c8069d0a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Graph(num_nodes=6, num_edges=5,\n", + " ndata_schemes={'x': Scheme(shape=(), dtype=torch.int64)}\n", + " edata_schemes={'a': Scheme(shape=(4,), dtype=torch.float32)})\n", + "\n", + "Node Data X attribute: tensor([151, 124, 41, 89, 76, 55])\n", + "\n", + "Edge Data A attribute: tensor([[-0.9712, 0.3131, -1.7787, -0.4953],\n", + " [ 1.5366, -0.8591, -1.4719, 0.5857],\n", + " [-0.5803, 0.6757, 0.9276, -0.9756],\n", + " [ 0.4396, 1.0612, 0.0943, 0.6856],\n", + " [-0.8685, -1.3693, -0.1184, -1.0903]])\n" + ] + } + ], + "source": [ + "# A homogeneous graph with 6 nodes, and 5 edges\n", + "g = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5]))\n", + "\n", + "# Assign an integer value for each node.\n", + "g.ndata['x'] = torch.tensor([151, 124, 41, 89, 76, 55])\n", + "# Assign a 4-dimensional edge feature vector for each edge.\n", + "g.edata['a'] = torch.randn(5, 4)\n", + "\n", + "print(g)\n", + "print(\"\\nNode Data X attribute: \", g.ndata['x'])\n", + "print(\"\\nEdge Data A attribute: \", g.edata['a'])\n", + "\n", + "\n", + "# NOTE: The following line ndata insertion will fail, since not all nodes have been assigned an attribute value\n", + "# g.ndata['bad_attribute'] = torch.tensor([0,10,20,30,40])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ETRCZzF1lSLE" + }, + "source": [ + "When multiple node/edge types are introduced, users need to specify the particular node/edge type when invoking a DGLGraph API for type-specific information. In addition, nodes/edges of different types have separate IDs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zTebQ0LOlsGA", + "outputId": "3be2ec4d-bd08-4f11-b626-1312e931480b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "All nodes: 8\n", + "User nodes: 4\n", + "tensor([0, 1, 2, 3])\n" + ] + } + ], + "source": [ + "g = dgl.heterograph({\n", + " ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),\n", + " ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),\n", + " ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))\n", + "})\n", + "\n", + "# Get the number of all nodes in the graph\n", + "print(\"All nodes: \", g.num_nodes())\n", + "\n", + "# Get the number of user nodes\n", + "print(\"User nodes: \", g.num_nodes('user'))\n", + "\n", + "# Nodes of different types have separate IDs,\n", + "# hence not well-defined without a type specified\n", + "# print(g.nodes())\n", + "#DGLError: Node type name must be specified if there are more than one node types.\n", + "\n", + "print(g.nodes('user'))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H9dwJuKvmK0w" + }, + "source": [ + "To set/get features for a specific node/edge type, DGL provides two new types of syntax – `g.nodes['node_type'].data['feat_name']` and `g.edges['edge_type'].data['feat_name']`.\n", + "\n", + "**Note:** If the graph only has one node/edge type, there is no need to specify the node/edge type." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KsxNujb0mSqZ", + "outputId": "716e7226-dca0-42ae-e469-df5c975f852f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "defaultdict(, {'age': {'user': tensor([21, 16, 38, 64])}})\n" + ] + } + ], + "source": [ + "g = dgl.heterograph({\n", + " ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),\n", + " ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),\n", + " ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))\n", + "})\n", + "\n", + "g.nodes['user'].data['age'] = torch.tensor([21, 16, 38, 64])\n", + "# An alternative (yet equivalent) syntax:\n", + "# g.ndata['age'] = {'user': torch.tensor([21, 16, 38, 64])}\n", + "\n", + "print(g.ndata)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1M_isKWLnCfr" + }, + "source": [ + "For more info, visit https://docs.dgl.ai/en/0.6.x/. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Oc__NAd1eG8-" + }, + "source": [ + "# Create a Temporary ArangoDB Instance" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2ekGwnJDeG8-", + "outputId": "5cfdfdd4-ec46-4ec5-c69d-5864bb393077" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requesting new temp credentials.\n", + "Temp database ready to use.\n", + "\n", + "--------------------\n", + "https://tutorials.arangodb.cloud:8529\n", + "Username: TUT487i8kal98gb73c2iklds\n", + "Password: TUTn5t85w8t50kcupmo2mmyb\n", + "Database: TUTn187e39v9qho3768ilyk4\n", + "--------------------\n", + "\n" + ] + } + ], + "source": [ + "# Request temporary instance from the managed ArangoDB Cloud Oasis.\n", + "con = oasis.getTempCredentials()\n", + "\n", + "# Connect to the db via the python-arango driver\n", + "db = oasis.connect_python_arango(con)\n", + "\n", + "print('\\n--------------------')\n", + "print(\"https://{}:{}\".format(con[\"hostname\"], con[\"port\"]))\n", + "print(\"Username: \" + con[\"username\"])\n", + "print(\"Password: \" + con[\"password\"])\n", + "print(\"Database: \" + con[\"dbName\"])\n", + "print('--------------------\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e4QfL37neG8_" + }, + "source": [ + "Feel free to use the above URL to check out the UI!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7y81WHO8eG8_" + }, + "source": [ + "# Data Import" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BM0iRYPDeG8_" + }, + "source": [ + "For demo purposes, we will be using the [ArangoDB Fraud Detection example graph](https://colab.research.google.com/github/joerg84/Graph_Powered_ML_Workshop/blob/master/Fraud_Detection.ipynb)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "7bgGJ3QkeG8_" + }, + "outputs": [], + "source": [ + "%%capture\n", + "!chmod -R 755 ./tools\n", + "!./tools/arangorestore -c none --server.endpoint http+ssl://{con[\"hostname\"]}:{con[\"port\"]} --server.username {con[\"username\"]} --server.database {con[\"dbName\"]} --server.password {con[\"password\"]} --replication-factor 3 --input-directory \"dgl-adapter/examples/data/fraud_dump\" --include-system-collections true" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QfE_tKxneG9A" + }, + "source": [ + "# Instantiate the Adapter" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kGfhzPT9eG9A" + }, + "source": [ + "Connect the ArangoDB-DGL Adapter to our temporary ArangoDB cluster:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "oG496kBeeG9A", + "outputId": "235419bb-52cc-429c-f497-b79b361689cc" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connecting to https://tutorials.arangodb.cloud:8529\n" + ] + } + ], + "source": [ + "adbdgl_adapter = ADBDGL_Adapter(con)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uByvwf9feG9A" + }, + "source": [ + "# ArangoDB to DGL\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZrEDmtqCVD0W" + }, + "source": [ + "## Via ArangoDB Graph" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H8nlvWCryPW0" + }, + "source": [ + "Data source\n", + "* ArangoDB Fraud-Detection Graph\n", + "\n", + "Package methods used\n", + "* [`adbdgl_adapter.adapter.arangodb_graph_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/adapter.py#L182-L197)\n", + "\n", + "Important notes\n", + "* The `name` parameter in this case must point to an existing ArangoDB graph in your ArangoDB instance. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zZ-Hu3lLVHgd", + "outputId": "02940624-1b41-488f-99fa-d081a065e1b2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DGL: fraud-detection created\n", + "\n", + "--------------------\n", + "Graph(num_nodes={'account': 54, 'customer': 17},\n", + " num_edges={('account', 'accountHolder', 'customer'): 54, ('account', 'transaction', 'account'): 62},\n", + " metagraph=[('account', 'customer', 'accountHolder'), ('account', 'account', 'transaction')])\n", + "['account', 'customer']\n", + "['accountHolder', 'transaction']\n" + ] + } + ], + "source": [ + "# Define graph name\n", + "graph_name = \"fraud-detection\"\n", + "\n", + "# Create DGL graph from ArangoDB graph\n", + "dgl_g = adbdgl_adapter.arangodb_graph_to_dgl(graph_name)\n", + "\n", + "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", + "# dgl_g = adbdgl_adapter.arangodb_graph_to_dgl(graph_name, ttl=1000, stream=True)\n", + "# See more here: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "\n", + "# Show graph data\n", + "print('\\n--------------------')\n", + "print(dgl_g)\n", + "print(dgl_g.ntypes)\n", + "print(dgl_g.etypes)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RQ4CknYfUEuz" + }, + "source": [ + "## Via ArangoDB Collections" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bRcCmqWGy1Kf" + }, + "source": [ + "Data source\n", + "* ArangoDB Fraud-Detection Collections\n", + "\n", + "Package methods used\n", + "* [`adbdgl_adapter.adapter.arangodb_collections_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/adapter.py#L153-L180)\n", + "\n", + "Important notes\n", + "* The `name` parameter in this case is simply for naming your DGL graph.\n", + "* The `vertex_collections` & 
`edge_collections` parameters must point to existing ArangoDB collections within your ArangoDB instance." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "i4XOpdRLUNlJ", + "outputId": "1cdd6aed-a084-4da8-b342-ee650efe0ccd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DGL: fraud-detection created\n", + "\n", + "--------------------\n", + "Graph(num_nodes={'Class': 4, 'account': 54, 'customer': 17},\n", + " num_edges={('Class', 'Relationship', 'Class'): 4, ('account', 'accountHolder', 'customer'): 54, ('account', 'transaction', 'account'): 62},\n", + " metagraph=[('Class', 'Class', 'Relationship'), ('account', 'customer', 'accountHolder'), ('account', 'account', 'transaction')])\n", + "['Class', 'account', 'customer']\n", + "['Relationship', 'accountHolder', 'transaction']\n" + ] + } + ], + "source": [ + "# Define collection\n", + "vertex_collections = {\"account\", \"Class\", \"customer\"}\n", + "edge_collections = {\"accountHolder\", \"Relationship\", \"transaction\"}\n", + "\n", + "# Create DGL from ArangoDB collections\n", + "dgl_g = adbdgl_adapter.arangodb_collections_to_dgl(\"fraud-detection\", vertex_collections, edge_collections)\n", + "\n", + "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", + "# dgl_g = adbdgl_adapter.arangodb_collections_to_dgl(\"fraud-detection\", vertex_collections, edge_collections, ttl=1000, stream=True)\n", + "# See more here: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "\n", + "# Show graph data\n", + "print('\\n--------------------')\n", + "print(dgl_g)\n", + "print(dgl_g.ntypes)\n", + "print(dgl_g.etypes)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qEH6OdSB23Ya" + }, + "source": [ + "## Via ArangoDB Metagraph" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PipFzJ0HzTMA" + }, + 
"source": [ + "Data source\n", + "* ArangoDB Fraud-Detection Collections\n", + "\n", + "Package methods used\n", + "* [`adbdgl_adapter.adapter.arangodb_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/adapter.py#L58-L151)\n", + "\n", + "Important notes\n", + "* The `name` parameter in this case is simply for naming your DGL graph.\n", + "* The `metagraph` parameter should contain collections & associated document attributes names that exist within your ArangoDB instance." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7Kz8lXXq23Yk", + "outputId": "2ce0cff8-a4a5-4373-8297-17e313a16aae" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DGL: FraudDetection created\n", + "\n", + "--------------\n", + "Graph(num_nodes={'Class': 4, 'account': 54, 'customer': 17},\n", + " num_edges={('Class', 'Relationship', 'Class'): 4, ('account', 'accountHolder', 'customer'): 54, ('account', 'transaction', 'account'): 62},\n", + " metagraph=[('Class', 'Class', 'Relationship'), ('account', 'customer', 'accountHolder'), ('account', 'account', 'transaction')])\n", + "\n", + "--------------\n", + "defaultdict(, {'concrete': {'Class': tensor([True, True, True, True])}, 'Balance': {'account': tensor([5331, 7630, 1433, 2201, 4837, 5817, 1689, 1042, 4104, 10, 2338, 10,\n", + " 3779, 0, 529, 0, 1992, 2912, 6367, 1819, 0, 221, 5062, 2372,\n", + " 841, 5393, 1138, 8414, 4064, 5686, 6294, 6540, 7358, 3452, 0, 3993,\n", + " 10, 0, 471, 8148, 5832, 1758, 1747, 1679, 6789, 1599, 8320, 0,\n", + " 10, 8626, 7199, 8644, 3879, 10])}, 'customer_id': {'account': tensor([10000009, 10000004, 10000004, 10000010, 10000002, 10000011, 10000015,\n", + " 10000006, 10000010, 10810, 10000002, 10000014, 10000008, 0,\n", + " 10000002, 0, 10000008, 10000006, 10000012, 10000015, 10000001,\n", + " 10000010, 10000015, 10000005, 10000009, 10000008, 10000011, 
10000014,\n", + " 10000010, 10000006, 10000002, 10000007, 10000006, 10000005, 0,\n", + " 10000010, 10810, 0, 10000009, 10000006, 10000002, 10000005,\n", + " 10000009, 10000012, 10000007, 10000002, 10000014, 0, 10810,\n", + " 10000016, 10000006, 10000016, 10000013, 10810])}, 'rank': {'account': tensor([0.0021, 0.0031, 0.0052, 0.0021, 0.0046, 0.0037, 0.0032, 0.0042, 0.0021,\n", + " 0.0021, 0.0030, 0.0037, 0.0040, 0.0037, 0.0021, 0.0046, 0.0040, 0.0030,\n", + " 0.0026, 0.0032, 0.0021, 0.0034, 0.0032, 0.0021, 0.0021, 0.0035, 0.0026,\n", + " 0.0026, 0.0046, 0.0021, 0.0021, 0.0035, 0.0036, 0.0036, 0.0038, 0.0055,\n", + " 0.0021, 0.0041, 0.0044, 0.0021, 0.0030, 0.0035, 0.0033, 0.0026, 0.0071,\n", + " 0.0036, 0.0032, 0.0059, 0.0021, 0.0090, 0.0057, 0.0032, 0.0026, 0.0021]), 'customer': tensor([0.0135, 0.0050, 0.0062, 0.0066, 0.0096, 0.0088, 0.0089, 0.0047, 0.0066,\n", + " 0.0045, 0.0062, 0.0103, 0.0081, 0.0039, 0.0054, 0.0044, 0.0093])}})\n", + "--------------\n", + "\n", + "defaultdict(, {'sender_bank_id': {('account', 'transaction', 'account'): tensor([10000000003, 10000000002, 10000000001, 10000000001, 10000000002,\n", + " 10000000003, 10000000003, 10000000002, 10000000002, 10000000003,\n", + " 10000000001, 10000000001, 0, 10000000003, 10000000003,\n", + " 0, 10000000002, 0, 10000000001, 10000000003,\n", + " 10000000001, 10000000003, 10000000002, 0, 10000000003,\n", + " 10000000003, 10000000003, 10000000003, 10000000001, 10000000001,\n", + " 10000000002, 10000000001, 10000000003, 10000000003, 10000000001,\n", + " 10000000001, 0, 10000000003, 10000000002, 10000000001,\n", + " 10000000002, 10000000003, 10000000003, 10000000003, 10000000002,\n", + " 10000000003, 10000000002, 10000000003, 10000000002, 10000000001,\n", + " 10000000001, 0, 10000000003, 10000000003, 0,\n", + " 10000000003, 10000000003, 10000000001, 10000000001, 10000000003,\n", + " 10000000003, 10000000002])}, 'receiver_bank_id': {('account', 'transaction', 'account'): tensor([10000000003, 10000000003, 
10000000001, 10000000002, 10000000002,\n", + " 10000000003, 10000000001, 10000000003, 10000000001, 10000000003,\n", + " 10000000002, 10000000003, 0, 10000000003, 10000000003,\n", + " 0, 10000000001, 0, 10000000002, 10000000003,\n", + " 10000000003, 10000000003, 10000000001, 0, 10000000003,\n", + " 10000000002, 10000000003, 10000000003, 10000000001, 10000000001,\n", + " 10000000003, 10000000003, 10000000003, 10000000003, 10000000001,\n", + " 10000000002, 0, 10000000001, 10000000001, 10000000002,\n", + " 10000000001, 10000000003, 10000000003, 10000000003, 10000000001,\n", + " 10000000003, 10000000002, 10000000003, 10000000002, 10000000001,\n", + " 10000000003, 0, 10000000003, 10000000003, 0,\n", + " 10000000003, 10000000002, 10000000002, 10000000001, 10000000003,\n", + " 10000000003, 10000000003])}, 'transaction_amt': {('account', 'transaction', 'account'): tensor([9000, 299, 498, 954, 756, 627, 142, 946, 920, 9000, 421, 343,\n", + " 9000, 457, 9000, 9000, 53, 9000, 284, 120, 441, 9000, 364, 901,\n", + " 9000, 279, 9000, 9000, 273, 127, 952, 354, 795, 9000, 835, 761,\n", + " 9000, 478, 172, 804, 665, 995, 9000, 9000, 670, 9000, 340, 9000,\n", + " 747, 347, 52, 911, 762, 9000, 0, 790, 619, 491, 954, 9000,\n", + " 9000, 843])}})\n" + ] + } + ], + "source": [ + "# Define Metagraph\n", + "fraud_detection_metagraph = {\n", + " \"vertexCollections\": {\n", + " \"account\": {\"rank\", \"Balance\", \"customer_id\"},\n", + " \"Class\": {\"concrete\"},\n", + " \"customer\": {\"rank\"},\n", + " },\n", + " \"edgeCollections\": {\n", + " \"accountHolder\": {},\n", + " \"Relationship\": {},\n", + " \"transaction\": {\"receiver_bank_id\", \"sender_bank_id\", \"transaction_amt\"},\n", + " },\n", + "}\n", + "\n", + "# Create DGL Graph from attributes\n", + "dgl_g = adbdgl_adapter.arangodb_to_dgl('FraudDetection', fraud_detection_metagraph)\n", + "\n", + "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", + "# dgl_g = 
adbdgl_adapter.arangodb_to_dgl('FraudDetection', fraud_detection_metagraph, ttl=1000, stream=True)\n", + "# See more here: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "\n", + "# Show graph data\n", + "print('\\n--------------')\n", + "print(dgl_g)\n", + "print('\\n--------------')\n", + "print(dgl_g.ndata)\n", + "print('--------------\\n')\n", + "print(dgl_g.edata)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DqIKT1lO4ASw" + }, + "source": [ + "## Via ArangoDB Metagraph with a custom controller" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PGkGh_KjzlYM" + }, + "source": [ + "Data source\n", + "* ArangoDB Fraud-Detection Collections\n", + "\n", + "Package methods used\n", + "* [`adbdgl_adapter.adapter.arangodb_to_dgl()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/adapter.py#L58-L151)\n", + "* [`adbdgl_adapter.controller._adb_attribute_to_dgl_feature()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/controller.py#L21-L47)\n", + "\n", + "Important notes\n", + "* The `name` parameter in this case is simply for naming your DGL graph.\n", + "* The `metagraph` parameter should contain collections & associated document attributes names that exist within your ArangoDB instance.\n", + "* We are creating a custom `ADBDGL_Controller` to specify *how* to convert our ArangoDB vertex/edge attributes into DGL node/edge features. View the default `ADBDGL_Controller` [here](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/controller.py#L11)."
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "U4_vSdU_4AS4", + "outputId": "9d958954-ea94-4fa0-9778-255b2b02712e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connecting to https://tutorials.arangodb.cloud:8529\n", + "DGL: FraudDetection created\n", + "\n", + "--------------\n", + "Graph(num_nodes={'Class': 4, 'account': 54, 'customer': 17},\n", + " num_edges={('Class', 'Relationship', 'Class'): 4, ('account', 'accountHolder', 'customer'): 54, ('account', 'transaction', 'account'): 62},\n", + " metagraph=[('Class', 'Class', 'Relationship'), ('account', 'customer', 'accountHolder'), ('account', 'account', 'transaction')])\n", + "\n", + "--------------\n", + "defaultdict(, {'concrete': {'Class': tensor([True, True, True, True])}, 'name': {'Class': tensor([0, 1, 2, 3])}, 'rank': {'account': tensor([0.0021, 0.0031, 0.0052, 0.0021, 0.0046, 0.0037, 0.0032, 0.0042, 0.0021,\n", + " 0.0021, 0.0030, 0.0037, 0.0040, 0.0037, 0.0021, 0.0046, 0.0040, 0.0030,\n", + " 0.0026, 0.0032, 0.0021, 0.0034, 0.0032, 0.0021, 0.0021, 0.0035, 0.0026,\n", + " 0.0026, 0.0046, 0.0021, 0.0021, 0.0035, 0.0036, 0.0036, 0.0038, 0.0055,\n", + " 0.0021, 0.0041, 0.0044, 0.0021, 0.0030, 0.0035, 0.0033, 0.0026, 0.0071,\n", + " 0.0036, 0.0032, 0.0059, 0.0021, 0.0090, 0.0057, 0.0032, 0.0026, 0.0021]), 'customer': tensor([0.0135, 0.0050, 0.0062, 0.0066, 0.0096, 0.0088, 0.0089, 0.0047, 0.0066,\n", + " 0.0045, 0.0062, 0.0103, 0.0081, 0.0039, 0.0054, 0.0044, 0.0093])}, 'Ssn': {'customer': tensor([123456786, 123456780, 123456780, 123456787, 123456780, 123456789,\n", + " 123456780, 123456785, 123456783, 123456784, 123456780, 123456788,\n", + " 123456782, 123456781, 123456780, 123456780, 111223333])}, 'Sex': {'customer': tensor([1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1])}})\n", + "--------------\n", + "\n", + "defaultdict(, {'sender_bank_id': {('account', 
'transaction', 'account'): tensor([10000000003, 10000000002, 10000000001, 10000000001, 10000000002,\n", + " 10000000003, 10000000003, 10000000002, 10000000002, 10000000003,\n", + " 10000000001, 10000000001, 0, 10000000003, 10000000003,\n", + " 0, 10000000002, 0, 10000000001, 10000000003,\n", + " 10000000001, 10000000003, 10000000002, 0, 10000000003,\n", + " 10000000003, 10000000003, 10000000003, 10000000001, 10000000001,\n", + " 10000000002, 10000000001, 10000000003, 10000000003, 10000000001,\n", + " 10000000001, 0, 10000000003, 10000000002, 10000000001,\n", + " 10000000002, 10000000003, 10000000003, 10000000003, 10000000002,\n", + " 10000000003, 10000000002, 10000000003, 10000000002, 10000000001,\n", + " 10000000001, 0, 10000000003, 10000000003, 0,\n", + " 10000000003, 10000000003, 10000000001, 10000000001, 10000000003,\n", + " 10000000003, 10000000002])}, 'receiver_bank_id': {('account', 'transaction', 'account'): tensor([10000000003, 10000000003, 10000000001, 10000000002, 10000000002,\n", + " 10000000003, 10000000001, 10000000003, 10000000001, 10000000003,\n", + " 10000000002, 10000000003, 0, 10000000003, 10000000003,\n", + " 0, 10000000001, 0, 10000000002, 10000000003,\n", + " 10000000003, 10000000003, 10000000001, 0, 10000000003,\n", + " 10000000002, 10000000003, 10000000003, 10000000001, 10000000001,\n", + " 10000000003, 10000000003, 10000000003, 10000000003, 10000000001,\n", + " 10000000002, 0, 10000000001, 10000000001, 10000000002,\n", + " 10000000001, 10000000003, 10000000003, 10000000003, 10000000001,\n", + " 10000000003, 10000000002, 10000000003, 10000000002, 10000000001,\n", + " 10000000003, 0, 10000000003, 10000000003, 0,\n", + " 10000000003, 10000000002, 10000000002, 10000000001, 10000000003,\n", + " 10000000003, 10000000003])}, 'transaction_date': {('account', 'transaction', 'account'): tensor([ 201966, 201721, 2017528, 2018924, 2017516, 2018128, 2019213,\n", + " 201847, 2017914, 201966, 2017810, 20181020, 0, 2017724,\n", + " 201966, 0, 2019311, 0, 
2018211, 2018125, 201932,\n", + " 201966, 201795, 0, 201966, 2017111, 201966, 201966,\n", + " 2019822, 2017317, 2019124, 2017121, 2017110, 201966, 2017717,\n", + " 20181012, 0, 20181023, 2019724, 2019611, 2019928, 2019117,\n", + " 201966, 201966, 2017328, 201966, 2019316, 201966, 2017914,\n", + " 2017521, 201713, 0, 2018124, 201966, 0, 201784,\n", + " 201713, 20171212, 2019413, 201966, 201966, 201887])}, 'trans_time': {('account', 'transaction', 'account'): tensor([1136, 1516, 1340, 1030, 1552, 1116, 1450, 924, 1046, 1426, 1247, 1459,\n", + " 0, 1459, 1258, 0, 1758, 0, 1230, 1210, 1252, 1039, 1741, 0,\n", + " 1420, 1713, 1710, 1028, 1636, 1054, 1658, 1332, 1316, 955, 1629, 1642,\n", + " 0, 1710, 932, 1652, 1018, 1527, 1555, 1640, 1158, 1035, 1015, 1133,\n", + " 1320, 1514, 1213, 0, 1133, 1340, 0, 1026, 1312, 1027, 1745, 1342,\n", + " 1520, 1141])}, 'transaction_amt': {('account', 'transaction', 'account'): tensor([9000, 299, 498, 954, 756, 627, 142, 946, 920, 9000, 421, 343,\n", + " 9000, 457, 9000, 9000, 53, 9000, 284, 120, 441, 9000, 364, 901,\n", + " 9000, 279, 9000, 9000, 273, 127, 952, 354, 795, 9000, 835, 761,\n", + " 9000, 478, 172, 804, 665, 995, 9000, 9000, 670, 9000, 340, 9000,\n", + " 747, 347, 52, 911, 762, 9000, 0, 790, 619, 491, 954, 9000,\n", + " 9000, 843])}})\n" + ] + } + ], + "source": [ + "# Define Metagraph\n", + "fraud_detection_metagraph = {\n", + " \"vertexCollections\": {\n", + " \"account\": {\"rank\"},\n", + " \"Class\": {\"concrete\", \"name\"},\n", + " \"customer\": {\"Sex\", \"Ssn\", \"rank\"},\n", + " },\n", + " \"edgeCollections\": {\n", + " \"accountHolder\": {},\n", + " \"Relationship\": {},\n", + " \"transaction\": {\"receiver_bank_id\", \"sender_bank_id\", \"transaction_amt\", \"transaction_date\", \"trans_time\"},\n", + " },\n", + "}\n", + "\n", + "# When converting to DGL via an ArangoDB Metagraph that contains non-numerical values, a user-defined \n", + "# Controller class is required to specify how ArangoDB attributes should 
be converted to DGL features.\n", + "class FraudDetection_ADBDGL_Controller(ADBDGL_Controller):\n", + " \"\"\"ArangoDB-DGL controller.\n", + "\n", + " Responsible for controlling how ArangoDB attributes\n", + " are converted into DGL features, and vice-versa.\n", + "\n", + " You can derive your own custom ADBDGL_Controller if you want to maintain\n", + " consistency between your ArangoDB attributes & your DGL features.\n", + " \"\"\"\n", + "\n", + " def _adb_attribute_to_dgl_feature(self, key: str, col: str, val):\n", + " \"\"\"\n", + " Given an ArangoDB attribute key, its assigned value (for an arbitrary document),\n", + " and the collection it belongs to, convert it to a valid\n", + " DGL feature: https://docs.dgl.ai/en/0.6.x/guide/graph-feature.html.\n", + "\n", + " NOTE: You must override this function if you want to transfer non-numerical\n", + " ArangoDB attributes to DGL (DGL only accepts 'attributes' (a.k.a features)\n", + " of numerical types). Read more about DGL features here:\n", + " https://docs.dgl.ai/en/0.6.x/new-tutorial/2_dglgraph.html#assigning-node-and-edge-features-to-graph.\n", + "\n", + " :param key: The ArangoDB attribute key name\n", + " :type key: str\n", + " :param col: The ArangoDB collection of the ArangoDB document.\n", + " :type col: str\n", + " :param val: The assigned attribute value of the ArangoDB document.\n", + " :type val: Any\n", + " :return: The attribute's representation as a DGL Feature\n", + " :rtype: Any\n", + " \"\"\"\n", + " try:\n", + " if col == \"transaction\":\n", + " if key == \"transaction_date\":\n", + " return int(str(val).replace(\"-\", \"\"))\n", + " \n", + " if key == \"trans_time\":\n", + " return int(str(val).replace(\":\", \"\"))\n", + " \n", + " if col == \"customer\":\n", + " if key == \"Sex\":\n", + " return 0 if val == \"M\" else 1\n", + "\n", + " if key == \"Ssn\":\n", + " return int(str(val).replace(\"-\", \"\"))\n", + "\n", + " if col == \"Class\":\n", + " if key == \"name\":\n", + " if val == 
\"Bank\":\n", + " return 0\n", + " elif val == \"Branch\":\n", + " return 1\n", + " elif val == \"Account\":\n", + " return 2\n", + " elif val == \"Customer\":\n", + " return 3\n", + " else:\n", + " return -1\n", + " except (ValueError, TypeError, SyntaxError):\n", + " return 0\n", + "\n", + " return super()._adb_attribute_to_dgl_feature(key, col, val)\n", + "\n", + "fraud_adbdgl_adapter = ADBDGL_Adapter(con, FraudDetection_ADBDGL_Controller())\n", + "\n", + "# Create DGL Graph from attributes\n", + "dgl_g = fraud_adbdgl_adapter.arangodb_to_dgl('FraudDetection', fraud_detection_metagraph)\n", + "\n", + "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", + "# dgl_g = fraud_adbdgl_adapter.arangodb_to_dgl(graph_name = 'FraudDetection', fraud_detection_metagraph, ttl=1000, stream=True)\n", + "# See more here: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "\n", + "# Show graph data\n", + "print('\\n--------------')\n", + "print(dgl_g)\n", + "print('\\n--------------')\n", + "print(dgl_g.ndata)\n", + "print('--------------\\n')\n", + "print(dgl_g.edata)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bvzJXSHHTi3v" + }, + "source": [ + "# DGL to ArangoDB" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UafSB_3JZNwK" + }, + "source": [ + "## Example 1: DGL Karate Graph" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tx-tjPfx0U_h" + }, + "source": [ + "Data source\n", + "* [DGL Karate Graph](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#karate-club-dataset)\n", + "\n", + "Package methods used\n", + "* [`adbdgl_adapter.adapter.dgl_to_arangodb()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/adapter.py#L199-L297)\n", + "\n", + "Important notes\n", + "* The `name` parameter in this case is simply for naming your ArangoDB graph." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "eRVbiBy4ZdE4", + "outputId": "da33e345-87d7-442f-fbb7-5484b79889a5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ArangoDB: Karate created\n", + "\n", + "--------------------\n", + "https://tutorials.arangodb.cloud:8529\n", + "Username: TUT487i8kal98gb73c2iklds\n", + "Password: TUTn5t85w8t50kcupmo2mmyb\n", + "Database: TUTn187e39v9qho3768ilyk4\n", + "--------------------\n", + "\n", + "Inspect the graph here: https://tutorials.arangodb.cloud:8529/_db/TUTn187e39v9qho3768ilyk4/_admin/aardvark/index.html#graph/Karate\n", + "\n", + "View the original graph below:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Load the dgl graph & draw\n", + "dgl_karate_graph = KarateClubDataset()[0]\n", + "nx.draw(dgl_karate_graph.to_networkx(), with_labels=True)\n", + "\n", + "# Create the ArangoDB graph\n", + "name = \"Karate\"\n", + "db.delete_graph(name, drop_collections=True, ignore_missing=True)\n", + "adb_karate_graph = adbdgl_adapter.dgl_to_arangodb(name, dgl_karate_graph)\n", + "\n", + "print('\\n--------------------')\n", + "print(\"https://{}:{}\".format(con[\"hostname\"], con[\"port\"]))\n", + "print(\"Username: \" + con[\"username\"])\n", + "print(\"Password: \" + con[\"password\"])\n", + "print(\"Database: \" + con[\"dbName\"])\n", + "print('--------------------\\n')\n", + "print(f\"Inspect the graph here: https://tutorials.arangodb.cloud:8529/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\\n\")\n", + "print(f\"View the original graph below:\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gshTlSX_ZZsS" + }, + "source": [ + "\n", + "## Example 2: DGL MiniGCDataset Graphs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KaExiE2x0-M6" + }, + "source": [ + "Data source\n", + "* [DGL Mini Graph Classification Dataset](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#mini-graph-classification-dataset)\n", + "\n", + "Package methods used\n", + "* [`adbdgl_adapter.adapter.dgl_to_arangodb()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/adapter.py#L199-L297)\n", + "\n", + "Important notes\n", + "* The `name` parameters in this case are simply for naming your ArangoDB graph." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "dADiexlAioGH", + "outputId": "f7926863-61bb-4202-ba7e-e55413d20ca5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ArangoDB: Lollipop created\n", + "ArangoDB: Hypercube created\n", + "ArangoDB: Clique created\n", + "\n", + "--------------------\n", + "https://tutorials.arangodb.cloud:8529\n", + "Username: TUT487i8kal98gb73c2iklds\n", + "Password: TUTn5t85w8t50kcupmo2mmyb\n", + "Database: TUTn187e39v9qho3768ilyk4\n", + "--------------------\n", + "\n", + "\n", + "Inspect the graphs here:\n", + "\n", + "1) https://tutorials.arangodb.cloud:8529/_db/TUTn187e39v9qho3768ilyk4/_admin/aardvark/index.html#graph/Lollipop\n", + "2) https://tutorials.arangodb.cloud:8529/_db/TUTn187e39v9qho3768ilyk4/_admin/aardvark/index.html#graph/Hypercube\n", + "3) https://tutorials.arangodb.cloud:8529/_db/TUTn187e39v9qho3768ilyk4/_admin/aardvark/index.html#graph/Clique\n", + "\n", + "\n", + "View the original graphs below:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Load the dgl graphs & draw\n", + "dgl_lollipop_graph = remove_self_loop(MiniGCDataset(8, 7, 8)[3][0])\n", + "plt.figure(1)\n", + "nx.draw(dgl_lollipop_graph.to_networkx(), with_labels=True)\n", + "\n", + "dgl_hypercube_graph = remove_self_loop(MiniGCDataset(8, 8, 9)[4][0])\n", + "plt.figure(2)\n", + "nx.draw(dgl_hypercube_graph.to_networkx(), with_labels=True)\n", + "\n", + "dgl_clique_graph = remove_self_loop(MiniGCDataset(8, 6, 7)[6][0])\n", + "plt.figure(3)\n", + "nx.draw(dgl_clique_graph.to_networkx(), with_labels=True)\n", + "\n", + "# Create the ArangoDB graphs\n", + "lollipop = \"Lollipop\"\n", + "hypercube = \"Hypercube\"\n", + "clique = \"Clique\"\n", + "\n", + "db.delete_graph(lollipop, drop_collections=True, ignore_missing=True)\n", + "db.delete_graph(hypercube, drop_collections=True, ignore_missing=True)\n", + "db.delete_graph(clique, drop_collections=True, ignore_missing=True)\n", + "\n", + "adb_lollipop_graph = adbdgl_adapter.dgl_to_arangodb(lollipop, dgl_lollipop_graph)\n", + "adb_hypercube_graph = adbdgl_adapter.dgl_to_arangodb(hypercube, dgl_hypercube_graph)\n", + "adb_clique_graph = adbdgl_adapter.dgl_to_arangodb(clique, dgl_clique_graph)\n", + "\n", + "print('\\n--------------------')\n", + "print(\"https://{}:{}\".format(con[\"hostname\"], con[\"port\"]))\n", + "print(\"Username: \" + con[\"username\"])\n", + "print(\"Password: \" + con[\"password\"])\n", + "print(\"Database: \" + con[\"dbName\"])\n", + "print('--------------------\\n')\n", + "print(\"\\nInspect the graphs here:\\n\")\n", + "print(f\"1) https://tutorials.arangodb.cloud:8529/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{lollipop}\")\n", + "print(f\"2) https://tutorials.arangodb.cloud:8529/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{hypercube}\")\n", + "print(f\"3) https://tutorials.arangodb.cloud:8529/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{clique}\\n\")\n", + 
"print(f\"\\nView the original graphs below:\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CNj1xKhwoJoL" + }, + "source": [ + "\n", + "## Example 3: DGL MiniGCDataset Graphs with a custom controller" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CZ1UX9YX1Zzo" + }, + "source": [ + "Data source\n", + "* [DGL Mini Graph Classification Dataset](https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#mini-graph-classification-dataset)\n", + "\n", + "Package methods used\n", + "* [`adbdgl_adapter.adapter.dgl_to_arangodb()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/adapter.py#L199-L297)\n", + "* [`adbdgl_adapter.controller._dgl_feature_to_adb_attribute()`](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/controller.py#L49-L70)\n", + "\n", + "Important notes\n", + "* The `name` parameters in this case are simply for naming your ArangoDB graph.\n", + "* We are creating a custom `ADBDGL_Controller` to specify *how* to convert our DGL node/edge features into ArangoDB vertex/edge attributes. View the default `ADBDGL_Controller` [here](https://github.com/arangoml/dgl-adapter/blob/1.0.1/adbdgl_adapter/controller.py#L11)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jbJsvMMaoJoT", + "outputId": "7cc01a20-201b-42ea-85e7-c60d0d8d2fbb" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connecting to https://tutorials.arangodb.cloud:8529\n", + "ArangoDB: Lollipop_With_Attributes created\n", + "ArangoDB: Hypercube_With_Attributes created\n", + "ArangoDB: Clique_With_Attributes created\n", + "\n", + "--------------------\n", + "https://tutorials.arangodb.cloud:8529\n", + "Username: TUT487i8kal98gb73c2iklds\n", + "Password: TUTn5t85w8t50kcupmo2mmyb\n", + "Database: TUTn187e39v9qho3768ilyk4\n", + "--------------------\n", + "\n", + "\n", + "Inspect the graphs here:\n", + "\n", + "1) https://tutorials.arangodb.cloud:8529/_db/TUTn187e39v9qho3768ilyk4/_admin/aardvark/index.html#graph/Lollipop_With_Attributes\n", + "2) https://tutorials.arangodb.cloud:8529/_db/TUTn187e39v9qho3768ilyk4/_admin/aardvark/index.html#graph/Hypercube_With_Attributes\n", + "3) https://tutorials.arangodb.cloud:8529/_db/TUTn187e39v9qho3768ilyk4/_admin/aardvark/index.html#graph/Clique_With_Attributes\n", + "\n" + ] + } + ], + "source": [ + "from torch.functional import Tensor\n", + "\n", + "# Load the dgl graphs\n", + "dgl_lollipop_graph = remove_self_loop(MiniGCDataset(8, 7, 8)[3][0])\n", + "dgl_hypercube_graph = remove_self_loop(MiniGCDataset(8, 8, 9)[4][0])\n", + "dgl_clique_graph = remove_self_loop(MiniGCDataset(8, 6, 7)[6][0])\n", + "\n", + " # Add DGL Node & Edge Features to each graph\n", + "dgl_lollipop_graph.ndata[\"random_ndata\"] = torch.tensor(\n", + " [[i, i, i] for i in range(0, dgl_lollipop_graph.num_nodes())]\n", + ")\n", + "dgl_lollipop_graph.edata[\"random_edata\"] = torch.rand(dgl_lollipop_graph.num_edges())\n", + "\n", + "dgl_hypercube_graph.ndata[\"random_ndata\"] = torch.rand(dgl_hypercube_graph.num_nodes())\n", + "dgl_hypercube_graph.edata[\"random_edata\"] = 
torch.tensor(\n", + " [[[i], [i], [i]] for i in range(0, dgl_hypercube_graph.num_edges())]\n", + ")\n", + "\n", + "dgl_clique_graph.ndata['clique_ndata'] = torch.tensor([1,2,3,4,5,6])\n", + "dgl_clique_graph.edata['clique_edata'] = torch.tensor(\n", + " [1 if i % 2 == 0 else 0 for i in range(0, dgl_clique_graph.num_edges())]\n", + ")\n", + "\n", + "\n", + "# When converting to ArangoDB from DGL, a user-defined Controller class\n", + "# is required to specify how DGL features (aka attributes) should be converted \n", + "# into ArangoDB attributes. NOTE: A custom Controller is NOT needed you want to\n", + "# keep the numerical-based values of your DGL features.\n", + "class Clique_ADBDGL_Controller(ADBDGL_Controller):\n", + " \"\"\"ArangoDB-DGL controller.\n", + "\n", + " Responsible for controlling how ArangoDB attributes\n", + " are converted into DGL features, and vice-versa.\n", + "\n", + " You can derive your own custom ADBDGL_Controller if you want to maintain\n", + " consistency between your ArangoDB attributes & your DGL features.\n", + " \"\"\"\n", + "\n", + " def _dgl_feature_to_adb_attribute(self, key: str, col: str, val: Tensor):\n", + " \"\"\"\n", + " Given a DGL feature key, its assigned value (for an arbitrary node or edge),\n", + " and the collection it belongs to, convert it to a valid ArangoDB attribute\n", + " (e.g string, list, number, ...).\n", + "\n", + " NOTE: No action is needed here if you want to keep the numerical-based values\n", + " of your DGL features.\n", + "\n", + " :param key: The DGL attribute key name\n", + " :type key: str\n", + " :param col: The ArangoDB collection of the (soon-to-be) ArangoDB document.\n", + " :type col: str\n", + " :param val: The assigned attribute value of the DGL node.\n", + " :type val: Tensor\n", + " :return: The feature's representation as an ArangoDB Attribute\n", + " :rtype: Any\n", + " \"\"\"\n", + " if key == \"clique_ndata\":\n", + " if val == 1:\n", + " return \"one is fun\"\n", + " elif val == 
2:\n", + " return \"two is blue\"\n", + " elif val == 3:\n", + " return \"three is free\"\n", + " elif val == 4:\n", + " return \"four is more\"\n", + " else: # No special string for values 5 & 6\n", + " return f\"ERROR! Unrecognized value, got {val}\"\n", + "\n", + " if key == \"clique_edata\":\n", + " return bool(val)\n", + "\n", + " return super()._dgl_feature_to_adb_attribute(key, col, val)\n", + "\n", + "# Re-instantiate a new adapter specifically for the Clique Graph Conversion\n", + "clique_adbgl_adapter = ADBDGL_Adapter(con, Clique_ADBDGL_Controller())\n", + "\n", + "# Create the ArangoDB graphs\n", + "lollipop = \"Lollipop_With_Attributes\"\n", + "hypercube = \"Hypercube_With_Attributes\"\n", + "clique = \"Clique_With_Attributes\"\n", + "\n", + "db.delete_graph(lollipop, drop_collections=True, ignore_missing=True)\n", + "db.delete_graph(hypercube, drop_collections=True, ignore_missing=True)\n", + "db.delete_graph(clique, drop_collections=True, ignore_missing=True)\n", + "\n", + "adb_lollipop_graph = adbdgl_adapter.dgl_to_arangodb(lollipop, dgl_lollipop_graph)\n", + "adb_hypercube_graph = adbdgl_adapter.dgl_to_arangodb(hypercube, dgl_hypercube_graph)\n", + "adb_clique_graph = clique_adbgl_adapter.dgl_to_arangodb(clique, dgl_clique_graph) # Notice the new adapter here!\n", + "\n", + "print('\\n--------------------')\n", + "print(\"https://{}:{}\".format(con[\"hostname\"], con[\"port\"]))\n", + "print(\"Username: \" + con[\"username\"])\n", + "print(\"Password: \" + con[\"password\"])\n", + "print(\"Database: \" + con[\"dbName\"])\n", + "print('--------------------\\n')\n", + "print(\"\\nInspect the graphs here:\\n\")\n", + "print(f\"1) https://tutorials.arangodb.cloud:8529/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{lollipop}\")\n", + "print(f\"2) https://tutorials.arangodb.cloud:8529/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{hypercube}\")\n", + "print(f\"3) 
https://tutorials.arangodb.cloud:8529/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{clique}\\n\")" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "ot1oJqn7m78n", + "QfE_tKxneG9A", + "ZrEDmtqCVD0W", + "RQ4CknYfUEuz", + "qEH6OdSB23Ya", + "DqIKT1lO4ASw", + "UafSB_3JZNwK", + "gshTlSX_ZZsS", + "CNj1xKhwoJoL" + ], + "name": "ArangoDB_DGL_Adapter_v1.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}