From 25cb51165d59f408ef6c833e18c26447e1078cec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hasan=20Balc=C4=B1?= <balcihasan99@gmail.com>
Date: Wed, 29 Nov 2023 14:32:15 +0000
Subject: [PATCH] Change links to compact identifiers and disable BridgeDb
 mappings

---
 scripts/generate-svgs/add_identifiers | 55 +++++++++++++++++++++++++++
 scripts/generate-svgs/gpmlconverter   | 35 +++++++++--------
 2 files changed, 74 insertions(+), 16 deletions(-)
 create mode 100755 scripts/generate-svgs/add_identifiers

diff --git a/scripts/generate-svgs/add_identifiers b/scripts/generate-svgs/add_identifiers
new file mode 100755
index 00000000..b08317bc
--- /dev/null
+++ b/scripts/generate-svgs/add_identifiers
@@ -0,0 +1,55 @@
+#!/usr/bin/env node
+// Tags every cross-referenced DataNode in a pathway JSON file with a
+// "CompactIdentifier:<id>" type entry looked up from the pathway's datanodes TSV.
+// Usage: add_identifiers <pathway.json>   (the JSON file is rewritten in place)
+
+const fs = require("fs");
+const path = require("path");
+
+const pathwayFile = process.argv[2];
+if (!pathwayFile) {
+  throw new Error("Must specify a pathway");
+}
+const { pathway, entitiesById } = JSON.parse(fs.readFileSync(pathwayFile));
+
+// The datanodes TSV is read from <base>/pathways/<name>/<name>-datanodes.tsv,
+// where <name> is the pathway file's base name without extension and <base>
+// is two directories above this script.
+const pathwayName = path.parse(pathwayFile).name;
+const pathToDatabase = path.join(__dirname, "..", "..");
+const pathToDatanodes = path.join(pathToDatabase, "pathways", pathwayName, `${pathwayName}-datanodes.tsv`);
+
+// Map label (column 1) -> compact identifier (column 3), skipping the header
+// row and blank lines such as the one produced by a trailing newline.
+const dataNodesMap = new Map();
+fs.readFileSync(pathToDatanodes, "utf8").split(/\r?\n/).forEach((line, index) => {
+  if (index === 0 || line === "") {
+    return;
+  }
+  const columns = line.split("\t");
+  dataNodesMap.set(columns[0], columns[2]);
+});
+
+// Only DataNodes that carry both an xref data source and identifier are tagged.
+const dataNodes = Object.values(entitiesById).filter(
+  (entity) =>
+    entity.type !== undefined &&
+    entity.type.indexOf("DataNode") > -1 &&
+    entity.xrefDataSource &&
+    entity.xrefIdentifier
+);
+
+for (const entity of dataNodes) {
+  const compactIdentifier = dataNodesMap.get(entity.textContent);
+  // Skip unmatched labels so no "CompactIdentifier:undefined" entry is written.
+  if (compactIdentifier) {
+    entity.type.push(`CompactIdentifier:${compactIdentifier}`);
+  }
+}
+
+fs.writeFile(pathwayFile, JSON.stringify({ pathway, entitiesById }), (err) => {
+  if (err) {
+    console.error(`Error adding compact identifier:\n      ${err}\n      ${__filename}`);
+    process.exitCode = 1; // signal the failure to the caller via the exit status
+  }
+});
\ No newline at end of file
diff --git a/scripts/generate-svgs/gpmlconverter b/scripts/generate-svgs/gpmlconverter
index 09369acd..d877045c 100755
--- a/scripts/generate-svgs/gpmlconverter
+++ b/scripts/generate-svgs/gpmlconverter
@@ -204,19 +204,21 @@ elif [[ "$ext_out" =~ ^(json|jsonld)$ ]]; then
   if [ -z "$organism" ] || [ -z "$xref_identifiers" ]; then
     echo 'No xrefs to process.'
   else
-    mv "$path_out" "$path_out.b4bridgedb.json"
-    bridgedb xrefs -f json \
-        -i '.entitiesById[].type' "$organism" '.entitiesById[].xrefDataSource' '.entitiesById[].xrefIdentifier' \
-        ChEBI P683 Ensembl P594 "Entrez Gene" P351 HGNC P353 HMDB P2057 Wikidata \
-       < "$path_out.b4bridgedb.json" > "$path_out"
+    #mv "$path_out" "$path_out.b4bridgedb.json"
+    #bridgedb xrefs -f json \
+    #    -i '.entitiesById[].type' "$organism" '.entitiesById[].xrefDataSource' '.entitiesById[].xrefIdentifier' \
+    #    ChEBI P683 Ensembl P594 "Entrez Gene" P351 HGNC P353 HMDB P2057 Wikidata \
+    #   < "$path_out.b4bridgedb.json" > "$path_out"
 
     # Add Wikidata ids
     #"$SCRIPT_DIR/add_wd_ids" "$path_out"
-    add_wd_ids "$path_out"
+    #add_wd_ids "$path_out"
 
     # Add HGNC Symbols, without the "HGNC:" namespace prefixed
     #"$SCRIPT_DIR/add_hgnc_symbols" "$path_out"
-    add_hgnc_symbols "$path_out"
+    #add_hgnc_symbols "$path_out"
+    # Add compact identifier
+    add_identifiers "$path_out"
   fi
 
 elif [[ "$ext_out" =~ ^(svg|pvjssvg)$ ]]; then
@@ -424,21 +426,22 @@ r '"$SCRIPT_DIR/plain.css"'
     cp "$path_out" "$path_out_tmp"
     el_count=$(xmlstarlet sel -N svg='http://www.w3.org/2000/svg' -t -v "count(/svg:svg/svg:g//*[@class])" "$path_out_tmp")
     for i in $(seq $el_count); do
-    	readarray -t wditems <<<$(xmlstarlet sel -N svg='http://www.w3.org/2000/svg' -t \
+    	readarray -t ciItems <<<$(xmlstarlet sel -N svg='http://www.w3.org/2000/svg' -t \
     			-v "(/svg:svg/svg:g//*[@class])[$i]/@class" "$path_out_tmp" | \
-    		awk '/Wikidata_Q[0-9]+/' | tr ' ' '\n' | awk '/Wikidata_Q[0-9]+/');
-	wditems_len="${#wditems[@]}"
-    	if [[ wditems_len -eq 1 ]]; then
-		wditem=${wditems[0]}
-		if [ ! -z $wditem ]; then
-
+    		awk '/CompactIdentifier/' | tr ' ' '\n' | awk '/CompactIdentifier/');
+	ciItems_len="${#ciItems[@]}"
+    	if [[ ciItems_len -eq 1 ]]; then
+		ciItem=${ciItems[0]}
+		if [ ! -z $ciItem ]; then
+      ciItem=$(echo "$ciItem" | sed 's/\(.*\)_/\1:/')	# replace the last '_' with ':'
+      ciItem=${ciItem#*_} # strip the prefix up to and including the first '_' (i.e. 'CompactIdentifier_')
 			#wikidata_iri=$(echo "$wditem" | awk -F'_' '{print "https://www.wikidata.org/wiki/"$NF}')
-			scholia_iri=$(echo "$wditem" | awk -F'_' '{print "https://tools.wmflabs.org/scholia/"$NF}')
+			link_iri=$(echo "$ciItem" | awk '{print "https://bioregistry.io/"$NF}')
 
 			xmlstarlet ed -L -N svg='http://www.w3.org/2000/svg' \
 						-i "(/svg:svg/svg:g//*[@class])[$i]" \
 						-t attr -n "xlink:href" \
-						-v "$scholia_iri" \
+						-v "$link_iri" \
 						"$path_out_tmp";
 	    
 			xmlstarlet ed -L -N svg='http://www.w3.org/2000/svg' \