From 25cb51165d59f408ef6c833e18c26447e1078cec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hasan=20Balc=C4=B1?=
Date: Wed, 29 Nov 2023 14:32:15 +0000
Subject: [PATCH] Change links to compact identifiers and disable BridgeDb mappings

---
Review note: add_identifiers was revised after this patch was generated; the
blob hash on its "index" line has not been recomputed.

 scripts/generate-svgs/add_identifiers | 74 +++++++++++++++++++++++++++
 scripts/generate-svgs/gpmlconverter   | 35 +++++++------
 2 files changed, 93 insertions(+), 16 deletions(-)
 create mode 100755 scripts/generate-svgs/add_identifiers

diff --git a/scripts/generate-svgs/add_identifiers b/scripts/generate-svgs/add_identifiers
new file mode 100755
index 00000000..b08317bc
--- /dev/null
+++ b/scripts/generate-svgs/add_identifiers
@@ -0,0 +1,74 @@
+#!/usr/bin/env node
+
+/**
+ * Adds a "CompactIdentifier:<value>" entry to the `type` array of every
+ * DataNode in a pathway JSON file and rewrites that file in place.
+ *
+ * Usage: add_identifiers <pathway.json>
+ *
+ * Values are looked up by DataNode label (`textContent`) in
+ * pathways/<name>/<name>-datanodes.tsv, located two directories above this
+ * script, where <name> is the base name of the pathway file.
+ */
+
+"use strict";
+
+const fs = require("fs");
+const path = require("path");
+
+const pathwayFile = process.argv[2];
+if (!pathwayFile) {
+  throw new Error("Must specify a pathway");
+}
+const { pathway, entitiesById } = JSON.parse(
+  fs.readFileSync(pathwayFile, "utf8")
+);
+
+// Find the corresponding datanodes.tsv file of the given pathway.
+const { name: pathwayName } = path.parse(pathwayFile);
+const pathToDatabase = path.join(__dirname, "..", "..");
+const pathToDatanodes = path.join(
+  pathToDatabase,
+  "pathways",
+  pathwayName,
+  `${pathwayName}-datanodes.tsv`
+);
+
+// Map labels (first column) to compact identifiers (third column). The first
+// row is skipped, as in a file that starts with a header row.
+const dataNodesMap = new Map();
+fs.readFileSync(pathToDatanodes, "utf8")
+  .split(/\r?\n/)
+  .slice(1)
+  .forEach((line) => {
+    const [label, , compactIdentifier] = line.split("\t");
+    dataNodesMap.set(label, compactIdentifier);
+  });
+
+// DataNodes that carry an xref (data source and identifier).
+const dataNodes = Object.values(entitiesById).filter(
+  ({ type, xrefDataSource, xrefIdentifier }) =>
+    Array.isArray(type) &&
+    type.includes("DataNode") &&
+    xrefDataSource &&
+    xrefIdentifier
+);
+
+for (const dataNode of dataNodes) {
+  const compactIdentifier = dataNodesMap.get(dataNode.textContent);
+  // Labels missing from the TSV (or rows without a third column) yield no
+  // value; skip them rather than emitting "CompactIdentifier:undefined".
+  if (compactIdentifier) {
+    dataNode.type.push(`CompactIdentifier:${compactIdentifier}`);
+  }
+}
+
+fs.writeFile(pathwayFile, JSON.stringify({ pathway, entitiesById }), (err) => {
+  if (err) {
+    // Report the actual error and fail the process so the caller can notice.
+    console.error(
+      `Error adding compact identifier:\n${err}\n${__filename}`
+    );
+    process.exitCode = 1;
+  }
+});
diff --git a/scripts/generate-svgs/gpmlconverter b/scripts/generate-svgs/gpmlconverter index 09369acd..d877045c 100755 --- a/scripts/generate-svgs/gpmlconverter +++ b/scripts/generate-svgs/gpmlconverter @@ -204,19 +204,21 @@ elif [[ "$ext_out" =~ ^(json|jsonld)$ ]]; then if [ -z "$organism" ] || [ -z "$xref_identifiers" ]; then echo 'No xrefs to process.' else - mv "$path_out" "$path_out.b4bridgedb.json" - bridgedb xrefs -f json \ - -i '.entitiesById[].type' "$organism" '.entitiesById[].xrefDataSource' '.entitiesById[].xrefIdentifier' \ - ChEBI P683 Ensembl P594 "Entrez Gene" P351 HGNC P353 HMDB P2057 Wikidata \ - < "$path_out.b4bridgedb.json" > "$path_out" + #mv "$path_out" "$path_out.b4bridgedb.json" + #bridgedb xrefs -f json \ + # -i '.entitiesById[].type' "$organism" '.entitiesById[].xrefDataSource' '.entitiesById[].xrefIdentifier' \ + # ChEBI P683 Ensembl P594 "Entrez Gene" P351 HGNC P353 HMDB P2057 Wikidata \ + # < "$path_out.b4bridgedb.json" > "$path_out" # Add Wikidata ids #"$SCRIPT_DIR/add_wd_ids" "$path_out" - add_wd_ids "$path_out" + #add_wd_ids "$path_out" # Add HGNC Symbols, without the "HGNC:" namespace prefixed #"$SCRIPT_DIR/add_hgnc_symbols" "$path_out" - add_hgnc_symbols "$path_out" + #add_hgnc_symbols "$path_out" + # Add compact identifier + add_identifiers "$path_out" fi elif [[ "$ext_out" =~ ^(svg|pvjssvg)$ ]]; then @@ -424,21 +426,22 @@ r '"$SCRIPT_DIR/plain.css"' cp "$path_out" "$path_out_tmp" el_count=$(xmlstarlet sel -N svg='http://www.w3.org/2000/svg' -t -v "count(/svg:svg/svg:g//*[@class])" "$path_out_tmp") for i in $(seq $el_count); do - readarray -t wditems <<<$(xmlstarlet sel -N svg='http://www.w3.org/2000/svg' -t \ + readarray -t ciItems 
<<<$(xmlstarlet sel -N svg='http://www.w3.org/2000/svg' -t \ -v "(/svg:svg/svg:g//*[@class])[$i]/@class" "$path_out_tmp" | \ - awk '/Wikidata_Q[0-9]+/' | tr ' ' '\n' | awk '/Wikidata_Q[0-9]+/'); - wditems_len="${#wditems[@]}" - if [[ wditems_len -eq 1 ]]; then - wditem=${wditems[0]} - if [ ! -z $wditem ]; then - + awk '/CompactIdentifier/' | tr ' ' '\n' | awk '/CompactIdentifier/'); + ciItems_len="${#ciItems[@]}" + if [[ ciItems_len -eq 1 ]]; then + ciItem=${ciItems[0]} + if [ ! -z $ciItem ]; then + ciItem=$(echo "$ciItem" | sed 's/\(.*\)_/\1:/') # remove last '_' to ':' + ciItem=${ciItem#*_} # remove after CompactIdentifier_ #wikidata_iri=$(echo "$wditem" | awk -F'_' '{print "https://www.wikidata.org/wiki/"$NF}') - scholia_iri=$(echo "$wditem" | awk -F'_' '{print "https://tools.wmflabs.org/scholia/"$NF}') + link_iri=$(echo "$ciItem" | awk '{print "https://bioregistry.io/"$NF}') xmlstarlet ed -L -N svg='http://www.w3.org/2000/svg' \ -i "(/svg:svg/svg:g//*[@class])[$i]" \ -t attr -n "xlink:href" \ - -v "$scholia_iri" \ + -v "$link_iri" \ "$path_out_tmp"; xmlstarlet ed -L -N svg='http://www.w3.org/2000/svg' \