+*/
diff --git a/taxonium_component/src/utils/extract.js b/taxonium_component/src/utils/extract.js
index 528f88dd..a235701d 100644
--- a/taxonium_component/src/utils/extract.js
+++ b/taxonium_component/src/utils/extract.js
@@ -1,119 +1,141 @@
-import processJsonLines from './nodeMapper.js';
-
-/*
-getParsimonySamples function outline:
-inputs:
- sampleID, which is the explicit node or sample name, not internal ID
- maxParsimony, which is the SNP distance threshold of interest
- Nested functions:
- processJsonLines: main worker of the backend, which reads in the jsonl file and constructs a map of all internal nodes, children, and mutations, and checks if the sample exists in the tree
- findNodesWithinDistance: a helper function to find all nodes within a certain distance of a given node
- traverses up and down the tree, adding nodes to a results array if they are within the distance threshold
- returns the results array
- traverseUp: helper function to traverse up the tree, adding nodes to the results array if they are within the distance threshold
- traverseDown: helper function to traverse down the tree, adding nodes to the results array if they are within the distance threshold
-outputs: should output a simple list/array-like of internal IDs and their SNP distances from the queried node, some flags that determine whether or not a valid search was performed, as well as a map of each nodes name with genbank accession and pangolin lineage
-with this list as a result, we can then query the backend for more information about each node, like name, mutations, etc, if needed
- this above includes if specificMut is passed through, which would allow for filtering based on whether or not a node has a specific mutation,
- but the filtering is done after all SNPs within distance are found, to reduce processing time if flag isnt specified
-once the list is obtained, snpComponent formats the list for output into Taxonium(Big step)
-*/
-async function getParsimonySamples(sampleID, maxParsimony) {
- return processJsonLines("https://cov2tree.nyc3.cdn.digitaloceanspaces.com/latest_public.jsonl.gz",sampleID).then(myResult => {//answersArray=[nodes, foundSample, foundSampleID, foundParentID, foundSNPCount, isBranch]
- if (myResult==="Error parsing JSON"){//if error parsing JSON, return error
- return "Error parsing JSON";
- }
- var nodeMap=myResult[0]//index of all internal nodes and children
- // Main function to find all nodes within a certain distance of a given node
- function findNodesWithinDistance(node, distanceThreshold) {
- // Helper function to traverse up (towards the parent)
- function traverseUp(node, currentDistance) {
- var parent_id=nodeMap[node].parent_id
- var snpCount=nodeMap[node].snpCount
- if (parent_id===node || currentDistance > distanceThreshold) {//if root node(root has itself as parent), or if threshold is reached,
- //console.log("reached root node or threshold, returning at distance "+currentDistance+" from node "+node+" with parent "+parent_id+" and snpCount "+snpCount)
- return;//end traversal
- }
- //console.log("traversing up, new node is "+parent_id+" with distance "+(currentDistance + snpCount))
- if (!visited.has(parent_id)) {// Check if this node has already been visited to avoid infinite loops
- visited.add(parent_id);
- traverseDown(parent_id, currentDistance + snpCount);//Traverse down from the parent
- traverseUp(parent_id, currentDistance + snpCount);// Traverse further up
- }
- }
- // Helper function to traverse down (towards the children)
- function traverseDown(node, currentDistance) {
- if (!nodeMap[node]|| currentDistance > distanceThreshold) {return;}//if node is a leaf node, or it threshold is reached, return
- for (const child of nodeMap[node].children) {// Traverse all children
- let decodedChild=child.split("=")//split encoded child into internal ID and SNP distance
- let childId=decodedChild[0]//get internal ID of child
- let childSnpDist=parseInt(decodedChild[1])//get SNP distance of child
- let newTotal=currentDistance+childSnpDist//add SNP distance of child to current distance
- //console.log("traversing down, new node is "+childId+" with distance "+childSnpDist+" for new total "+newTotal)
- if (!visited.has(childId)&&!visited.has(decodedChild[3])) {//need a switch to add childs as genbank accession or node ID, since some sample names are repeated
- if (childId.match(/^\d+$/)){//if its just numbers, its an internal node, so we add it to visited as is
- visited.add(childId);
- }
- else {visited.add(decodedChild[3]);}//if its not just numbers, its a leaf node, so we add the genbank accession to visited
- if ((newTotal <= distanceThreshold)){ //dont add the root node, as its always going to be within SNP distance of itself
- //console.log("adding node to results:"+childId+" with distance "+newTotal)
- if (!nodeMap[childId]){//if its not an entry in node map, means its not an internal node, so we add it to the results
- //console.log("adding node to results:"+decodedChild)
- results.push([decodedChild[0], newTotal, decodedChild[2], decodedChild[3]]);
- }
- }
- if (nodeMap[childId]){//if the child is an internal node, traverse down
- traverseDown(childId,newTotal);// Traverse further down; pass ID, not node info itself
- }
- }
- }
- }
-
- // Start of the main function
- //boolean obtained during traversal of the whole tree; if the queried sample exists in taxonium, this will flag as true
- if (!myResult[1]) {//if boolean is falsey
- console.log("Node not found in the tree");//its not a valid node, return error statement
- return "Node not found in the tree";
- }
-
- let visited = new Set(); // To keep track of visited nodes
- let results = []; // To store nodes within the distance threshold
- visited.add(myResult[2]); //add ID of queried sample to visited
- if (myResult[5]){//if the node is an internal node
- traverseDown(myResult[2], 0);//start traversal from the internal node, we have a neutral distance of 0
- traverseUp(myResult[2], 0);
- }
- else{
- traverseDown(myResult[3], myResult[4]);
- traverseUp(myResult[3], myResult[4]);
-
- }
- //internal ID of the queried sample
- // Traverse as far down as possible first, then go up, and traverse down again ignoring visited nodes
- return results;
- }
-
- let goodSamples = findNodesWithinDistance(sampleID, maxParsimony)
- nodeMap=null;
- return goodSamples
- })
- .catch(error => {
- // Catch any errors from processJsonLines or thrown in the then block
- console.error('Error in getParsimonySamples:', error);
- return "Error processing samples";
- });
-}
-/*
-getParsimonySamples("node_960478", 5)
- .then(result => {
- console.log("Results:", result);
- })
- .catch(error => {
- console.error("Error processing samples:", error);
- });
-*/
-export default getParsimonySamples;
-
-/*
-NOTES:
-*/
+import processJsonLines from "./nodeMapper.js";
+
+/*
+getParsimonySamples(sampleID, maxParsimony)
+Finds the leaf samples whose accumulated SNP distance from a queried node or sample is within a threshold.
+inputs:
+ sampleID: the explicit node or sample name (compared with each record's name), not the internal ID
+ maxParsimony: the SNP distance threshold of interest (a total equal to the threshold is still accepted)
+steps:
+ processJsonLines: main worker of the backend, which reads in the jsonl file, builds a map of internal nodes
+ with their parent, SNP count and encoded children, and reports whether the queried sample exists in the tree
+ findNodesWithinDistance: walks the tree from the queried node and collects the leaves within the threshold
+ traverseUp: climbs towards the root, adding each visited node's SNP count to the running distance
+ traverseDown: descends through the children, pushing leaf entries whose running distance is within the threshold
+outputs (through the returned promise):
+ on success: an array with one [name, SNP distance, pangolin lineage, genbank accession] entry per leaf found
+ otherwise one of the strings "Error parsing JSON", "Node not found in the tree" or "Error processing samples"
+NOTE(review): the earlier outline also described a specificMut filter applied after the search and formatting of the
+ list by snpComponent for output into Taxonium; neither is visible in this file, so confirm against the callers
+*/
+async function getParsimonySamples(sampleID, maxParsimony) {
+ return processJsonLines(
+ "https://cov2tree.nyc3.cdn.digitaloceanspaces.com/latest_public.jsonl.gz",
+ sampleID
+ )
+ .then((myResult) => {
+ //myResult=[nodes, foundSample, foundSampleID, foundParentID, foundSNPCount, isBranch], or an error string
+ if (myResult === "Error parsing JSON") {
+ //processJsonLines reports a failed line as this string; pass it through unchanged
+ return "Error parsing JSON";
+ }
+ var nodeMap = myResult[0]; //map of node ID to {parent_id, snpCount, children}; leaves only appear inside children
+ // Collects the leaves within distanceThreshold of the queried node; the start point comes from myResult, the node argument is not read
+ function findNodesWithinDistance(node, distanceThreshold) {
+ // Helper function to traverse up (towards the parent)
+ function traverseUp(node, currentDistance) {
+ var parent_id = nodeMap[node].parent_id;
+ var snpCount = nodeMap[node].snpCount; //number of counted mutations recorded for this node
+ if (parent_id === node || currentDistance > distanceThreshold) {
+ //stop at the root (the root has itself as parent), or once the threshold has been exceeded
+ //console.log("reached root node or threshold, returning at distance "+currentDistance+" from node "+node+" with parent "+parent_id+" and snpCount "+snpCount)
+ return; //end traversal
+ }
+ //console.log("traversing up, new node is "+parent_id+" with distance "+(currentDistance + snpCount))
+ if (!visited.has(parent_id)) {
+ // Only expand a parent that has not been visited yet, to avoid infinite loops and repeated work
+ visited.add(parent_id);
+ traverseDown(parent_id, currentDistance + snpCount); //Traverse down from the parent into its unvisited children
+ traverseUp(parent_id, currentDistance + snpCount); // Traverse further up
+ }
+ }
+ // Helper function to traverse down (towards the children)
+ function traverseDown(node, currentDistance) {
+ if (!nodeMap[node] || currentDistance > distanceThreshold) {
+ return;
+ } //if node has no entry in nodeMap (i.e. it is a leaf), or if the threshold has been exceeded, return
+ for (const child of nodeMap[node].children) {
+ // Each child is an "="-separated string: internal nodes are id=snps, leaves are name=snps=lineage=accession
+ let decodedChild = child.split("="); //split the encoded child into its fields
+ let childId = decodedChild[0]; //internal ID of the child, or the sample name when the child is a leaf
+ let childSnpDist = parseInt(decodedChild[1]); //get SNP count of child
+ let newTotal = currentDistance + childSnpDist; //add SNP count of child to current distance
+ //console.log("traversing down, new node is "+childId+" with distance "+childSnpDist+" for new total "+newTotal)
+ if (!visited.has(childId) && !visited.has(decodedChild[3])) {
+ //internal nodes are tracked by node ID and leaves by genbank accession, since some sample names are repeated
+ if (childId.match(/^\d+$/)) {
+ //if it's just digits, it's an internal node, so we add it to visited as is
+ visited.add(childId);
+ } else {
+ visited.add(decodedChild[3]);
+ } //if it's not just digits, it's a leaf node, so we add the genbank accession to visited
+ if (newTotal <= distanceThreshold) {
+ //the child is within the threshold (a total equal to the threshold still counts)
+ //console.log("adding node to results:"+childId+" with distance "+newTotal)
+ if (!nodeMap[childId]) {
+ //if it's not an entry in node map, it's not an internal node, so we add it to the results
+ //console.log("adding node to results:"+decodedChild)
+ results.push([
+ decodedChild[0],
+ newTotal,
+ decodedChild[2],
+ decodedChild[3],
+ ]);
+ }
+ }
+ if (nodeMap[childId]) {
+ //if the child is an internal node, traverse down
+ traverseDown(childId, newTotal); // Traverse further down; pass ID, not node info itself
+ }
+ }
+ }
+ }
+
+ // Start of the main function
+ //myResult[1] is set while the whole tree is read; it is true if the queried sample exists in the tree
+ if (!myResult[1]) {
+ //if boolean is falsy
+ console.log("Node not found in the tree"); //it's not a valid node, so return an error string instead of an array
+ return "Node not found in the tree";
+ }
+
+ let visited = new Set(); // To keep track of visited nodes
+ let results = []; // To store leaves within the distance threshold
+ visited.add(myResult[2]); //add ID of queried sample to visited; NOTE(review): children are looked up by split string ID or by accession, confirm this entry matches
+ if (myResult[5]) {
+ //if the node is an internal node
+ traverseDown(myResult[2], 0); //start traversal from the internal node, we have a neutral distance of 0
+ traverseUp(myResult[2], 0);
+ } else {
+ traverseDown(myResult[3], myResult[4]); //a leaf has no nodeMap entry, so start from its parent with the leaf's own SNP count as the initial distance
+ traverseUp(myResult[3], myResult[4]);
+ }
+ //in both cases the start node is myResult[2] (queried node) or myResult[3] (its parent)
+ // Traverse as far down as possible first, then go up, and traverse down again ignoring visited nodes
+ return results;
+ }
+
+ let goodSamples = findNodesWithinDistance(sampleID, maxParsimony);
+ nodeMap = null; //drop this reference to the node map once the search is done
+ return goodSamples;
+ })
+ .catch((error) => {
+ // Catch any errors from processJsonLines or thrown in the then block, and resolve to an error string instead of rejecting
+ console.error("Error in getParsimonySamples:", error);
+ return "Error processing samples";
+ });
+}
+/*
+getParsimonySamples("node_960478", 5)
+ .then(result => {
+ console.log("Results:", result);
+ })
+ .catch(error => {
+ console.error("Error processing samples:", error);
+ });
+*/
+export default getParsimonySamples;
+
+/*
+NOTES:
+*/
diff --git a/taxonium_component/src/utils/nodeMapper.js b/taxonium_component/src/utils/nodeMapper.js
index 14ac8c16..3eb660af 100644
--- a/taxonium_component/src/utils/nodeMapper.js
+++ b/taxonium_component/src/utils/nodeMapper.js
@@ -1,162 +1,194 @@
-/*
-TODO:
-*/
-
-async function processJsonLines(url,sampleID) {
- // Fetch the gzipped JSONL file
- //const startTime = new Date(); // Start timing
- const response = await fetch(url);
-
- // Ensure the fetch was successful
- if (!response.ok) {
- throw new Error(`HTTP error! status: ${response.status}`);
- }
-
- // Stream the response through decompression and decoding
- const decompressedStream = response.body.pipeThrough(new DecompressionStream('gzip'));
- const textStream = decompressedStream.pipeThrough(new TextDecoderStream());
-
- // Reader to read the stream line by line
- const reader = textStream.getReader();
- let remainder = '';
- let result;
- let nodes = {};
- let foundSample=false;//we will be looking for a specific ID when we construct
- let foundSampleID=""
- let foundParentID=""
- let foundSNPCount=0
- let isBranch=false
- while (!(result = await reader.read()).done) {
- const chunk = remainder + result.value;
- const lines = chunk.split('\n');
- remainder = lines.pop(); // Save the last line in case it's incomplete
- for (const line of lines) {
- if (line) {
- var snpCount=0;
- try {
- const json = JSON.parse(line);
- if (json.config){//if line has the config file, skip it to avoid an error
- continue;//this first line also has mutations dictionary for decoding, if we need that later
- }
- for (const mut of json.mutations){
- if (mut>107435){
- snpCount+=1;
- }
- }
- if (json.name===sampleID){//check if this is the sample we will be searching for
- foundSample=true;//if it is, we have found it
- foundSampleID=json.node_id//store its ID so we can use it later
- foundParentID=json.parent_id//need to get parent ID of first node as a jumping off point for internal nodes, since theyre not being stored
- foundSNPCount=snpCount
- if (json.name.includes("node_")) {
- isBranch=true
- }
- //console.log(json)
- }
-
- if (json.name.includes("node_")) { // Check if the node is internal
- var encodedChild=(String(json.node_id)+"="+String(snpCount))//encode child and snp count without further nesting, as trying to store them as separate objects causes Stringify error due to excessive nesting
- if (!nodes[json.node_id]) {//if internal, but not added to list
- nodes[json.node_id] = {//create new node
- parent_id: json.parent_id,
- snpCount: snpCount,
- children: []
- };
- if (!nodes[nodes[json.node_id].parent_id]){//if the parent is not yet added to the list,
- nodes[nodes[json.node_id].parent_id] = {// add it to the list, with null name and parent, since we wont have that info until we read in parent node
- parent_id: null,
- snpCount: null,
- children: [encodedChild]//store the node ID and the number of mutations
- };
- }
- else{
- nodes[nodes[json.node_id].parent_id].children.push(encodedChild);// if the parent node has been added, add this node to its children
- }
- }
- if(nodes[json.node_id] && (nodes[json.node_id].parent_id===null || nodes[json.node_id].name===null)){//if we have added this parent node previously, but finally come across in JSON
- //console.log("Node ID being updated:"+json.name)
- nodes[json.node_id].parent_id=json.parent_id;//fill in the parent ID
- nodes[json.node_id].snpCount=snpCount;//fill in the snp count
- if (!nodes[nodes[json.node_id].parent_id]){//if this node, which was added by a previous step and therefore does not flag new internal step above, has a parent that has not been added to the list
- nodes[nodes[json.node_id].parent_id] = {// so add it
- parent_id: null,
- snpCount: null,
- children: [encodedChild]//store the node ID and the number of mutations
- };
- }
- else{
- nodes[nodes[json.node_id].parent_id].children.push(encodedChild);// if the parent node has been added, add this node to its children
- }
- }
- }
- else {// if doesnt contain "node_", then its a leaf node
- encodedChild=(String(json.name)+"="+String(snpCount)+"="+String(json.meta_pangolin_lineage)+"="+String(json.meta_genbank_accession))//encode child and snp count without further nesting, as trying to store them as separate objects causes Stringify error due to excessive nesting
- if (!nodes[json.parent_id]) {//we dont track leaf nodes, so if parent node is not in list, add it
- nodes[json.parent_id] = {//add line which fills in these null values when we read in the parent node
- parent_id: null,
- snpCount: null,
- children: [encodedChild]
- };
- } else {
-
- nodes[json.parent_id].children.push(encodedChild);//if parent node is in list, add this node to its children
- }
- }
- } catch (e) {
- console.error('Error parsing JSON:', e);
- return "Error parsing JSON"
- }
- }
- }
- }
-
-
- var answersArray=[nodes, foundSample, foundSampleID, foundParentID, foundSNPCount, isBranch]
- return answersArray;
- }
-/*
-processJsonLines('https://cov2tree.nyc3.cdn.digitaloceanspaces.com/latest_public.jsonl.gz', "node_3").then(result => {
- let sliced = Object.fromEntries(Object.entries(result[0][0]).slice(0,3))//get first 3 entries
- console.log("First 3 entries: ",sliced)
- //saveObjectToJson(result[0], 'C:/Users/david/my-app/src/InternalNodeMap.json');
-})
-.catch(error => {
- console.error("Error processing samples:", error);
-});
-function saveObjectToJson(dataObject, outputPath) {
- const fs = require('fs');
- const JSONStream = require('JSONStream');
- return new Promise((resolve, reject) => {
- const writeStream = fs.createWriteStream(outputPath);
- const stringifyStream = JSONStream.stringifyObject();
- stringifyStream.pipe(writeStream);
-
- writeStream.on('finish', () => {
- console.log('JSON file has been written successfully.');
- resolve();
- });
-
- writeStream.on('error', (error) => {
- console.error('Stream write error:', error);
- reject(error);
- });
-
- stringifyStream.on('error', (error) => {
- console.error('JSON stringify error:', error);
- reject(error);
- });
-
- for (const key in dataObject) {
- stringifyStream.write([key, dataObject[key]]);
- }
- stringifyStream.end();
- });
-}
-*/
-export default processJsonLines;
-
-// Usage example
-//at ~2gb of ram, 4.2ghz with 6 cores, a little under 60sec when reading from url
-//time to write to file is more extensive, but ideally not a factor if its happening in the backend
-//time to query backend for single node: ~0.6s
-//time to add snp dist when reading is negligible
\ No newline at end of file
+/*
+TODO:
+*/
+
+async function processJsonLines(url, sampleID) {
+ // Streams the gzipped JSONL file at url and resolves to [nodes, foundSample, foundSampleID, foundParentID, foundSNPCount, isBranch],
+ // or to the string "Error parsing JSON" if handling a line throws; throws an Error if the HTTP response is not ok
+ const response = await fetch(url);
+
+ // Ensure the fetch was successful
+ if (!response.ok) {
+ throw new Error(`HTTP error! status: ${response.status}`);
+ }
+
+ // Stream the response through decompression and decoding
+ const decompressedStream = response.body.pipeThrough(
+ new DecompressionStream("gzip")
+ );
+ const textStream = decompressedStream.pipeThrough(new TextDecoderStream());
+
+ // Reader to read the stream chunk by chunk; each chunk is split into lines below
+ const reader = textStream.getReader();
+ let remainder = ""; //partial line carried over from the previous chunk
+ let result;
+ let nodes = {}; //node ID -> {parent_id, snpCount, children}
+ let foundSample = false; //becomes true once a record whose name equals sampleID is read
+ let foundSampleID = ""; //node_id of the matched record
+ let foundParentID = ""; //parent_id of the matched record
+ let foundSNPCount = 0; //snpCount of the matched record
+ let isBranch = false; //true when the matched record's name contains "node_"
+ while (!(result = await reader.read()).done) {
+ const chunk = remainder + result.value;
+ const lines = chunk.split("\n");
+ remainder = lines.pop(); // Save the last line in case it's incomplete; NOTE(review): it is not parsed after the loop ends, so the file presumably ends with a newline
+ for (const line of lines) {
+ if (line) {
+ var snpCount = 0;
+ try {
+ const json = JSON.parse(line);
+ if (json.config) {
+ //if line has the config file, skip it to avoid an error
+ continue; //this first line also has mutations dictionary for decoding, if we need that later
+ }
+ for (const mut of json.mutations) {
+ if (mut > 107435) { //NOTE(review): 107435 looks like an index cutoff for the mutations that count as SNPs; confirm against the mutations dictionary
+ snpCount += 1;
+ }
+ }
+ if (json.name === sampleID) {
+ //check if this is the sample we will be searching for
+ foundSample = true; //if it is, we have found it
+ foundSampleID = json.node_id; //store its ID so we can use it later
+ foundParentID = json.parent_id; //the parent ID is the starting point for a queried leaf, since leaves are not stored as entries in nodes
+ foundSNPCount = snpCount;
+ if (json.name.includes("node_")) {
+ isBranch = true;
+ }
+ //console.log(json)
+ }
+
+ if (json.name.includes("node_")) {
+ // A name containing "node_" marks an internal node
+ var encodedChild = String(json.node_id) + "=" + String(snpCount); //encode child and snp count without further nesting, as trying to store them as separate objects causes Stringify error due to excessive nesting
+ if (!nodes[json.node_id]) {
+ //if internal, but not added to list
+ nodes[json.node_id] = {
+ //create new node
+ parent_id: json.parent_id,
+ snpCount: snpCount,
+ children: [],
+ };
+ if (!nodes[nodes[json.node_id].parent_id]) {
+ //if the parent is not yet added to the list,
+ nodes[nodes[json.node_id].parent_id] = {
+ // add it as a placeholder with null parent and SNP count, since we won't have that info until we read in the parent node
+ parent_id: null,
+ snpCount: null,
+ children: [encodedChild], //store the node ID and the number of mutations
+ };
+ } else {
+ nodes[nodes[json.node_id].parent_id].children.push(
+ encodedChild
+ ); // if the parent node has been added, add this node to its children
+ }
+ }
+ if (
+ nodes[json.node_id] &&
+ (nodes[json.node_id].parent_id === null ||
+ nodes[json.node_id].name === null) //NOTE(review): entries are never given a name property, so this comparison is never true
+ ) {
+ //if this node was added earlier as a placeholder parent and we have now reached its own record
+ //console.log("Node ID being updated:"+json.name)
+ nodes[json.node_id].parent_id = json.parent_id; //fill in the parent ID
+ nodes[json.node_id].snpCount = snpCount; //fill in the snp count
+ if (!nodes[nodes[json.node_id].parent_id]) {
+ //this node was a placeholder, so it skipped the new-node branch above; its own parent may still be missing from the list
+ nodes[nodes[json.node_id].parent_id] = {
+ // so add it
+ parent_id: null,
+ snpCount: null,
+ children: [encodedChild], //store the node ID and the number of mutations
+ };
+ } else {
+ nodes[nodes[json.node_id].parent_id].children.push(
+ encodedChild
+ ); // if the parent node has been added, add this node to its children
+ }
+ }
+ } else {
+ // if the name doesn't contain "node_", then it's a leaf node
+ encodedChild =
+ String(json.name) +
+ "=" +
+ String(snpCount) +
+ "=" +
+ String(json.meta_pangolin_lineage) +
+ "=" +
+ String(json.meta_genbank_accession); //encode name, snp count, lineage and accession as one flat string, as storing them as separate objects causes Stringify error due to excessive nesting
+ if (!nodes[json.parent_id]) {
+ //we don't track leaf nodes as entries, so if the parent node is not in the list, add it
+ nodes[json.parent_id] = {
+ //placeholder; these null values are filled in when we read in the parent node
+ parent_id: null,
+ snpCount: null,
+ children: [encodedChild],
+ };
+ } else {
+ nodes[json.parent_id].children.push(encodedChild); //if parent node is in list, add this node to its children
+ }
+ }
+ } catch (e) {
+ console.error("Error parsing JSON:", e); //any exception from the block above lands here, not only JSON.parse failures
+ return "Error parsing JSON";
+ }
+ }
+ }
+ }
+
+ var answersArray = [
+ nodes,
+ foundSample,
+ foundSampleID,
+ foundParentID,
+ foundSNPCount,
+ isBranch,
+ ];
+ return answersArray;
+}
+/*
+processJsonLines('https://cov2tree.nyc3.cdn.digitaloceanspaces.com/latest_public.jsonl.gz', "node_3").then(result => {
+ let sliced = Object.fromEntries(Object.entries(result[0][0]).slice(0,3))//get first 3 entries
+ console.log("First 3 entries: ",sliced)
+ //saveObjectToJson(result[0], 'C:/Users/david/my-app/src/InternalNodeMap.json');
+})
+.catch(error => {
+ console.error("Error processing samples:", error);
+});
+function saveObjectToJson(dataObject, outputPath) {
+ const fs = require('fs');
+ const JSONStream = require('JSONStream');
+ return new Promise((resolve, reject) => {
+ const writeStream = fs.createWriteStream(outputPath);
+ const stringifyStream = JSONStream.stringifyObject();
+ stringifyStream.pipe(writeStream);
+
+ writeStream.on('finish', () => {
+ console.log('JSON file has been written successfully.');
+ resolve();
+ });
+
+ writeStream.on('error', (error) => {
+ console.error('Stream write error:', error);
+ reject(error);
+ });
+
+ stringifyStream.on('error', (error) => {
+ console.error('JSON stringify error:', error);
+ reject(error);
+ });
+
+ for (const key in dataObject) {
+ stringifyStream.write([key, dataObject[key]]);
+ }
+ stringifyStream.end();
+ });
+}
+*/
+export default processJsonLines;
+
+// Performance notes
+//at ~2 GB of RAM, 4.2 GHz with 6 cores: a little under 60 s when reading from the URL
+//time to write to file is longer, but ideally not a factor if it's happening in the backend
+//time to query backend for a single node: ~0.6 s
+//time to add the SNP distance while reading is negligible