comments

18F · Nov 21, 2024 · 22f78d8 · 22f78d8
1 parent 60633be
commit 22f78d8
Show file tree

Hide file tree

Showing 2 changed files with 67 additions and 29 deletions.
diff --git a/src/scripts/bio-art.js b/src/scripts/bio-art.js
@@ -6,6 +6,9 @@ const {
 } = require("../utils");
 const sample = require("../utils/sample");
 
+// The set of ontologies we want. Some of them could be questionable, like
+// human anatomy, so leave those out. Maybe after we review them all more
+// thoroughly, we can decide whether to add more!
 const permitted = new Set([
   "fungi",
   "parasites",
@@ -18,6 +21,7 @@ const permitted = new Set([
 ]);
 
 const get = (url) =>
+  // The BioArt API requires a browser user-agent, so put that in here.
   fetch(url, {
     headers: {
       "User-Agent":
@@ -28,35 +32,56 @@ const get = (url) =>
 const getJSON = (url) => get(url).then((r) => r.json());
 
 const getPermittedOntologyIDs = () =>
+  // The list of ontology IDs is unlikely to change very often, so cache it
+  // for an hour.
   cache("bio-art ontology id", 60, async () => {
     const allOntologies = await getJSON(
       "https://bioart.niaid.nih.gov/api/ontologies?type=Bioart%20Category",
     );
 
+    // Filter down to just the keys that we've allowed, and then map down to
+    // just the ontology IDs. That's all we need going forward.
     return allOntologies
       .filter(({ ontologyKey }) => permitted.has(ontologyKey.toLowerCase()))
       .map(({ ontologyId }) => ontologyId);
   });
 
 const getEntities = async (ontologyIds) =>
-  cache(`bio-art entities [${ontologyIds.join(",")}]`, 300, async () => {
+  // The list of entities might change more often than the list of ontology IDs,
+  // so we cache it for a shorter time.
+  cache(`bio-art entities [${ontologyIds.join(",")}]`, 30, async () => {
     const url = new URL("https://bioart.niaid.nih.gov");
 
+    // The search string is part of the URL path, which is unusual. Anyway, it's
+    // these fields and values.
     const search = [
       "type:bioart",
       `license:"Public Domain"`,
       `ontologyid:((${ontologyIds.join(" OR ")}))`,
     ];
 
+    // Now put the whole path together.
     url.pathname = `api/search/${search.join(" AND ")}`;
+
+    // And add a query parameter for the number of entities to fetch. There may
+    // be more entities, but we'll deal with that later.
     url.searchParams.set("size", 100);
 
+    // found is the total number of entities that are responsive to our search,
+    // and hit (initialList) is the first batch of those matches.
     const {
       hits: { found, hit: initialList },
-    } = await getJSON(url.href);
+    } =
+      // Use the URL.href property so the path and search parameters are
+      // properly escaped. This way we don't have to think about it. :)
+      await getJSON(url.href);
 
     const entities = [...initialList];
 
+    // If the number of entities we've received is less than the total number of
+    // entities that match our search, run the search again but add the "start"
+    // query parameter so we get the next batch. Repeat until we have all of
+    // the responsive entities.
     while (entities.length < found) {
       url.searchParams.set("start", entities.length);
       const {
@@ -65,15 +90,25 @@ const getEntities = async (ontologyIds) =>
       entities.push(...nextList);
     }
 
-    return entities;
+    // And finally, we only want the field data, so map down to just that.
+    return entities.map(({ fields }) => fields);
   });
 
 const getRandomEntity = async () => {
   const ontologyIds = await getPermittedOntologyIDs();
   const entities = await getEntities(ontologyIds);
 
-  const { fields: entity } = sample(entities);
-
+  const entity = sample(entities);
+
+  // An entity can have multiple variations, each with multiple files. We'll
+  // just grab the first variant. For a given variant, the list of files is a
+  // string of the form:
+  //
+  //    FORMAT:id|FORMAT:id,id,id|FORMAT:id
+  //
+  // Where FORMAT is an image format such as PNG and the ids are a list of file
+  // IDs used to actually fetch the file. So we'll grab the list of PNG file IDs
+  // and then take the last one, for simplicity's sake.
   const file = entity.filesinfo[0]
     .split("|")
     .find((s) => s.startsWith("PNG:"))
@@ -82,10 +117,12 @@ const getRandomEntity = async () => {
     .split(",")
     .pop();
 
+  // Once we have the file ID, we can build up a URL to fetch it.
   const fileUrl = new URL(
     `https://bioart.niaid.nih.gov/api/bioarts/${entity.id[0]}/zip?file-ids=${file}`,
   ).href;
 
+  // All we want from the entity is its title, creator, and download URL.
   return {
     title: entity.title.pop(),
     creator: entity.creator.pop(),
@@ -105,11 +142,14 @@ module.exports = (app) => {
     const { channel, thread_ts: thread } = msg.message;
 
     try {
+      // Get an entity
       const entity = await getRandomEntity();
+      // Get its image file as a buffer
       const file = await get(entity.fileUrl)
         .then((r) => r.arrayBuffer())
         .then((a) => Buffer.from(a));
 
+      // Post that sucker to Slack.
       postFile({
         channel_id: channel,
         thread_ts: thread,

diff --git a/src/scripts/bio-art.test.js b/src/scripts/bio-art.test.js
@@ -80,28 +80,22 @@ describe("bio-art", () => {
           case "bio-art entities [one,two,three]":
             return [
               {
-                fields: {
-                  id: [1],
-                  title: ["An art"],
-                  creator: ["Zeus"],
-                  filesinfo: ["svg:bob|bmp:george|PNG:image1a,image1b,image1c"],
-                },
+                id: [1],
+                title: ["An art"],
+                creator: ["Zeus"],
+                filesinfo: ["svg:bob|bmp:george|PNG:image1a,image1b,image1c"],
               },
               {
-                fields: {
-                  id: [2],
-                  title: ["Some art"],
-                  creator: ["Persephone"],
-                  filesinfo: ["svg:bob|bmp:george|PNG:image2a,image2b,image2c"],
-                },
+                id: [2],
+                title: ["Some art"],
+                creator: ["Persephone"],
+                filesinfo: ["svg:bob|bmp:george|PNG:image2a,image2b,image2c"],
               },
               {
-                fields: {
-                  id: [3],
-                  title: ["The art"],
-                  creator: ["Athena"],
-                  filesinfo: ["svg:bob|bmp:george|PNG:image3a,image3b,image3c"],
-                },
+                id: [3],
+                title: ["The art"],
+                creator: ["Athena"],
+                filesinfo: ["svg:bob|bmp:george|PNG:image3a,image3b,image3c"],
               },
             ];
           default:
@@ -171,9 +165,9 @@ describe("bio-art", () => {
               .pop();
 
             fetch.mockResolvedValue({
-              json: jest
-                .fn()
-                .mockResolvedValue({ hits: { found: 1, hit: [1] } }),
+              json: jest.fn().mockResolvedValue({
+                hits: { found: 1, hit: [{ fields: 1 }] },
+              }),
             });
 
             const entities = await populator();
@@ -208,7 +202,7 @@ describe("bio-art", () => {
                     json: jest.fn().mockResolvedValue({
                       hits: {
                         found: 250,
-                        hit: [...Array(100)].map((_, i) => i),
+                        hit: [...Array(100)].map((_, i) => ({ fields: i })),
                       },
                     }),
                   };
@@ -218,7 +212,9 @@ describe("bio-art", () => {
                     json: jest.fn().mockResolvedValue({
                       hits: {
                         found: 250,
-                        hit: [...Array(100)].map((_, i) => 100 + i),
+                        hit: [...Array(100)].map((_, i) => ({
+                          fields: 100 + i,
+                        })),
                       },
                     }),
                   };
@@ -228,7 +224,9 @@ describe("bio-art", () => {
                     json: jest.fn().mockResolvedValue({
                       hits: {
                         found: 250,
-                        hit: [...Array(50)].map((_, i) => 200 + i),
+                        hit: [...Array(50)].map((_, i) => ({
+                          fields: 200 + i,
+                        })),
                       },
                     }),
                   };