From 987533bf1ad22d0b15ce296f8cbd10bcccc9b09c Mon Sep 17 00:00:00 2001 From: Greg Walker Date: Thu, 21 Nov 2024 09:53:23 -0600 Subject: [PATCH] comments --- src/scripts/bio-art.js | 50 +++++++++++++++++++++++++++++++++---- src/scripts/bio-art.test.js | 46 ++++++++++++++++------------------ 2 files changed, 67 insertions(+), 29 deletions(-) diff --git a/src/scripts/bio-art.js b/src/scripts/bio-art.js index 3903afb..062ac86 100644 --- a/src/scripts/bio-art.js +++ b/src/scripts/bio-art.js @@ -6,6 +6,9 @@ const { } = require("../utils"); const sample = require("../utils/sample"); +// The set of ontologies we want. Some of them could be questionable, like +// human anatomy, so leave those out. Maybe after we review them all more +// thoroughly, we can decide whether to add more! const permitted = new Set([ "fungi", "parasites", @@ -18,6 +21,7 @@ const permitted = new Set([ ]); const get = (url) => + // The BioArt API requires a browser user-agent, so put that in here. fetch(url, { headers: { "User-Agent": @@ -28,35 +32,56 @@ const get = (url) => const getJSON = (url) => get(url).then((r) => r.json()); const getPermittedOntologyIDs = () => + // The list of ontology IDs is unlikely to change very often, so cache it + // for an hour. cache("bio-art ontology id", 60, async () => { const allOntologies = await getJSON( "https://bioart.niaid.nih.gov/api/ontologies?type=Bioart%20Category", ); + // Filter down to just the keys that we've allowed, and then map down to + // just the ontology IDs. That's all we need going forward. return allOntologies .filter(({ ontologyKey }) => permitted.has(ontologyKey.toLowerCase())) .map(({ ontologyId }) => ontologyId); }); const getEntities = async (ontologyIds) => - cache(`bio-art entities [${ontologyIds.join(",")}]`, 300, async () => { + // The list of entities might change more often than the list of ontology IDs, + // so we can cache it for a little shorter.
+ cache(`bio-art entities [${ontologyIds.join(",")}]`, 30, async () => { const url = new URL("https://bioart.niaid.nih.gov"); + // The search string is part of the URL path, which is unusual. Anyway, it's + // these fields and values. const search = [ "type:bioart", `license:"Public Domain"`, `ontologyid:((${ontologyIds.join(" OR ")}))`, ]; + // Now put the whole path together. url.pathname = `api/search/${search.join(" AND ")}`; + + // And add a query parameter for the number of entities to fetch. There may + // be more entities, but we'll deal with that later. url.searchParams.set("size", 100); + // found is the total number of entities that are responsive to our search, + // and hit (initialList) is the first batch of those matches. const { hits: { found, hit: initialList }, - } = await getJSON(url.href); + } = + // Use the URL.href property so it properly escapes the path and search + // parameters. This way we don't have to think about it. :) + await getJSON(url.href); const entities = [...initialList]; + // If the number of entities we've received is less than the total number of + // entities that match our search, run the search again but add the "start" + // query parameter so we get the next batch. Repeat until we have all of + // the responsive entities. while (entities.length < found) { url.searchParams.set("start", entities.length); const { @@ -65,15 +90,25 @@ const getEntities = async (ontologyIds) => entities.push(...nextList); } - return entities; + // And finally, we only want the field data, so map down to just that. + return entities.map(({ fields }) => fields); }); const getRandomEntity = async () => { const ontologyIds = await getPermittedOntologyIDs(); const entities = await getEntities(ontologyIds); - const { fields: entity } = sample(entities); - + const entity = sample(entities); + + // An entity can have multiple variations, each with multiple files. We'll + // just grab the first variant.
For a given variant, the list of files is a + // string of the form: + // + // FORMAT:id|FORMAT:id,id,id|FORMAT:id + // + // Where FORMAT is an image format such as PNG and the ids are a list of file + // IDs used to actually fetch the file. So we'll grab the list of PNG file IDs + // and then take the last one, for simplicity's sake. const file = entity.filesinfo[0] .split("|") .find((s) => s.startsWith("PNG:")) @@ -82,10 +117,12 @@ const getRandomEntity = async () => { .split(",") .pop(); + // Once we have the file ID, we can build up a URL to fetch it. const fileUrl = new URL( `https://bioart.niaid.nih.gov/api/bioarts/${entity.id[0]}/zip?file-ids=${file}`, ).href; + // All we want from the entity is its title, creator, and download URL. return { title: entity.title.pop(), creator: entity.creator.pop(), @@ -105,11 +142,14 @@ module.exports = (app) => { const { channel, thread_ts: thread } = msg.message; try { + // Get an entity const entity = await getRandomEntity(); + // Get its image file as a buffer const file = await get(entity.fileUrl) .then((r) => r.arrayBuffer()) .then((a) => Buffer.from(a)); + // Post that sucker to Slack.
postFile({ channel_id: channel, thread_ts: thread, diff --git a/src/scripts/bio-art.test.js b/src/scripts/bio-art.test.js index 4a1f695..3b223ba 100644 --- a/src/scripts/bio-art.test.js +++ b/src/scripts/bio-art.test.js @@ -80,28 +80,22 @@ describe("bio-art", () => { case "bio-art entities [one,two,three]": return [ { - fields: { - id: [1], - title: ["An art"], - creator: ["Zeus"], - filesinfo: ["svg:bob|bmp:george|PNG:image1a,image1b,image1c"], - }, + id: [1], + title: ["An art"], + creator: ["Zeus"], + filesinfo: ["svg:bob|bmp:george|PNG:image1a,image1b,image1c"], }, { - fields: { - id: [2], - title: ["Some art"], - creator: ["Persephone"], - filesinfo: ["svg:bob|bmp:george|PNG:image2a,image2b,image2c"], - }, + id: [2], + title: ["Some art"], + creator: ["Persephone"], + filesinfo: ["svg:bob|bmp:george|PNG:image2a,image2b,image2c"], }, { - fields: { - id: [3], - title: ["The art"], - creator: ["Athena"], - filesinfo: ["svg:bob|bmp:george|PNG:image3a,image3b,image3c"], - }, + id: [3], + title: ["The art"], + creator: ["Athena"], + filesinfo: ["svg:bob|bmp:george|PNG:image3a,image3b,image3c"], }, ]; default: @@ -171,9 +165,9 @@ describe("bio-art", () => { .pop(); fetch.mockResolvedValue({ - json: jest - .fn() - .mockResolvedValue({ hits: { found: 1, hit: [1] } }), + json: jest.fn().mockResolvedValue({ + hits: { found: 1, hit: [{ fields: 1 }] }, + }), }); const entities = await populator(); @@ -208,7 +202,7 @@ describe("bio-art", () => { json: jest.fn().mockResolvedValue({ hits: { found: 250, - hit: [...Array(100)].map((_, i) => i), + hit: [...Array(100)].map((_, i) => ({ fields: i })), }, }), }; @@ -218,7 +212,9 @@ describe("bio-art", () => { json: jest.fn().mockResolvedValue({ hits: { found: 250, - hit: [...Array(100)].map((_, i) => 100 + i), + hit: [...Array(100)].map((_, i) => ({ + fields: 100 + i, + })), }, }), }; @@ -228,7 +224,9 @@ describe("bio-art", () => { json: jest.fn().mockResolvedValue({ hits: { found: 250, - hit: [...Array(50)].map((_, i) => 200 + i), 
+ hit: [...Array(50)].map((_, i) => ({ + fields: 200 + i, + })), }, }), };