Skip to content

Commit

Permalink
comments
Browse files Browse the repository at this point in the history
  • Loading branch information
mgwalker committed Nov 21, 2024
1 parent 60633be commit 22f78d8
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 29 deletions.
50 changes: 45 additions & 5 deletions src/scripts/bio-art.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ const {
} = require("../utils");
const sample = require("../utils/sample");

// The set of ontologies we want. Some of them could be questionable, like
// human anatomy, so leave those out. Maybe after we review them all more
// thoroughly, we can decide whether to add more!
const permitted = new Set([
"fungi",
"parasites",
Expand All @@ -18,6 +21,7 @@ const permitted = new Set([
]);

const get = (url) =>
// The BioArt API requires a browser user-agent, so put that in here.
fetch(url, {
headers: {
"User-Agent":
Expand All @@ -28,35 +32,56 @@ const get = (url) =>
// Fetch the given URL with get() and resolve to the response body parsed as
// JSON.
const getJSON = async (url) => {
  const response = await get(url);
  return response.json();
};

const getPermittedOntologyIDs = () => {
  // Populator handed to the cache: fetches every BioArt category ontology and
  // keeps only the IDs of the ones whose key is in the permitted set.
  const fetchPermittedIds = async () => {
    const ontologies = await getJSON(
      "https://bioart.niaid.nih.gov/api/ontologies?type=Bioart%20Category",
    );

    // Keys are compared in lowercase. Only the ontology IDs are needed going
    // forward, so everything else about each ontology is dropped.
    const ids = [];
    for (const { ontologyKey, ontologyId } of ontologies) {
      if (permitted.has(ontologyKey.toLowerCase())) {
        ids.push(ontologyId);
      }
    }
    return ids;
  };

  // The list of ontology IDs is unlikely to change very often, so cache it
  // for an hour.
  return cache("bio-art ontology id", 60, fetchPermittedIds);
};

const getEntities = async (ontologyIds) =>
cache(`bio-art entities [${ontologyIds.join(",")}]`, 300, async () => {
// The list of entities might change more often than the list of ontology IDs,
// so cache it for a shorter time.
cache(`bio-art entities [${ontologyIds.join(",")}]`, 30, async () => {
const url = new URL("https://bioart.niaid.nih.gov");

// The search string is part of the URL path, which is unusual. Anyway, it's
// these fields and values.
const search = [
"type:bioart",
`license:"Public Domain"`,
`ontologyid:((${ontologyIds.join(" OR ")}))`,
];

// Now put the whole path together.
url.pathname = `api/search/${search.join(" AND ")}`;

// And add a query parameter for the number of entities to fetch. There may
// be more entities, but we'll deal with that later.
url.searchParams.set("size", 100);

// found is the total number of entities that are responsive to our search,
// and hit (initialList) is the first batch of those matches.
const {
hits: { found, hit: initialList },
} = await getJSON(url.href);
} =
// Use the URL's href property so the path and search parameters are
// properly escaped. This way we don't have to think about it. :)
await getJSON(url.href);

const entities = [...initialList];

// If the number of entities we've received is less than the total number of
// entities that match our search, run the search again but add the "start"
// query parameter so we get the next batch. Repeat until we have all of
// the responsive entities.
while (entities.length < found) {
url.searchParams.set("start", entities.length);
const {
Expand All @@ -65,15 +90,25 @@ const getEntities = async (ontologyIds) =>
entities.push(...nextList);
}

return entities;
// And finally, we only want the field data, so map down to just that.
return entities.map(({ fields }) => fields);
});

const getRandomEntity = async () => {
const ontologyIds = await getPermittedOntologyIDs();
const entities = await getEntities(ontologyIds);

const { fields: entity } = sample(entities);

const entity = sample(entities);

// An entity can have multiple variants, each with multiple files. We'll
// just grab the first variant. For a given variant, the list of files is a
// string of the form:
//
// FORMAT:id|FORMAT:id,id,id|FORMAT:id
//
// Where FORMAT is an image format such as PNG and the ids are a list of file
// IDs used to actually fetch the file. So we'll grab the list of PNG file IDs
// and then take the last one, for simplicity's sake.
const file = entity.filesinfo[0]
.split("|")
.find((s) => s.startsWith("PNG:"))
Expand All @@ -82,10 +117,12 @@ const getRandomEntity = async () => {
.split(",")
.pop();

// Once we have the file ID, we can build up a URL to fetch it.
const fileUrl = new URL(
`https://bioart.niaid.nih.gov/api/bioarts/${entity.id[0]}/zip?file-ids=${file}`,
).href;

// All we want from the entity is its title, creator, and download URL.
return {
title: entity.title.pop(),
creator: entity.creator.pop(),
Expand All @@ -105,11 +142,14 @@ module.exports = (app) => {
const { channel, thread_ts: thread } = msg.message;

try {
// Get an entity
const entity = await getRandomEntity();
// Get its image file as a buffer
const file = await get(entity.fileUrl)
.then((r) => r.arrayBuffer())
.then((a) => Buffer.from(a));

// Post that sucker to Slack.
postFile({
channel_id: channel,
thread_ts: thread,
Expand Down
46 changes: 22 additions & 24 deletions src/scripts/bio-art.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,28 +80,22 @@ describe("bio-art", () => {
case "bio-art entities [one,two,three]":
return [
{
fields: {
id: [1],
title: ["An art"],
creator: ["Zeus"],
filesinfo: ["svg:bob|bmp:george|PNG:image1a,image1b,image1c"],
},
id: [1],
title: ["An art"],
creator: ["Zeus"],
filesinfo: ["svg:bob|bmp:george|PNG:image1a,image1b,image1c"],
},
{
fields: {
id: [2],
title: ["Some art"],
creator: ["Persephone"],
filesinfo: ["svg:bob|bmp:george|PNG:image2a,image2b,image2c"],
},
id: [2],
title: ["Some art"],
creator: ["Persephone"],
filesinfo: ["svg:bob|bmp:george|PNG:image2a,image2b,image2c"],
},
{
fields: {
id: [3],
title: ["The art"],
creator: ["Athena"],
filesinfo: ["svg:bob|bmp:george|PNG:image3a,image3b,image3c"],
},
id: [3],
title: ["The art"],
creator: ["Athena"],
filesinfo: ["svg:bob|bmp:george|PNG:image3a,image3b,image3c"],
},
];
default:
Expand Down Expand Up @@ -171,9 +165,9 @@ describe("bio-art", () => {
.pop();

fetch.mockResolvedValue({
json: jest
.fn()
.mockResolvedValue({ hits: { found: 1, hit: [1] } }),
json: jest.fn().mockResolvedValue({
hits: { found: 1, hit: [{ fields: 1 }] },
}),
});

const entities = await populator();
Expand Down Expand Up @@ -208,7 +202,7 @@ describe("bio-art", () => {
json: jest.fn().mockResolvedValue({
hits: {
found: 250,
hit: [...Array(100)].map((_, i) => i),
hit: [...Array(100)].map((_, i) => ({ fields: i })),
},
}),
};
Expand All @@ -218,7 +212,9 @@ describe("bio-art", () => {
json: jest.fn().mockResolvedValue({
hits: {
found: 250,
hit: [...Array(100)].map((_, i) => 100 + i),
hit: [...Array(100)].map((_, i) => ({
fields: 100 + i,
})),
},
}),
};
Expand All @@ -228,7 +224,9 @@ describe("bio-art", () => {
json: jest.fn().mockResolvedValue({
hits: {
found: 250,
hit: [...Array(50)].map((_, i) => 200 + i),
hit: [...Array(50)].map((_, i) => ({
fields: 200 + i,
})),
},
}),
};
Expand Down

0 comments on commit 22f78d8

Please sign in to comment.