diff --git a/adminSiteClient/GdocsBreadcrumbsInput.tsx b/adminSiteClient/GdocsManualBreadcrumbsInput.tsx similarity index 59% rename from adminSiteClient/GdocsBreadcrumbsInput.tsx rename to adminSiteClient/GdocsManualBreadcrumbsInput.tsx index 094e7188cba..86c382426d4 100644 --- a/adminSiteClient/GdocsBreadcrumbsInput.tsx +++ b/adminSiteClient/GdocsManualBreadcrumbsInput.tsx @@ -13,14 +13,12 @@ export const BreadcrumbLine = ({ item, setItem, removeItem, - isLastBreadcrumbItem, labelError, hrefError, }: { item: BreadcrumbItem setItem: (item: BreadcrumbItem) => void removeItem: () => void - isLastBreadcrumbItem?: boolean labelError?: OwidGdocErrorMessage hrefError?: OwidGdocErrorMessage }) => { @@ -30,15 +28,10 @@ export const BreadcrumbLine = ({ setItem({ ...item, href: e.target.value }) } - disabled={isLastBreadcrumbItem} status={hrefError?.type} placeholder="e.g. /poverty" /> @@ -51,11 +44,7 @@ export const BreadcrumbLine = ({ onChange={(e) => setItem({ ...item, label: e.target.value }) } - placeholder={ - isLastBreadcrumbItem - ? "Concise version of the article's title" - : undefined - } + placeholder={"A topic name"} status={labelError?.type} /> {labelError && } @@ -70,7 +59,7 @@ export const BreadcrumbLine = ({ ) } -export const GdocsBreadcrumbsInput = ({ +export const GdocsManualBreadcrumbsInput = ({ gdoc, setCurrentGdoc, errors, @@ -79,62 +68,76 @@ export const GdocsBreadcrumbsInput = ({ setCurrentGdoc: (gdoc: OwidGdocPostInterface) => void errors?: OwidGdocErrorMessage[] }) => { - const setBreadcrumbs = (breadcrumbs: BreadcrumbItem[] | undefined) => { - if (breadcrumbs?.length) { - // The last breadcrumb is not clickable, so we don't need a URL - breadcrumbs[breadcrumbs.length - 1].href = undefined - } else breadcrumbs = undefined - - setCurrentGdoc({ ...gdoc, breadcrumbs: breadcrumbs ?? null }) + const setBreadcrumbs = (breadcrumbs: BreadcrumbItem[] | null) => { + setCurrentGdoc({ ...gdoc, manualBreadcrumbs: breadcrumbs }) } const setItemAtIndex = (item: BreadcrumbItem, i: number) => { - const breadcrumbs = [...(gdoc.breadcrumbs ?? [])] + if (!gdoc.manualBreadcrumbs) return + + const breadcrumbs = [...gdoc.manualBreadcrumbs] breadcrumbs[i] = item setBreadcrumbs(breadcrumbs) } const removeItemAtIndex = (i: number) => { - const breadcrumbs = [...(gdoc.breadcrumbs ?? [])] + if (!gdoc.manualBreadcrumbs) return + + const breadcrumbs = [...gdoc.manualBreadcrumbs] breadcrumbs.splice(i, 1) - setBreadcrumbs(breadcrumbs) + + setBreadcrumbs(breadcrumbs.length ? breadcrumbs : null) } return (
-
- Breadcrumbs - -
- {gdoc.breadcrumbs?.map((item, i) => ( +
Breadcrumbs
+ {!!gdoc.breadcrumbs?.length && !gdoc.manualBreadcrumbs?.length ? ( +
+

+ The breadcrumbs for this article will be automatically + generated, based on this article's tags and the tag + graph. +

+

+ If you want to override these breadcrumbs, you can do so + here: +

+
+ ) : ( + + Unless you are editing an SDG page, each breadcrumb should + have a URL and label. + + )} + + {gdoc.manualBreadcrumbs?.map((item, i) => ( setItemAtIndex(item, i)} removeItem={() => removeItemAtIndex(i)} key={i} labelError={getPropertyMostCriticalError( - `breadcrumbs[${i}].label`, + `manualBreadcrumbs[${i}].label`, errors )} hrefError={getPropertyMostCriticalError( - `breadcrumbs[${i}].href`, + `manualBreadcrumbs[${i}].href`, errors )} - isLastBreadcrumbItem={i === gdoc.breadcrumbs!.length - 1} /> ))} - {!gdoc.breadcrumbs?.length && No breadcrumbs}
) } diff --git a/adminSiteClient/GdocsPreviewPage.tsx b/adminSiteClient/GdocsPreviewPage.tsx index aa5ab051392..97b43fff73b 100644 --- a/adminSiteClient/GdocsPreviewPage.tsx +++ b/adminSiteClient/GdocsPreviewPage.tsx @@ -313,11 +313,6 @@ export const GdocsPreviewPage = ({ match, history }: GdocsMatchProps) => { size="large" onClose={onSettingsClose} open={isSettingsOpen} - extra={ - - } > {tsMatch(currentGdoc) .with( diff --git a/adminSiteClient/GdocsSettingsForms.tsx b/adminSiteClient/GdocsSettingsForms.tsx index a1911068063..5d555a45df1 100644 --- a/adminSiteClient/GdocsSettingsForms.tsx +++ b/adminSiteClient/GdocsSettingsForms.tsx @@ -15,7 +15,7 @@ import { import { GdocsPublishedAt } from "./GdocsDateline.js" import { GdocsPublicationContext } from "./GdocsPublicationContext.js" import { Alert } from "antd" -import { GdocsBreadcrumbsInput } from "./GdocsBreadcrumbsInput.js" +import { GdocsManualBreadcrumbsInput } from "./GdocsManualBreadcrumbsInput.js" const GdocCommonErrors = ({ errors, @@ -155,7 +155,7 @@ export const GdocPostSettings = ({ errors={errors} description="An optional property to override the excerpt of this post in our atom feed, which is used for the newsletter" /> - , boolean > = { - breadcrumbs: true, + breadcrumbs: true, // automatically generated, not actually possible to change via the admin preview + manualBreadcrumbs: true, errors: true, linkedAuthors: false, linkedCharts: true, diff --git a/adminSiteClient/gdocsValidation.ts b/adminSiteClient/gdocsValidation.ts index 0ad8da3f38c..fe64982c675 100644 --- a/adminSiteClient/gdocsValidation.ts +++ b/adminSiteClient/gdocsValidation.ts @@ -140,12 +140,12 @@ function validateExcerpt( } } -function validateBreadcrumbs( +function validateManualBreadcrumbs( gdoc: OwidGdocPostInterface, errors: OwidGdocErrorMessage[] ) { - if (gdoc.breadcrumbs) { - for (const [i, breadcrumb] of gdoc.breadcrumbs.entries()) { + if (gdoc.manualBreadcrumbs) { + for (const [i, breadcrumb] of gdoc.manualBreadcrumbs.entries()) { if (!breadcrumb.label) { errors.push({ property: `breadcrumbs[${i}].label`, @@ -155,7 +155,7 @@ function validateBreadcrumbs( } // Last item can be missing a href - if (!breadcrumb.href && i !== gdoc.breadcrumbs.length - 1) { + if (!breadcrumb.href && i !== gdoc.manualBreadcrumbs.length - 1) { errors.push({ property: `breadcrumbs[${i}].href`, type: OwidGdocErrorMessageType.Error, @@ -292,7 +292,7 @@ export const getErrors = (gdoc: OwidGdoc): OwidGdocErrorMessage[] => { if (checkIsGdocPost(gdoc)) { validateRefs(gdoc, errors) validateExcerpt(gdoc, errors) - validateBreadcrumbs(gdoc, errors) + validateManualBreadcrumbs(gdoc, errors) validateAtomFields(gdoc, errors) } else if (checkIsDataInsight(gdoc)) { validateApprovedBy(gdoc, errors) diff --git a/adminSiteServer/apiRoutes/gdocs.ts b/adminSiteServer/apiRoutes/gdocs.ts index fbeb412e0d9..1a84b0e94da 100644 --- a/adminSiteServer/apiRoutes/gdocs.ts +++ b/adminSiteServer/apiRoutes/gdocs.ts @@ -178,11 +178,10 @@ export async function createOrUpdateGdoc( : GdocLinkUpdateMode.DeleteOnly ) - await upsertGdoc(trx, nextGdoc) - + const upserted = await upsertGdoc(trx, nextGdoc) await indexAndBakeGdocIfNeccesary(trx, res.locals.user, prevGdoc, nextGdoc) - return nextGdoc + return upserted } async function validateTombstoneRelatedLinkUrl( diff --git a/adminSiteServer/app.test.ts b/adminSiteServer/app.test.ts index 69b96f537fc..f8c70d04aae 100644 --- a/adminSiteServer/app.test.ts +++ b/adminSiteServer/app.test.ts @@ -1,5 +1,5 @@ import { google } from "googleapis" -import { jest } from "@jest/globals" +import { beforeAll, jest } from "@jest/globals" // Mock the google docs api to retrieve files from the test-files directory // AFAICT, we have to do this directly after the import // and before any other code that might import googleapis @@ -43,7 +43,10 @@ import { Knex, knex } from "knex" import { dbTestConfig } from "../db/tests/dbTestConfig.js" import { TransactionCloseMode, + getBestBreadcrumbs, + getParentTagArraysByChildName, knexReadWriteTransaction, + knexReadonlyTransaction, setKnexInstance, } from "../db/db.js" import { cleanTestDb, TABLES_IN_USE } from "../db/tests/testHelpers.js" @@ -51,9 +54,19 @@ import { ChartConfigsTableName, ChartsTableName, DatasetsTableName, + DbInsertTag, + DbInsertTagGraphNode, MultiDimDataPagesTableName, MultiDimXChartConfigsTableName, + TagsTableName, + TagGraphTableName, VariablesTableName, + TagGraphRootName, + PostsGdocsTableName, + OwidGdocType, + DbInsertPostGdoc, + DbInsertPostGdocXTag, + PostsGdocsXTagsTableName, } from "@ourworldindata/types" import path from "path" import fs from "fs" @@ -920,3 +933,270 @@ describe("OwidAdminApp: indicator-level chart configs", () => { expect(json.success).toBe(false) }) }) + +describe("OwidAdminApp: tag graph", () => { + // prettier-ignore + const dummyTags: DbInsertTag[] = [ + { name: TagGraphRootName, id: 1 }, + { name: "Energy and Environment", id: 2 }, + { name: "Energy", slug: "energy", id: 3 }, + { name: "Nuclear Energy", slug: "nuclear-energy", id: 4 }, + { name: "CO2 & Greenhouse Gas Emissions", slug: "co2-and-greenhouse-gas-emissions", id: 5 }, + ] + + const dummyTagGraph: DbInsertTagGraphNode[] = [ + { parentId: 1, childId: 2 }, + { parentId: 2, childId: 3, weight: 110 }, + { parentId: 2, childId: 5 }, + { parentId: 3, childId: 4 }, + { parentId: 5, childId: 4 }, + ] + + function makeDummyTopicPage(slug: string): DbInsertPostGdoc { + return { + slug, + content: JSON.stringify({ + type: OwidGdocType.TopicPage, + authors: [] as string[], + }), + id: slug, + published: 1, + createdAt: new Date(), + publishedAt: new Date(), + markdown: "", + } + } + const dummyTopicPages: DbInsertPostGdoc[] = [ + makeDummyTopicPage("energy"), + makeDummyTopicPage("nuclear-energy"), + makeDummyTopicPage("co2-and-greenhouse-gas-emissions"), + ] + + const dummyPostTags: DbInsertPostGdocXTag[] = [ + { gdocId: "energy", tagId: 3 }, + { gdocId: "nuclear-energy", tagId: 4 }, + { gdocId: "co2-and-greenhouse-gas-emissions", tagId: 5 }, + ] + + beforeEach(async () => { + await testKnexInstance!(TagsTableName).insert(dummyTags) + await testKnexInstance!(TagGraphTableName).insert(dummyTagGraph) + await testKnexInstance!(PostsGdocsTableName).insert(dummyTopicPages) + await testKnexInstance!(PostsGdocsXTagsTableName).insert(dummyPostTags) + }) + it("should be able to see all the tags", async () => { + const tags = await fetchJsonFromAdminApi("/tags.json") + expect(tags).toEqual({ + tags: [ + { + id: 5, + isTopic: 1, + name: "CO2 & Greenhouse Gas Emissions", + slug: "co2-and-greenhouse-gas-emissions", + }, + { + id: 3, + isTopic: 1, + name: "Energy", + slug: "energy", + }, + { + id: 2, + isTopic: 0, + name: "Energy and Environment", + slug: null, + }, + { + id: 4, + isTopic: 1, + name: "Nuclear Energy", + slug: "nuclear-energy", + }, + { + id: 1, + isTopic: 0, + name: "tag-graph-root", + slug: null, + }, + ], + }) + }) + + it("should be able to generate a tag graph", async () => { + const json = await fetchJsonFromAdminApi("/flatTagGraph.json") + expect(json).toEqual({ + "1": [ + { + childId: 2, + isTopic: 0, + name: "Energy and Environment", + parentId: 1, + weight: 100, + }, + ], + "2": [ + { + childId: 3, + isTopic: 1, + name: "Energy", + parentId: 2, + weight: 110, + }, + { + childId: 5, + isTopic: 1, + name: "CO2 & Greenhouse Gas Emissions", + parentId: 2, + weight: 100, + }, + ], + "3": [ + { + childId: 4, + isTopic: 1, + name: "Nuclear Energy", + parentId: 3, + weight: 100, + }, + ], + "5": [ + { + childId: 4, + isTopic: 1, + name: "Nuclear Energy", + parentId: 5, + weight: 100, + }, + ], + __rootId: 1, + }) + }) + + it("should be able to generate a set of breadcrumbs for a tag", async () => { + await knexReadonlyTransaction( + async (trx) => { + const parentTagArraysByChildName = + await getParentTagArraysByChildName(trx) + const breadcrumbs = getBestBreadcrumbs( + [ + { + id: 4, + name: "Nuclear Energy", + slug: "nuclear-energy", + }, + ], + parentTagArraysByChildName + ) + // breadcrumb hrefs are env-dependent, so we just assert on the labels + const labelsOnly = breadcrumbs.map((b) => b.label) + expect(labelsOnly).toEqual(["Energy", "Nuclear Energy"]) + }, + TransactionCloseMode.KeepOpen, + testKnexInstance + ) + }) + + it("should generate an optimal set of breadcrumbs when given multiple tags", async () => { + await knexReadonlyTransaction( + async (trx) => { + const parentTagArraysByChildName = + await getParentTagArraysByChildName(trx) + const breadcrumbs = getBestBreadcrumbs( + [ + { + id: 4, + name: "Nuclear Energy", + slug: "nuclear-energy", + }, + { + id: 5, + name: "CO2 & Greenhouse Gas Emissions", + slug: "co2-and-greenhouse-gas-emissions", + }, + ], + parentTagArraysByChildName + ) + // breadcrumb hrefs are env-dependent, so we just assert on the labels + const labelsOnly = breadcrumbs.map((b) => b.label) + expect(labelsOnly).toEqual(["Energy", "Nuclear Energy"]) + }, + TransactionCloseMode.KeepOpen, + testKnexInstance + ) + }) + it("should return an empty array when there are no topic tags in any of the tags' ancestors", async () => { + await knexReadonlyTransaction( + async (trx) => { + const parentTagArraysByChildName = + await getParentTagArraysByChildName(trx) + const breadcrumbs = getBestBreadcrumbs( + [ + { + id: 2, + name: "Energy and Environment", + slug: "", + }, + ], + parentTagArraysByChildName + ) + // breadcrumb hrefs are env-dependent, so we just assert on the labels + const labelsOnly = breadcrumbs.map((b) => b.label) + expect(labelsOnly).toEqual([]) + }, + TransactionCloseMode.KeepOpen, + testKnexInstance + ) + }) + it("when there are two valid paths to a given tag, it selects the longest one", async () => { + await knexReadonlyTransaction( + async (trx) => { + // Here, Women's Employment has 2 paths: + // 1. Poverty and Economic Development > Women's Employment + // 2. Human Rights > Women's Rights > Women's Employment + // prettier-ignore + await testKnexInstance!(TagsTableName).insert([ + { name: "Human Rights", id: 6 }, + { name: "Women's Rights", slug: "womens-rights", id: 7 }, + { name: "Women's Employment", slug: "womens-employment", id: 8 }, + { name: "Poverty and Economic Development", id: 9 }, + ]) + await testKnexInstance!(TagGraphTableName).insert([ + { parentId: 1, childId: 6 }, + { parentId: 6, childId: 7 }, + { parentId: 7, childId: 8 }, + { parentId: 1, childId: 9 }, + { parentId: 9, childId: 8 }, + ]) + await testKnexInstance!(PostsGdocsTableName).insert([ + makeDummyTopicPage("womens-rights"), + makeDummyTopicPage("womens-employment"), + ]) + await testKnexInstance!(PostsGdocsXTagsTableName).insert([ + { gdocId: "womens-rights", tagId: 7 }, + { gdocId: "womens-employment", tagId: 8 }, + ]) + + const parentTagArraysByChildName = + await getParentTagArraysByChildName(trx) + const breadcrumbs = getBestBreadcrumbs( + [ + { + id: 8, + name: "Women's Employment", + slug: "womens-employment", + }, + ], + parentTagArraysByChildName + ) + // breadcrumb hrefs are env-dependent, so we just assert on the labels + const labelsOnly = breadcrumbs.map((b) => b.label) + expect(labelsOnly).toEqual([ + "Women's Rights", + "Women's Employment", + ]) + }, + TransactionCloseMode.KeepOpen, + testKnexInstance + ) + }) +}) diff --git a/baker/SiteBaker.tsx b/baker/SiteBaker.tsx index 52f9b4cb51a..3671b27f8c6 100644 --- a/baker/SiteBaker.tsx +++ b/baker/SiteBaker.tsx @@ -612,6 +612,9 @@ export class SiteBaker { .getPublishedGdocPostsWithTags(knex) .then((gdocs) => gdocs.map(gdocFromJSON)) + const allParentTagArraysByChildName = + await db.getParentTagArraysByChildName(knex) + const gdocsToBake = slugs !== undefined ? publishedGdocs.filter((gdoc) => slugs.includes(gdoc.slug)) @@ -647,6 +650,16 @@ export class SiteBaker { publishedGdoc.linkedIndicators = attachments.linkedIndicators publishedGdoc.linkedChartViews = attachments.linkedChartViews + if ( + !publishedGdoc.manualBreadcrumbs?.length && + publishedGdoc.tags?.length + ) { + publishedGdoc.breadcrumbs = db.getBestBreadcrumbs( + publishedGdoc.tags, + allParentTagArraysByChildName + ) + } + // this is a no-op if the gdoc doesn't have an all-chart block if ("loadRelatedCharts" in publishedGdoc) { await publishedGdoc.loadRelatedCharts(knex) diff --git a/baker/algolia/utils/charts.ts b/baker/algolia/utils/charts.ts index 51f36eb0280..ac74e153536 100644 --- a/baker/algolia/utils/charts.ts +++ b/baker/algolia/utils/charts.ts @@ -13,6 +13,7 @@ import { isPathRedirectedToExplorer } from "../../../explorerAdminServer/Explore import { ParsedChartRecordRow, RawChartRecordRow } from "./types.js" import { excludeNullish } from "@ourworldindata/utils" import { processAvailableEntities } from "./shared.js" +import { getUniqueNamesFromParentTagArrays } from "@ourworldindata/utils/dist/Util.js" const computeChartScore = (record: Omit): number => { const { numRelatedArticles, views_7d } = record @@ -99,7 +100,8 @@ export const getChartsRecords = async ( const pageviews = await getAnalyticsPageviewsByUrlObj(knex) - const parentTagsByChildName = await db.getParentTagsByChildName(knex) + const parentTagArraysByChildName = + await db.getParentTagArraysByChildName(knex) const records: ChartRecord[] = [] for (const c of parsedRows) { @@ -121,10 +123,12 @@ export const getChartsRecords = async ( fontSize: 10, // doesn't matter, but is a mandatory field }).plaintext - const parentTags = c.tags.flatMap( + const parentTags = c.tags.flatMap((tagName) => { + const parentTagArrays = parentTagArraysByChildName[tagName] // a chart can be tagged with a tag that isn't in the tag graph - (tag) => parentTagsByChildName[tag] || [] - ) + if (!parentTagArrays) return [] + return getUniqueNamesFromParentTagArrays(parentTagArrays) + }) const record = { objectID: c.id.toString(), diff --git a/baker/algolia/utils/explorerViews.ts b/baker/algolia/utils/explorerViews.ts index 84c2037fd4f..19b059688cc 100644 --- a/baker/algolia/utils/explorerViews.ts +++ b/baker/algolia/utils/explorerViews.ts @@ -49,6 +49,7 @@ import { ChartRecord, ChartRecordType, } from "../../../site/search/searchTypes.js" +import { getUniqueNamesFromParentTagArrays } from "@ourworldindata/utils/dist/Util.js" export function explorerViewRecordToChartRecord( e: ExplorerViewFinalRecord @@ -698,7 +699,7 @@ async function getExplorersWithInheritedTags(trx: db.KnexReadonlyTransaction) { // The DB query gets the tags for the explorer, but we need to add the parent tags as well. // This isn't done in the query because it would require a recursive CTE. // It's easier to write that query once, separately, and reuse it. - const parentTags = await db.getParentTagsByChildName(trx) + const parentTagArrays = await db.getParentTagArraysByChildName(trx) const publishedExplorersWithTags = [] for (const explorer of Object.values(explorersBySlug)) { @@ -709,10 +710,14 @@ async function getExplorersWithInheritedTags(trx: db.KnexReadonlyTransaction) { }) } const tags = new Set() - for (const tag of explorer.tags) { - tags.add(tag) - for (const parentTag of parentTags[tag]) { - tags.add(parentTag) + + for (const tagName of explorer.tags) { + tags.add(tagName) + const parentTagNames = getUniqueNamesFromParentTagArrays( + parentTagArrays[tagName] + ) + for (const parentTagName of parentTagNames) { + tags.add(parentTagName) } } diff --git a/db/db.ts b/db/db.ts index ab9aa2e1ada..d64f6d29ad3 100644 --- a/db/db.ts +++ b/db/db.ts @@ -5,6 +5,7 @@ import { GRAPHER_DB_PASS, GRAPHER_DB_NAME, GRAPHER_DB_PORT, + BAKED_BASE_URL, } from "../settings/serverSettings.js" import { registerExitHandler } from "./cleanup.js" import { createTagGraph, keyBy } from "@ourworldindata/utils" @@ -30,8 +31,10 @@ import { MinimalExplorerInfo, DbEnrichedImage, DbEnrichedImageWithUserId, + MinimalTag, + BreadcrumbItem, } from "@ourworldindata/types" -import { groupBy, uniq } from "lodash" +import { groupBy } from "lodash" import { gdocFromJSON } from "./model/Gdoc/GdocFactory.js" // Return the first match from a mysql query @@ -387,7 +390,7 @@ export const getPublishedGdocPosts = async ( knex, `-- sql SELECT - g.breadcrumbs, + g.manualBreadcrumbs, g.content, g.createdAt, g.id, @@ -423,7 +426,7 @@ export const getPublishedGdocPostsWithTags = async ( knex, `-- sql SELECT - g.breadcrumbs, + g.manualBreadcrumbs, g.content, g.createdAt, g.id, @@ -536,43 +539,92 @@ export async function getFlatTagGraph(knex: KnexReadonlyTransaction): Promise< return { ...tagGraphByParentId, __rootId: tagGraphRootIdResult.id } } -// DFS through the tag graph and create a map of parent tags for each child tag -// e.g. { "Child": [ "Parent", "Grandparent" ], "Parent": [ "Grandparent" ] } -// parent tags are listed in no particular order -export async function getParentTagsByChildName( +// DFS through the tag graph and track all paths from a child to the root +// e.g. { "childTag": [ [parentTag1, parentTag2], [parentTag3] ] } +// Use this with getUniqueNamesFromParentTagArrays to get Record instead +export async function getParentTagArraysByChildName( trx: KnexReadonlyTransaction -): Promise> { +): Promise< + Record[][]> +> { const { __rootId, ...flatTagGraph } = await getFlatTagGraph(trx) const tagGraph = createTagGraph(flatTagGraph, __rootId) - - const tagsById = await trx("tags") - .select("id", "name") + const tagsById = await trx("tags") + .select("id", "name", "slug") .then((tags) => keyBy(tags, "id")) - const parentTagsByChildName: Record< + const pathsByChildName: Record< DbPlainTag["name"], - DbPlainTag["name"][] + Pick[][] > = {} - function trackParents(node: TagGraphNode): void { + function trackAllPaths( + node: TagGraphNode, + currentPath: Pick[] = [] + ): void { + const currentTag = tagsById[node.id] + const newPath = [...currentPath, currentTag] + + // Don't add paths for root node + if (node.id !== __rootId) { + const nodeName = currentTag.name + if (!pathsByChildName[nodeName]) { + pathsByChildName[nodeName] = [] + } + + // Add the complete path (excluding root) + pathsByChildName[nodeName].push(newPath.slice(1)) + } + for (const child of node.children) { - trackParents(child) + trackAllPaths(child, newPath) } + } - const preexistingParents = parentTagsByChildName[node.name] ?? [] - // node.path is an array of tag ids from the root to the current node - // slice to remove the root node and the current node, then map them into tag names - const newParents = node.path.slice(1, -1).map((id) => tagsById[id].name) + trackAllPaths(tagGraph) - parentTagsByChildName[node.name] = uniq([ - ...preexistingParents, - ...newParents, - ]) + return pathsByChildName +} + +export function getBestBreadcrumbs( + tags: MinimalTag[], + parentTagArraysByChildName: Record< + string, + Pick[][] + > +): BreadcrumbItem[] { + // For each tag, find the best path according to our criteria + // e.g. { "Nuclear Energy ": ["Energy and Environment", "Energy"], "Air Pollution": ["Energy and Environment"] } + const result = new Map[]>() + + for (const tag of tags) { + const paths = parentTagArraysByChildName[tag.name] + if (paths && paths.length > 0) { + // Since getFlatTagGraph already orders by weight DESC and name ASC, + // the first path in the array will be our best path + result.set(tag.id, paths[0]) + } } - trackParents(tagGraph) + // Only keep the topics in the paths, because only topics are clickable as breadcrumbs + const topicsOnly = Array.from(result.values()).reduce( + (acc, path) => { + return [...acc, path.filter((tag) => tag.slug)] + }, + [] as Pick[][] + ) + + // Pick the longest path from result, assuming that the longest path is the best + const longestPath = topicsOnly.reduce((best, path) => { + return path.length > best.length ? path : best + }, []) + + const breadcrumbs = longestPath.map((tag) => ({ + label: tag.name, + href: `${BAKED_BASE_URL}/${tag.slug}`, + })) - return parentTagsByChildName + return breadcrumbs } export async function updateTagGraph( diff --git a/db/migrateWpPostsToArchieMl.ts b/db/migrateWpPostsToArchieMl.ts index a0938d6c546..e3a9647c39d 100644 --- a/db/migrateWpPostsToArchieMl.ts +++ b/db/migrateWpPostsToArchieMl.ts @@ -256,7 +256,7 @@ const migrate = async (trx: db.KnexReadWriteTransaction): Promise => { updatedAt: post.updated_at_in_wordpress, publicationContext: OwidGdocPublicationContext.listed, // TODO: not all articles are listed, take this from the DB revisionId: null, - breadcrumbs: null, + manualBreadcrumbs: null, markdown: null, } const archieMlStatsContent = { diff --git a/db/migration/1736443943021-RemoveManualBreadcrumbsFromNonSDGArticles.ts b/db/migration/1736443943021-RemoveManualBreadcrumbsFromNonSDGArticles.ts new file mode 100644 index 00000000000..0de1dbac921 --- /dev/null +++ b/db/migration/1736443943021-RemoveManualBreadcrumbsFromNonSDGArticles.ts @@ -0,0 +1,49 @@ +import { MigrationInterface, QueryRunner } from "typeorm" + +export class RemoveManualBreadcrumbsFromNonSDGArticles1736443943021 + implements MigrationInterface +{ + public async up(queryRunner: QueryRunner): Promise { + /* As of 09-01-2025, the articles affected by this migration are: + - https://ourworldindata.org/renewable-energy + - https://ourworldindata.org/palm-oil + - https://ourworldindata.org/water-sanitation-2020-update + - https://ourworldindata.org/fossil-fuels + - https://ourworldindata.org/inequality-co2 + - https://ourworldindata.org/energy-gdp-decoupling + - https://ourworldindata.org/adopting-slower-growing-breeds-of-chicken-would-reduce-animal-suffering-significantly + - https://ourworldindata.org/number-without-electricity + - https://ourworldindata.org/hygiene + - https://ourworldindata.org/per-capita-energy + - https://ourworldindata.org/energy-definitions + - https://ourworldindata.org/land-use-per-energy-source + - https://ourworldindata.org/do-better-cages-or-cage-free-environments-really-improve-the-lives-of-hens + - https://ourworldindata.org/deforestation + - https://ourworldindata.org/energy-offshoring + - https://ourworldindata.org/electricity-mix + - https://ourworldindata.org/clean-water + - https://ourworldindata.org/global-energy-200-years + - https://ourworldindata.org/what-are-drivers-deforestation + - https://ourworldindata.org/energy-substitution-method + - https://ourworldindata.org/energy-missing-data + - https://ourworldindata.org/nuclear-energy + - https://ourworldindata.org/how-many-animals-get-slaughtered-every-day + - https://ourworldindata.org/energy-ladder + - https://ourworldindata.org/energy-access + - https://ourworldindata.org/sanitation + - https://ourworldindata.org/how-many-animals-are-factory-farmed + - https://ourworldindata.org/decarbonizing-energy-progress + */ + await queryRunner.query( + `-- sql + UPDATE posts_gdocs SET breadcrumbs = NULL + WHERE type = 'article' + AND breadcrumbs IS NOT NULL + AND slug NOT LIKE "%sdgs%"` + ) + } + + public async down(): Promise { + // no-op + } +} diff --git a/db/migration/1736455365750-RenameBreadcrumbsColumn.ts b/db/migration/1736455365750-RenameBreadcrumbsColumn.ts new file mode 100644 index 00000000000..e23ced84201 --- /dev/null +++ b/db/migration/1736455365750-RenameBreadcrumbsColumn.ts @@ -0,0 +1,15 @@ +import { MigrationInterface, QueryRunner } from "typeorm" + +export class RenameBreadcrumbsColumn1736455365750 + implements MigrationInterface +{ + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query(`-- sql + ALTER TABLE posts_gdocs RENAME COLUMN breadcrumbs TO manualBreadcrumbs`) + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(`-- sql + ALTER TABLE posts_gdocs RENAME COLUMN manualBreadcrumbs TO breadcrumbs`) + } +} diff --git a/db/migration/1736969067156-PostsGdocsUpdatedAt.ts b/db/migration/1736969067156-PostsGdocsUpdatedAt.ts new file mode 100644 index 00000000000..a4412b4d288 --- /dev/null +++ b/db/migration/1736969067156-PostsGdocsUpdatedAt.ts @@ -0,0 +1,19 @@ +import { MigrationInterface, QueryRunner } from "typeorm" + +export class PostsGdocsUpdatedAt1736969067156 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query(`-- sql + ALTER TABLE posts_gdocs + MODIFY COLUMN createdAt DATETIME DEFAULT CURRENT_TIMESTAMP, + MODIFY COLUMN updatedAt DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP + `) + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(`-- sql + ALTER TABLE posts_gdocs + MODIFY COLUMN createdAt DATETIME DEFAULT CURRENT_TIMESTAMP, + MODIFY COLUMN updatedAt DATETIME DEFAULT CURRENT_TIMESTAMP + `) + } +} diff --git a/db/model/Gdoc/GdocBase.ts b/db/model/Gdoc/GdocBase.ts index 173fd06e6fb..9fc97412934 100644 --- a/db/model/Gdoc/GdocBase.ts +++ b/db/model/Gdoc/GdocBase.ts @@ -82,6 +82,7 @@ export class GdocBase implements OwidGdocBaseInterface { publicationContext: OwidGdocPublicationContext = OwidGdocPublicationContext.unlisted breadcrumbs: BreadcrumbItem[] | null = null + manualBreadcrumbs: BreadcrumbItem[] | null = null tags: DbPlainTag[] | null = null errors: OwidGdocErrorMessage[] = [] donors: string[] = [] diff --git a/db/model/Gdoc/GdocFactory.ts b/db/model/Gdoc/GdocFactory.ts index 0a192d45dbe..5323228022a 100644 --- a/db/model/Gdoc/GdocFactory.ts +++ b/db/model/Gdoc/GdocFactory.ts @@ -44,6 +44,8 @@ import { KnexReadWriteTransaction, getImageMetadataByFilenames, getPublishedGdocPostsWithTags, + getParentTagArraysByChildName, + getBestBreadcrumbs, } from "../../db.js" import { enrichedBlocksToMarkdown } from "./enrichedToMarkdown.js" import { GdocAbout } from "./GdocAbout.js" @@ -204,7 +206,17 @@ export async function getGdocBaseObjectById( [id] ) gdoc.tags = tags + + if (tags.length) { + const parentTagArraysByChildName = + await getParentTagArraysByChildName(knex) + gdoc.breadcrumbs = getBestBreadcrumbs( + gdoc.tags, + parentTagArraysByChildName + ) + } } + return gdoc } @@ -292,6 +304,14 @@ export async function getPublishedGdocBaseObjectBySlug( [gdoc.id] ) gdoc.tags = tags + if (tags.length) { + const parentTagArraysByChildName = + await getParentTagArraysByChildName(knex) + gdoc.breadcrumbs = getBestBreadcrumbs( + gdoc.tags, + parentTagArraysByChildName + ) + } } return gdoc } @@ -575,7 +595,7 @@ export function getDbEnrichedGdocFromOwidGdoc( gdoc: OwidGdoc | GdocBase ): DbEnrichedPostGdoc { const enrichedGdoc = { - breadcrumbs: gdoc.breadcrumbs, + manualBreadcrumbs: gdoc.manualBreadcrumbs, content: gdoc.content, createdAt: gdoc.createdAt, id: gdoc.id, @@ -592,20 +612,22 @@ export function getDbEnrichedGdocFromOwidGdoc( export async function upsertGdoc( knex: KnexReadWriteTransaction, gdoc: OwidGdoc | GdocBase -): Promise { +): Promise { let sql = undefined try { const enrichedGdoc = getDbEnrichedGdocFromOwidGdoc(gdoc) - const rawPost = serializePostsGdocsRow(enrichedGdoc) + const { updatedAt: _, ...rawPost } = + serializePostsGdocsRow(enrichedGdoc) const query = knex .table(PostsGdocsTableName) .insert(rawPost) .onConflict("id") .merge() sql = query.toSQL() - const indices = await query + await query await updateDerivedGdocPostsComponents(knex, gdoc.id, gdoc.content.body) - return indices + const upserted = await getAndLoadGdocById(knex, gdoc.id) + return upserted } catch (e) { console.error(`Error occured in sql: ${sql}`, e) throw e diff --git a/db/tests/testHelpers.ts b/db/tests/testHelpers.ts index 47e3f231e55..8340b59bf75 100644 --- a/db/tests/testHelpers.ts +++ b/db/tests/testHelpers.ts @@ -7,6 +7,8 @@ import { MultiDimDataPagesTableName, MultiDimXChartConfigsTableName, PostsGdocsTableName, + TagGraphTableName, + TagsTableName, UsersTableName, VariablesTableName, } from "@ourworldindata/types" @@ -24,6 +26,8 @@ export const TABLES_IN_USE = [ DatasetsTableName, PostsGdocsTableName, UsersTableName, + TagGraphTableName, + TagsTableName, ] export async function cleanTestDb( diff --git a/packages/@ourworldindata/types/src/dbTypes/PostsGdocs.ts b/packages/@ourworldindata/types/src/dbTypes/PostsGdocs.ts index 1c7bc650b22..c5e0c63933d 100644 --- a/packages/@ourworldindata/types/src/dbTypes/PostsGdocs.ts +++ b/packages/@ourworldindata/types/src/dbTypes/PostsGdocs.ts @@ -8,7 +8,7 @@ import { MinimalTag } from "./Tags.js" export const PostsGdocsTableName = "posts_gdocs" export interface DbInsertPostGdoc { - breadcrumbs?: JsonString | null + manualBreadcrumbs?: JsonString | null content: JsonString createdAt: Date id: string @@ -23,10 +23,10 @@ export interface DbInsertPostGdoc { export type DbRawPostGdoc = Required export type DbEnrichedPostGdoc = Omit< DbRawPostGdoc, - "content" | "breadcrumbs" | "published" + "content" | "manualBreadcrumbs" | "published" > & { content: OwidGdocContent - breadcrumbs: BreadcrumbItem[] | null + manualBreadcrumbs: BreadcrumbItem[] | null published: boolean } @@ -62,7 +62,7 @@ export function parsePostsGdocsRow(row: DbRawPostGdoc): DbEnrichedPostGdoc { return { ...row, content: parsePostGdocContent(row.content), - breadcrumbs: parsePostsGdocsBreadcrumbs(row.breadcrumbs), + manualBreadcrumbs: parsePostsGdocsBreadcrumbs(row.manualBreadcrumbs), published: !!row.published, } } @@ -77,10 +77,17 @@ export function parsePostsGdocsWithTagsRow( } export function serializePostsGdocsRow(row: DbEnrichedPostGdoc): DbRawPostGdoc { + // Kind of awkward, but automatic breadcrumbs are part of OwidGdocBaseInterface, + // but not part of the DB schema. So we remove them here. + if ("breadcrumbs" in row) { + delete row.breadcrumbs + } return { ...row, content: serializePostGdocContent(row.content), - breadcrumbs: serializePostsGdocsBreadcrumbs(row.breadcrumbs), + manualBreadcrumbs: serializePostsGdocsBreadcrumbs( + row.manualBreadcrumbs + ), published: row.published ? 1 : 0, } } diff --git a/packages/@ourworldindata/types/src/dbTypes/TagsGraph.ts b/packages/@ourworldindata/types/src/dbTypes/TagsGraph.ts deleted file mode 100644 index 3e9aa929bd0..00000000000 --- a/packages/@ourworldindata/types/src/dbTypes/TagsGraph.ts +++ /dev/null @@ -1,8 +0,0 @@ -/** the entity in the `tags` table */ -export const TagsGraphTableName = "tag_graph" -export interface DbInsertTagGraph { - parentId: number - childId: number - weight?: number -} -export type DbPlainTag = Required diff --git a/packages/@ourworldindata/types/src/domainTypes/ContentGraph.ts b/packages/@ourworldindata/types/src/domainTypes/ContentGraph.ts index 34cf5a8fd8c..29c3d5a3bf8 100644 --- a/packages/@ourworldindata/types/src/domainTypes/ContentGraph.ts +++ b/packages/@ourworldindata/types/src/domainTypes/ContentGraph.ts @@ -12,6 +12,8 @@ export interface CategoryWithEntries { subcategories?: CategoryWithEntries[] } +export const TagGraphTableName = "tag_graph" + export type DbInsertTagGraphNode = { parentId: number childId: number diff --git a/packages/@ourworldindata/types/src/gdocTypes/Gdoc.ts b/packages/@ourworldindata/types/src/gdocTypes/Gdoc.ts index 318c5d1e5b2..06d27be4e5c 100644 --- a/packages/@ourworldindata/types/src/gdocTypes/Gdoc.ts +++ b/packages/@ourworldindata/types/src/gdocTypes/Gdoc.ts @@ -97,7 +97,7 @@ export interface OwidGdocBaseInterface { updatedAt: Date | null revisionId: string | null publicationContext: OwidGdocPublicationContext - breadcrumbs: BreadcrumbItem[] | null + manualBreadcrumbs: BreadcrumbItem[] | null linkedAuthors?: LinkedAuthor[] linkedDocuments?: Record linkedCharts?: Record @@ -106,6 +106,7 @@ export interface OwidGdocBaseInterface { relatedCharts?: RelatedChart[] tags?: MinimalTag[] | null errors?: OwidGdocErrorMessage[] + breadcrumbs?: BreadcrumbItem[] | null markdown: string | null } diff --git a/packages/@ourworldindata/types/src/index.ts b/packages/@ourworldindata/types/src/index.ts index a8a3f80db50..7c3f37e6213 100644 --- a/packages/@ourworldindata/types/src/index.ts +++ b/packages/@ourworldindata/types/src/index.ts @@ -127,6 +127,7 @@ export { export { TagGraphRootName, + TagGraphTableName, type CategoryWithEntries, type EntryMeta, type FlatTagGraph, diff --git a/packages/@ourworldindata/utils/src/Util.ts b/packages/@ourworldindata/utils/src/Util.ts index 547e9404216..5209f0e6702 100644 --- a/packages/@ourworldindata/utils/src/Util.ts +++ b/packages/@ourworldindata/utils/src/Util.ts @@ -177,6 +177,7 @@ import { GrapherInterface, DimensionProperty, GRAPHER_CHART_TYPES, + DbPlainTag, } from "@ourworldindata/types" import { PointVector } from "./PointVector.js" import * as React from "react" @@ -1930,6 +1931,19 @@ export function isFiniteWithGuard(value: unknown): value is number { return isFinite(value as any) } +// Use with getParentTagArraysByChildName to collapse all paths to the child into a single array of unique parent tag names +export function getUniqueNamesFromParentTagArrays( + parentTagArrays: Pick[][] +): string[] { + const tagNames = new Set( + parentTagArrays.flatMap((parentTagArray) => + parentTagArray.map((tag) => tag.name) + ) + ) + + return [...tagNames] +} + export function createTagGraph( tagGraphByParentId: Record, rootId: number diff --git a/site/Breadcrumb/Breadcrumb.tsx b/site/Breadcrumb/Breadcrumb.tsx index b8d5b92f9c2..d7460af295b 100644 --- a/site/Breadcrumb/Breadcrumb.tsx +++ b/site/Breadcrumb/Breadcrumb.tsx @@ -41,14 +41,13 @@ export const Breadcrumbs = ({ {items.map((item, idx) => { const isLast = idx === items.length - 1 - const breadcrumb = - !isLast && item.href ? ( - - {item.label} - - ) : ( - {item.label} - ) + const breadcrumb = item.href ? ( + + {item.label} + + ) : ( + {item.label} + ) return ( diff --git a/site/gdocs/components/centered-article.scss b/site/gdocs/components/centered-article.scss index ccce63cc548..835cf88d55f 100644 --- a/site/gdocs/components/centered-article.scss +++ b/site/gdocs/components/centered-article.scss @@ -490,11 +490,15 @@ h3.article-block__heading.has-supertitle { .centered-article-header__breadcrumbs-container { .centered-article-header__breadcrumbs { + @include body-3-regular; + @include sm-only { + font-size: 0.75rem; + } &.breadcrumbs-white { color: $white; } &.breadcrumbs-blue { - color: $blue-100; + color: $blue-90; } // Idea here: By using this positioning, we can have the breadcrumbs @@ -510,7 +514,6 @@ h3.article-block__heading.has-supertitle { margin-top: var(--header-breadcrumb-margin-top); margin-bottom: calc(-1.6em - var(--header-breadcrumb-margin-top)); - font-size: 1rem; a { @include owid-link-90; color: inherit; @@ -523,8 +526,9 @@ h3.article-block__heading.has-supertitle { .separator { margin: 0 0.5rem; - vertical-align: -0.0625em; - opacity: 0.7; + vertical-align: -0.05em; + opacity: 0.6; + color: inherit; } } } diff --git a/site/gdocs/pages/GdocPost.tsx b/site/gdocs/pages/GdocPost.tsx index 64468b80906..0fc0f892c41 100644 --- a/site/gdocs/pages/GdocPost.tsx +++ b/site/gdocs/pages/GdocPost.tsx @@ -47,6 +47,7 @@ export function GdocPost({ publishedAt, slug, breadcrumbs, + manualBreadcrumbs, }: OwidGdocPostInterface & { isPreviewing?: boolean }) { @@ -90,7 +91,7 @@ export function GdocPost({ {isDeprecated && content["deprecation-notice"] && (