diff --git a/adminSiteClient/GdocsBreadcrumbsInput.tsx b/adminSiteClient/GdocsManualBreadcrumbsInput.tsx
similarity index 59%
rename from adminSiteClient/GdocsBreadcrumbsInput.tsx
rename to adminSiteClient/GdocsManualBreadcrumbsInput.tsx
index 094e7188cba..86c382426d4 100644
--- a/adminSiteClient/GdocsBreadcrumbsInput.tsx
+++ b/adminSiteClient/GdocsManualBreadcrumbsInput.tsx
@@ -13,14 +13,12 @@ export const BreadcrumbLine = ({
item,
setItem,
removeItem,
- isLastBreadcrumbItem,
labelError,
hrefError,
}: {
item: BreadcrumbItem
setItem: (item: BreadcrumbItem) => void
removeItem: () => void
- isLastBreadcrumbItem?: boolean
labelError?: OwidGdocErrorMessage
hrefError?: OwidGdocErrorMessage
}) => {
@@ -30,15 +28,10 @@ export const BreadcrumbLine = ({
setItem({ ...item, href: e.target.value })
}
- disabled={isLastBreadcrumbItem}
status={hrefError?.type}
placeholder="e.g. /poverty"
/>
@@ -51,11 +44,7 @@ export const BreadcrumbLine = ({
onChange={(e) =>
setItem({ ...item, label: e.target.value })
}
- placeholder={
- isLastBreadcrumbItem
- ? "Concise version of the article's title"
- : undefined
- }
+ placeholder={"A topic name"}
status={labelError?.type}
/>
{labelError && }
@@ -70,7 +59,7 @@ export const BreadcrumbLine = ({
)
}
-export const GdocsBreadcrumbsInput = ({
+export const GdocsManualBreadcrumbsInput = ({
gdoc,
setCurrentGdoc,
errors,
@@ -79,62 +68,76 @@ export const GdocsBreadcrumbsInput = ({
setCurrentGdoc: (gdoc: OwidGdocPostInterface) => void
errors?: OwidGdocErrorMessage[]
}) => {
- const setBreadcrumbs = (breadcrumbs: BreadcrumbItem[] | undefined) => {
- if (breadcrumbs?.length) {
- // The last breadcrumb is not clickable, so we don't need a URL
- breadcrumbs[breadcrumbs.length - 1].href = undefined
- } else breadcrumbs = undefined
-
- setCurrentGdoc({ ...gdoc, breadcrumbs: breadcrumbs ?? null })
+ const setBreadcrumbs = (breadcrumbs: BreadcrumbItem[] | null) => {
+ setCurrentGdoc({ ...gdoc, manualBreadcrumbs: breadcrumbs })
}
const setItemAtIndex = (item: BreadcrumbItem, i: number) => {
- const breadcrumbs = [...(gdoc.breadcrumbs ?? [])]
+ if (!gdoc.manualBreadcrumbs) return
+
+ const breadcrumbs = [...gdoc.manualBreadcrumbs]
breadcrumbs[i] = item
setBreadcrumbs(breadcrumbs)
}
const removeItemAtIndex = (i: number) => {
- const breadcrumbs = [...(gdoc.breadcrumbs ?? [])]
+ if (!gdoc.manualBreadcrumbs) return
+
+ const breadcrumbs = [...gdoc.manualBreadcrumbs]
breadcrumbs.splice(i, 1)
- setBreadcrumbs(breadcrumbs)
+
+ setBreadcrumbs(breadcrumbs.length ? breadcrumbs : null)
}
return (
-
- Breadcrumbs
-
-
- {gdoc.breadcrumbs?.map((item, i) => (
+
Breadcrumbs
+ {!!gdoc.breadcrumbs?.length && !gdoc.manualBreadcrumbs?.length ? (
+
+
+ The breadcrumbs for this article will be automatically
+ generated, based on this article's tags and the tag
+ graph.
+
+
+ If you want to override these breadcrumbs, you can do so
+ here:
+
+
+ ) : (
+
+ Unless you are editing an SDG page, each breadcrumb should
+ have a URL and label.
+
+ )}
+
+ {gdoc.manualBreadcrumbs?.map((item, i) => (
setItemAtIndex(item, i)}
removeItem={() => removeItemAtIndex(i)}
key={i}
labelError={getPropertyMostCriticalError(
- `breadcrumbs[${i}].label`,
+ `manualBreadcrumbs[${i}].label`,
errors
)}
hrefError={getPropertyMostCriticalError(
- `breadcrumbs[${i}].href`,
+ `manualBreadcrumbs[${i}].href`,
errors
)}
- isLastBreadcrumbItem={i === gdoc.breadcrumbs!.length - 1}
/>
))}
- {!gdoc.breadcrumbs?.length && No breadcrumbs}
)
}
diff --git a/adminSiteClient/GdocsPreviewPage.tsx b/adminSiteClient/GdocsPreviewPage.tsx
index aa5ab051392..97b43fff73b 100644
--- a/adminSiteClient/GdocsPreviewPage.tsx
+++ b/adminSiteClient/GdocsPreviewPage.tsx
@@ -313,11 +313,6 @@ export const GdocsPreviewPage = ({ match, history }: GdocsMatchProps) => {
size="large"
onClose={onSettingsClose}
open={isSettingsOpen}
- extra={
-
- }
>
{tsMatch(currentGdoc)
.with(
diff --git a/adminSiteClient/GdocsSettingsForms.tsx b/adminSiteClient/GdocsSettingsForms.tsx
index a1911068063..5d555a45df1 100644
--- a/adminSiteClient/GdocsSettingsForms.tsx
+++ b/adminSiteClient/GdocsSettingsForms.tsx
@@ -15,7 +15,7 @@ import {
import { GdocsPublishedAt } from "./GdocsDateline.js"
import { GdocsPublicationContext } from "./GdocsPublicationContext.js"
import { Alert } from "antd"
-import { GdocsBreadcrumbsInput } from "./GdocsBreadcrumbsInput.js"
+import { GdocsManualBreadcrumbsInput } from "./GdocsManualBreadcrumbsInput.js"
const GdocCommonErrors = ({
errors,
@@ -155,7 +155,7 @@ export const GdocPostSettings = ({
errors={errors}
description="An optional property to override the excerpt of this post in our atom feed, which is used for the newsletter"
/>
- ,
boolean
> = {
- breadcrumbs: true,
+ breadcrumbs: true, // automatically generated, not actually possible to change via the admin preview
+ manualBreadcrumbs: true,
errors: true,
linkedAuthors: false,
linkedCharts: true,
diff --git a/adminSiteClient/gdocsValidation.ts b/adminSiteClient/gdocsValidation.ts
index 0ad8da3f38c..fe64982c675 100644
--- a/adminSiteClient/gdocsValidation.ts
+++ b/adminSiteClient/gdocsValidation.ts
@@ -140,12 +140,12 @@ function validateExcerpt(
}
}
-function validateBreadcrumbs(
+function validateManualBreadcrumbs(
gdoc: OwidGdocPostInterface,
errors: OwidGdocErrorMessage[]
) {
- if (gdoc.breadcrumbs) {
- for (const [i, breadcrumb] of gdoc.breadcrumbs.entries()) {
+ if (gdoc.manualBreadcrumbs) {
+ for (const [i, breadcrumb] of gdoc.manualBreadcrumbs.entries()) {
if (!breadcrumb.label) {
errors.push({
property: `breadcrumbs[${i}].label`,
@@ -155,7 +155,7 @@ function validateBreadcrumbs(
}
// Last item can be missing a href
- if (!breadcrumb.href && i !== gdoc.breadcrumbs.length - 1) {
+ if (!breadcrumb.href && i !== gdoc.manualBreadcrumbs.length - 1) {
errors.push({
property: `breadcrumbs[${i}].href`,
type: OwidGdocErrorMessageType.Error,
@@ -292,7 +292,7 @@ export const getErrors = (gdoc: OwidGdoc): OwidGdocErrorMessage[] => {
if (checkIsGdocPost(gdoc)) {
validateRefs(gdoc, errors)
validateExcerpt(gdoc, errors)
- validateBreadcrumbs(gdoc, errors)
+ validateManualBreadcrumbs(gdoc, errors)
validateAtomFields(gdoc, errors)
} else if (checkIsDataInsight(gdoc)) {
validateApprovedBy(gdoc, errors)
diff --git a/adminSiteServer/apiRoutes/gdocs.ts b/adminSiteServer/apiRoutes/gdocs.ts
index fbeb412e0d9..1a84b0e94da 100644
--- a/adminSiteServer/apiRoutes/gdocs.ts
+++ b/adminSiteServer/apiRoutes/gdocs.ts
@@ -178,11 +178,10 @@ export async function createOrUpdateGdoc(
: GdocLinkUpdateMode.DeleteOnly
)
- await upsertGdoc(trx, nextGdoc)
-
+ const upserted = await upsertGdoc(trx, nextGdoc)
await indexAndBakeGdocIfNeccesary(trx, res.locals.user, prevGdoc, nextGdoc)
- return nextGdoc
+ return upserted
}
async function validateTombstoneRelatedLinkUrl(
diff --git a/adminSiteServer/app.test.ts b/adminSiteServer/app.test.ts
index 69b96f537fc..f8c70d04aae 100644
--- a/adminSiteServer/app.test.ts
+++ b/adminSiteServer/app.test.ts
@@ -1,5 +1,5 @@
import { google } from "googleapis"
-import { jest } from "@jest/globals"
+import { beforeAll, jest } from "@jest/globals"
// Mock the google docs api to retrieve files from the test-files directory
// AFAICT, we have to do this directly after the import
// and before any other code that might import googleapis
@@ -43,7 +43,10 @@ import { Knex, knex } from "knex"
import { dbTestConfig } from "../db/tests/dbTestConfig.js"
import {
TransactionCloseMode,
+ getBestBreadcrumbs,
+ getParentTagArraysByChildName,
knexReadWriteTransaction,
+ knexReadonlyTransaction,
setKnexInstance,
} from "../db/db.js"
import { cleanTestDb, TABLES_IN_USE } from "../db/tests/testHelpers.js"
@@ -51,9 +54,19 @@ import {
ChartConfigsTableName,
ChartsTableName,
DatasetsTableName,
+ DbInsertTag,
+ DbInsertTagGraphNode,
MultiDimDataPagesTableName,
MultiDimXChartConfigsTableName,
+ TagsTableName,
+ TagGraphTableName,
VariablesTableName,
+ TagGraphRootName,
+ PostsGdocsTableName,
+ OwidGdocType,
+ DbInsertPostGdoc,
+ DbInsertPostGdocXTag,
+ PostsGdocsXTagsTableName,
} from "@ourworldindata/types"
import path from "path"
import fs from "fs"
@@ -920,3 +933,270 @@ describe("OwidAdminApp: indicator-level chart configs", () => {
expect(json.success).toBe(false)
})
})
+
+describe("OwidAdminApp: tag graph", () => {
+ // prettier-ignore
+ const dummyTags: DbInsertTag[] = [
+ { name: TagGraphRootName, id: 1 },
+ { name: "Energy and Environment", id: 2 },
+ { name: "Energy", slug: "energy", id: 3 },
+ { name: "Nuclear Energy", slug: "nuclear-energy", id: 4 },
+ { name: "CO2 & Greenhouse Gas Emissions", slug: "co2-and-greenhouse-gas-emissions", id: 5 },
+ ]
+
+ const dummyTagGraph: DbInsertTagGraphNode[] = [
+ { parentId: 1, childId: 2 },
+ { parentId: 2, childId: 3, weight: 110 },
+ { parentId: 2, childId: 5 },
+ { parentId: 3, childId: 4 },
+ { parentId: 5, childId: 4 },
+ ]
+
+ function makeDummyTopicPage(slug: string): DbInsertPostGdoc {
+ return {
+ slug,
+ content: JSON.stringify({
+ type: OwidGdocType.TopicPage,
+ authors: [] as string[],
+ }),
+ id: slug,
+ published: 1,
+ createdAt: new Date(),
+ publishedAt: new Date(),
+ markdown: "",
+ }
+ }
+ const dummyTopicPages: DbInsertPostGdoc[] = [
+ makeDummyTopicPage("energy"),
+ makeDummyTopicPage("nuclear-energy"),
+ makeDummyTopicPage("co2-and-greenhouse-gas-emissions"),
+ ]
+
+ const dummyPostTags: DbInsertPostGdocXTag[] = [
+ { gdocId: "energy", tagId: 3 },
+ { gdocId: "nuclear-energy", tagId: 4 },
+ { gdocId: "co2-and-greenhouse-gas-emissions", tagId: 5 },
+ ]
+
+ beforeEach(async () => {
+ await testKnexInstance!(TagsTableName).insert(dummyTags)
+ await testKnexInstance!(TagGraphTableName).insert(dummyTagGraph)
+ await testKnexInstance!(PostsGdocsTableName).insert(dummyTopicPages)
+ await testKnexInstance!(PostsGdocsXTagsTableName).insert(dummyPostTags)
+ })
+ it("should be able to see all the tags", async () => {
+ const tags = await fetchJsonFromAdminApi("/tags.json")
+ expect(tags).toEqual({
+ tags: [
+ {
+ id: 5,
+ isTopic: 1,
+ name: "CO2 & Greenhouse Gas Emissions",
+ slug: "co2-and-greenhouse-gas-emissions",
+ },
+ {
+ id: 3,
+ isTopic: 1,
+ name: "Energy",
+ slug: "energy",
+ },
+ {
+ id: 2,
+ isTopic: 0,
+ name: "Energy and Environment",
+ slug: null,
+ },
+ {
+ id: 4,
+ isTopic: 1,
+ name: "Nuclear Energy",
+ slug: "nuclear-energy",
+ },
+ {
+ id: 1,
+ isTopic: 0,
+ name: "tag-graph-root",
+ slug: null,
+ },
+ ],
+ })
+ })
+
+ it("should be able to generate a tag graph", async () => {
+ const json = await fetchJsonFromAdminApi("/flatTagGraph.json")
+ expect(json).toEqual({
+ "1": [
+ {
+ childId: 2,
+ isTopic: 0,
+ name: "Energy and Environment",
+ parentId: 1,
+ weight: 100,
+ },
+ ],
+ "2": [
+ {
+ childId: 3,
+ isTopic: 1,
+ name: "Energy",
+ parentId: 2,
+ weight: 110,
+ },
+ {
+ childId: 5,
+ isTopic: 1,
+ name: "CO2 & Greenhouse Gas Emissions",
+ parentId: 2,
+ weight: 100,
+ },
+ ],
+ "3": [
+ {
+ childId: 4,
+ isTopic: 1,
+ name: "Nuclear Energy",
+ parentId: 3,
+ weight: 100,
+ },
+ ],
+ "5": [
+ {
+ childId: 4,
+ isTopic: 1,
+ name: "Nuclear Energy",
+ parentId: 5,
+ weight: 100,
+ },
+ ],
+ __rootId: 1,
+ })
+ })
+
+ it("should be able to generate a set of breadcrumbs for a tag", async () => {
+ await knexReadonlyTransaction(
+ async (trx) => {
+ const parentTagArraysByChildName =
+ await getParentTagArraysByChildName(trx)
+ const breadcrumbs = getBestBreadcrumbs(
+ [
+ {
+ id: 4,
+ name: "Nuclear Energy",
+ slug: "nuclear-energy",
+ },
+ ],
+ parentTagArraysByChildName
+ )
+ // breadcrumb hrefs are env-dependent, so we just assert on the labels
+ const labelsOnly = breadcrumbs.map((b) => b.label)
+ expect(labelsOnly).toEqual(["Energy", "Nuclear Energy"])
+ },
+ TransactionCloseMode.KeepOpen,
+ testKnexInstance
+ )
+ })
+
+ it("should generate an optimal set of breadcrumbs when given multiple tags", async () => {
+ await knexReadonlyTransaction(
+ async (trx) => {
+ const parentTagArraysByChildName =
+ await getParentTagArraysByChildName(trx)
+ const breadcrumbs = getBestBreadcrumbs(
+ [
+ {
+ id: 4,
+ name: "Nuclear Energy",
+ slug: "nuclear-energy",
+ },
+ {
+ id: 5,
+ name: "CO2 & Greenhouse Gas Emissions",
+ slug: "co2-and-greenhouse-gas-emissions",
+ },
+ ],
+ parentTagArraysByChildName
+ )
+ // breadcrumb hrefs are env-dependent, so we just assert on the labels
+ const labelsOnly = breadcrumbs.map((b) => b.label)
+ expect(labelsOnly).toEqual(["Energy", "Nuclear Energy"])
+ },
+ TransactionCloseMode.KeepOpen,
+ testKnexInstance
+ )
+ })
+ it("should return an empty array when there are no topic tags in any of the tags' ancestors", async () => {
+ await knexReadonlyTransaction(
+ async (trx) => {
+ const parentTagArraysByChildName =
+ await getParentTagArraysByChildName(trx)
+ const breadcrumbs = getBestBreadcrumbs(
+ [
+ {
+ id: 2,
+ name: "Energy and Environment",
+ slug: "",
+ },
+ ],
+ parentTagArraysByChildName
+ )
+ // breadcrumb hrefs are env-dependent, so we just assert on the labels
+ const labelsOnly = breadcrumbs.map((b) => b.label)
+ expect(labelsOnly).toEqual([])
+ },
+ TransactionCloseMode.KeepOpen,
+ testKnexInstance
+ )
+ })
+ it("when there are two valid paths to a given tag, it selects the longest one", async () => {
+ await knexReadonlyTransaction(
+ async (trx) => {
+ // Here, Women's Employment has 2 paths:
+ // 1. Poverty and Economic Development > Women's Employment
+ // 2. Human Rights > Women's Rights > Women's Employment
+ // prettier-ignore
+ await testKnexInstance!(TagsTableName).insert([
+ { name: "Human Rights", id: 6 },
+ { name: "Women's Rights", slug: "womens-rights", id: 7 },
+ { name: "Women's Employment", slug: "womens-employment", id: 8 },
+ { name: "Poverty and Economic Development", id: 9 },
+ ])
+ await testKnexInstance!(TagGraphTableName).insert([
+ { parentId: 1, childId: 6 },
+ { parentId: 6, childId: 7 },
+ { parentId: 7, childId: 8 },
+ { parentId: 1, childId: 9 },
+ { parentId: 9, childId: 8 },
+ ])
+ await testKnexInstance!(PostsGdocsTableName).insert([
+ makeDummyTopicPage("womens-rights"),
+ makeDummyTopicPage("womens-employment"),
+ ])
+ await testKnexInstance!(PostsGdocsXTagsTableName).insert([
+ { gdocId: "womens-rights", tagId: 7 },
+ { gdocId: "womens-employment", tagId: 8 },
+ ])
+
+ const parentTagArraysByChildName =
+ await getParentTagArraysByChildName(trx)
+ const breadcrumbs = getBestBreadcrumbs(
+ [
+ {
+ id: 8,
+ name: "Women's Employment",
+ slug: "womens-employment",
+ },
+ ],
+ parentTagArraysByChildName
+ )
+ // breadcrumb hrefs are env-dependent, so we just assert on the labels
+ const labelsOnly = breadcrumbs.map((b) => b.label)
+ expect(labelsOnly).toEqual([
+ "Women's Rights",
+ "Women's Employment",
+ ])
+ },
+ TransactionCloseMode.KeepOpen,
+ testKnexInstance
+ )
+ })
+})
diff --git a/baker/SiteBaker.tsx b/baker/SiteBaker.tsx
index 52f9b4cb51a..3671b27f8c6 100644
--- a/baker/SiteBaker.tsx
+++ b/baker/SiteBaker.tsx
@@ -612,6 +612,9 @@ export class SiteBaker {
.getPublishedGdocPostsWithTags(knex)
.then((gdocs) => gdocs.map(gdocFromJSON))
+ const allParentTagArraysByChildName =
+ await db.getParentTagArraysByChildName(knex)
+
const gdocsToBake =
slugs !== undefined
? publishedGdocs.filter((gdoc) => slugs.includes(gdoc.slug))
@@ -647,6 +650,16 @@ export class SiteBaker {
publishedGdoc.linkedIndicators = attachments.linkedIndicators
publishedGdoc.linkedChartViews = attachments.linkedChartViews
+ if (
+ !publishedGdoc.manualBreadcrumbs?.length &&
+ publishedGdoc.tags?.length
+ ) {
+ publishedGdoc.breadcrumbs = db.getBestBreadcrumbs(
+ publishedGdoc.tags,
+ allParentTagArraysByChildName
+ )
+ }
+
// this is a no-op if the gdoc doesn't have an all-chart block
if ("loadRelatedCharts" in publishedGdoc) {
await publishedGdoc.loadRelatedCharts(knex)
diff --git a/baker/algolia/utils/charts.ts b/baker/algolia/utils/charts.ts
index 51f36eb0280..ac74e153536 100644
--- a/baker/algolia/utils/charts.ts
+++ b/baker/algolia/utils/charts.ts
@@ -13,6 +13,7 @@ import { isPathRedirectedToExplorer } from "../../../explorerAdminServer/Explore
import { ParsedChartRecordRow, RawChartRecordRow } from "./types.js"
import { excludeNullish } from "@ourworldindata/utils"
import { processAvailableEntities } from "./shared.js"
+import { getUniqueNamesFromParentTagArrays } from "@ourworldindata/utils/dist/Util.js"
const computeChartScore = (record: Omit): number => {
const { numRelatedArticles, views_7d } = record
@@ -99,7 +100,8 @@ export const getChartsRecords = async (
const pageviews = await getAnalyticsPageviewsByUrlObj(knex)
- const parentTagsByChildName = await db.getParentTagsByChildName(knex)
+ const parentTagArraysByChildName =
+ await db.getParentTagArraysByChildName(knex)
const records: ChartRecord[] = []
for (const c of parsedRows) {
@@ -121,10 +123,12 @@ export const getChartsRecords = async (
fontSize: 10, // doesn't matter, but is a mandatory field
}).plaintext
- const parentTags = c.tags.flatMap(
+ const parentTags = c.tags.flatMap((tagName) => {
+ const parentTagArrays = parentTagArraysByChildName[tagName]
// a chart can be tagged with a tag that isn't in the tag graph
- (tag) => parentTagsByChildName[tag] || []
- )
+ if (!parentTagArrays) return []
+ return getUniqueNamesFromParentTagArrays(parentTagArrays)
+ })
const record = {
objectID: c.id.toString(),
diff --git a/baker/algolia/utils/explorerViews.ts b/baker/algolia/utils/explorerViews.ts
index 84c2037fd4f..19b059688cc 100644
--- a/baker/algolia/utils/explorerViews.ts
+++ b/baker/algolia/utils/explorerViews.ts
@@ -49,6 +49,7 @@ import {
ChartRecord,
ChartRecordType,
} from "../../../site/search/searchTypes.js"
+import { getUniqueNamesFromParentTagArrays } from "@ourworldindata/utils/dist/Util.js"
export function explorerViewRecordToChartRecord(
e: ExplorerViewFinalRecord
@@ -698,7 +699,7 @@ async function getExplorersWithInheritedTags(trx: db.KnexReadonlyTransaction) {
// The DB query gets the tags for the explorer, but we need to add the parent tags as well.
// This isn't done in the query because it would require a recursive CTE.
// It's easier to write that query once, separately, and reuse it.
- const parentTags = await db.getParentTagsByChildName(trx)
+ const parentTagArrays = await db.getParentTagArraysByChildName(trx)
const publishedExplorersWithTags = []
for (const explorer of Object.values(explorersBySlug)) {
@@ -709,10 +710,14 @@ async function getExplorersWithInheritedTags(trx: db.KnexReadonlyTransaction) {
})
}
const tags = new Set()
- for (const tag of explorer.tags) {
- tags.add(tag)
- for (const parentTag of parentTags[tag]) {
- tags.add(parentTag)
+
+ for (const tagName of explorer.tags) {
+ tags.add(tagName)
+ const parentTagNames = getUniqueNamesFromParentTagArrays(
+ parentTagArrays[tagName]
+ )
+ for (const parentTagName of parentTagNames) {
+ tags.add(parentTagName)
}
}
diff --git a/db/db.ts b/db/db.ts
index ab9aa2e1ada..d64f6d29ad3 100644
--- a/db/db.ts
+++ b/db/db.ts
@@ -5,6 +5,7 @@ import {
GRAPHER_DB_PASS,
GRAPHER_DB_NAME,
GRAPHER_DB_PORT,
+ BAKED_BASE_URL,
} from "../settings/serverSettings.js"
import { registerExitHandler } from "./cleanup.js"
import { createTagGraph, keyBy } from "@ourworldindata/utils"
@@ -30,8 +31,10 @@ import {
MinimalExplorerInfo,
DbEnrichedImage,
DbEnrichedImageWithUserId,
+ MinimalTag,
+ BreadcrumbItem,
} from "@ourworldindata/types"
-import { groupBy, uniq } from "lodash"
+import { groupBy } from "lodash"
import { gdocFromJSON } from "./model/Gdoc/GdocFactory.js"
// Return the first match from a mysql query
@@ -387,7 +390,7 @@ export const getPublishedGdocPosts = async (
knex,
`-- sql
SELECT
- g.breadcrumbs,
+ g.manualBreadcrumbs,
g.content,
g.createdAt,
g.id,
@@ -423,7 +426,7 @@ export const getPublishedGdocPostsWithTags = async (
knex,
`-- sql
SELECT
- g.breadcrumbs,
+ g.manualBreadcrumbs,
g.content,
g.createdAt,
g.id,
@@ -536,43 +539,92 @@ export async function getFlatTagGraph(knex: KnexReadonlyTransaction): Promise<
return { ...tagGraphByParentId, __rootId: tagGraphRootIdResult.id }
}
-// DFS through the tag graph and create a map of parent tags for each child tag
-// e.g. { "Child": [ "Parent", "Grandparent" ], "Parent": [ "Grandparent" ] }
-// parent tags are listed in no particular order
-export async function getParentTagsByChildName(
+// DFS through the tag graph and track all paths from a child to the root
+// e.g. { "childTag": [ [parentTag1, parentTag2], [parentTag3] ] }
+// Use this with getUniqueNamesFromParentTagArrays to get Record instead
+export async function getParentTagArraysByChildName(
trx: KnexReadonlyTransaction
-): Promise> {
+): Promise<
+ Record[][]>
+> {
const { __rootId, ...flatTagGraph } = await getFlatTagGraph(trx)
const tagGraph = createTagGraph(flatTagGraph, __rootId)
-
- const tagsById = await trx("tags")
- .select("id", "name")
+ const tagsById = await trx("tags")
+ .select("id", "name", "slug")
.then((tags) => keyBy(tags, "id"))
- const parentTagsByChildName: Record<
+ const pathsByChildName: Record<
DbPlainTag["name"],
- DbPlainTag["name"][]
+ Pick[][]
> = {}
- function trackParents(node: TagGraphNode): void {
+ function trackAllPaths(
+ node: TagGraphNode,
+ currentPath: Pick[] = []
+ ): void {
+ const currentTag = tagsById[node.id]
+ const newPath = [...currentPath, currentTag]
+
+ // Don't add paths for root node
+ if (node.id !== __rootId) {
+ const nodeName = currentTag.name
+ if (!pathsByChildName[nodeName]) {
+ pathsByChildName[nodeName] = []
+ }
+
+ // Add the complete path (excluding root)
+ pathsByChildName[nodeName].push(newPath.slice(1))
+ }
+
for (const child of node.children) {
- trackParents(child)
+ trackAllPaths(child, newPath)
}
+ }
- const preexistingParents = parentTagsByChildName[node.name] ?? []
- // node.path is an array of tag ids from the root to the current node
- // slice to remove the root node and the current node, then map them into tag names
- const newParents = node.path.slice(1, -1).map((id) => tagsById[id].name)
+ trackAllPaths(tagGraph)
- parentTagsByChildName[node.name] = uniq([
- ...preexistingParents,
- ...newParents,
- ])
+ return pathsByChildName
+}
+
+export function getBestBreadcrumbs(
+ tags: MinimalTag[],
+ parentTagArraysByChildName: Record<
+ string,
+ Pick[][]
+ >
+): BreadcrumbItem[] {
+ // For each tag, find the best path according to our criteria
+ // e.g. { "Nuclear Energy ": ["Energy and Environment", "Energy"], "Air Pollution": ["Energy and Environment"] }
+ const result = new Map[]>()
+
+ for (const tag of tags) {
+ const paths = parentTagArraysByChildName[tag.name]
+ if (paths && paths.length > 0) {
+ // Since getFlatTagGraph already orders by weight DESC and name ASC,
+ // the first path in the array will be our best path
+ result.set(tag.id, paths[0])
+ }
}
- trackParents(tagGraph)
+ // Only keep the topics in the paths, because only topics are clickable as breadcrumbs
+ const topicsOnly = Array.from(result.values()).reduce(
+ (acc, path) => {
+ return [...acc, path.filter((tag) => tag.slug)]
+ },
+ [] as Pick[][]
+ )
+
+ // Pick the longest path from result, assuming that the longest path is the best
+ const longestPath = topicsOnly.reduce((best, path) => {
+ return path.length > best.length ? path : best
+ }, [])
+
+ const breadcrumbs = longestPath.map((tag) => ({
+ label: tag.name,
+ href: `${BAKED_BASE_URL}/${tag.slug}`,
+ }))
- return parentTagsByChildName
+ return breadcrumbs
}
export async function updateTagGraph(
diff --git a/db/migrateWpPostsToArchieMl.ts b/db/migrateWpPostsToArchieMl.ts
index a0938d6c546..e3a9647c39d 100644
--- a/db/migrateWpPostsToArchieMl.ts
+++ b/db/migrateWpPostsToArchieMl.ts
@@ -256,7 +256,7 @@ const migrate = async (trx: db.KnexReadWriteTransaction): Promise => {
updatedAt: post.updated_at_in_wordpress,
publicationContext: OwidGdocPublicationContext.listed, // TODO: not all articles are listed, take this from the DB
revisionId: null,
- breadcrumbs: null,
+ manualBreadcrumbs: null,
markdown: null,
}
const archieMlStatsContent = {
diff --git a/db/migration/1736443943021-RemoveManualBreadcrumbsFromNonSDGArticles.ts b/db/migration/1736443943021-RemoveManualBreadcrumbsFromNonSDGArticles.ts
new file mode 100644
index 00000000000..0de1dbac921
--- /dev/null
+++ b/db/migration/1736443943021-RemoveManualBreadcrumbsFromNonSDGArticles.ts
@@ -0,0 +1,49 @@
+import { MigrationInterface, QueryRunner } from "typeorm"
+
+export class RemoveManualBreadcrumbsFromNonSDGArticles1736443943021
+ implements MigrationInterface
+{
+ public async up(queryRunner: QueryRunner): Promise {
+ /* As of 09-01-2025, the articles affected by this migration are:
+ - https://ourworldindata.org/renewable-energy
+ - https://ourworldindata.org/palm-oil
+ - https://ourworldindata.org/water-sanitation-2020-update
+ - https://ourworldindata.org/fossil-fuels
+ - https://ourworldindata.org/inequality-co2
+ - https://ourworldindata.org/energy-gdp-decoupling
+ - https://ourworldindata.org/adopting-slower-growing-breeds-of-chicken-would-reduce-animal-suffering-significantly
+ - https://ourworldindata.org/number-without-electricity
+ - https://ourworldindata.org/hygiene
+ - https://ourworldindata.org/per-capita-energy
+ - https://ourworldindata.org/energy-definitions
+ - https://ourworldindata.org/land-use-per-energy-source
+ - https://ourworldindata.org/do-better-cages-or-cage-free-environments-really-improve-the-lives-of-hens
+ - https://ourworldindata.org/deforestation
+ - https://ourworldindata.org/energy-offshoring
+ - https://ourworldindata.org/electricity-mix
+ - https://ourworldindata.org/clean-water
+ - https://ourworldindata.org/global-energy-200-years
+ - https://ourworldindata.org/what-are-drivers-deforestation
+ - https://ourworldindata.org/energy-substitution-method
+ - https://ourworldindata.org/energy-missing-data
+ - https://ourworldindata.org/nuclear-energy
+ - https://ourworldindata.org/how-many-animals-get-slaughtered-every-day
+ - https://ourworldindata.org/energy-ladder
+ - https://ourworldindata.org/energy-access
+ - https://ourworldindata.org/sanitation
+ - https://ourworldindata.org/how-many-animals-are-factory-farmed
+ - https://ourworldindata.org/decarbonizing-energy-progress
+ */
+ await queryRunner.query(
+ `-- sql
+ UPDATE posts_gdocs SET breadcrumbs = NULL
+ WHERE type = 'article'
+ AND breadcrumbs IS NOT NULL
+ AND slug NOT LIKE "%sdgs%"`
+ )
+ }
+
+ public async down(): Promise {
+ // no-op
+ }
+}
diff --git a/db/migration/1736455365750-RenameBreadcrumbsColumn.ts b/db/migration/1736455365750-RenameBreadcrumbsColumn.ts
new file mode 100644
index 00000000000..e23ced84201
--- /dev/null
+++ b/db/migration/1736455365750-RenameBreadcrumbsColumn.ts
@@ -0,0 +1,15 @@
+import { MigrationInterface, QueryRunner } from "typeorm"
+
+export class RenameBreadcrumbsColumn1736455365750
+ implements MigrationInterface
+{
+ public async up(queryRunner: QueryRunner): Promise {
+ await queryRunner.query(`-- sql
+ ALTER TABLE posts_gdocs RENAME COLUMN breadcrumbs TO manualBreadcrumbs`)
+ }
+
+ public async down(queryRunner: QueryRunner): Promise {
+ await queryRunner.query(`-- sql
+ ALTER TABLE posts_gdocs RENAME COLUMN manualBreadcrumbs TO breadcrumbs`)
+ }
+}
diff --git a/db/migration/1736969067156-PostsGdocsUpdatedAt.ts b/db/migration/1736969067156-PostsGdocsUpdatedAt.ts
new file mode 100644
index 00000000000..a4412b4d288
--- /dev/null
+++ b/db/migration/1736969067156-PostsGdocsUpdatedAt.ts
@@ -0,0 +1,19 @@
+import { MigrationInterface, QueryRunner } from "typeorm"
+
+export class PostsGdocsUpdatedAt1736969067156 implements MigrationInterface {
+ public async up(queryRunner: QueryRunner): Promise {
+ await queryRunner.query(`-- sql
+ ALTER TABLE posts_gdocs
+ MODIFY COLUMN createdAt DATETIME DEFAULT CURRENT_TIMESTAMP,
+ MODIFY COLUMN updatedAt DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
+ `)
+ }
+
+ public async down(queryRunner: QueryRunner): Promise {
+ await queryRunner.query(`-- sql
+ ALTER TABLE posts_gdocs
+ MODIFY COLUMN createdAt DATETIME DEFAULT CURRENT_TIMESTAMP,
+ MODIFY COLUMN updatedAt DATETIME DEFAULT CURRENT_TIMESTAMP
+ `)
+ }
+}
diff --git a/db/model/Gdoc/GdocBase.ts b/db/model/Gdoc/GdocBase.ts
index 173fd06e6fb..9fc97412934 100644
--- a/db/model/Gdoc/GdocBase.ts
+++ b/db/model/Gdoc/GdocBase.ts
@@ -82,6 +82,7 @@ export class GdocBase implements OwidGdocBaseInterface {
publicationContext: OwidGdocPublicationContext =
OwidGdocPublicationContext.unlisted
breadcrumbs: BreadcrumbItem[] | null = null
+ manualBreadcrumbs: BreadcrumbItem[] | null = null
tags: DbPlainTag[] | null = null
errors: OwidGdocErrorMessage[] = []
donors: string[] = []
diff --git a/db/model/Gdoc/GdocFactory.ts b/db/model/Gdoc/GdocFactory.ts
index 0a192d45dbe..5323228022a 100644
--- a/db/model/Gdoc/GdocFactory.ts
+++ b/db/model/Gdoc/GdocFactory.ts
@@ -44,6 +44,8 @@ import {
KnexReadWriteTransaction,
getImageMetadataByFilenames,
getPublishedGdocPostsWithTags,
+ getParentTagArraysByChildName,
+ getBestBreadcrumbs,
} from "../../db.js"
import { enrichedBlocksToMarkdown } from "./enrichedToMarkdown.js"
import { GdocAbout } from "./GdocAbout.js"
@@ -204,7 +206,17 @@ export async function getGdocBaseObjectById(
[id]
)
gdoc.tags = tags
+
+ if (tags.length) {
+ const parentTagArraysByChildName =
+ await getParentTagArraysByChildName(knex)
+ gdoc.breadcrumbs = getBestBreadcrumbs(
+ gdoc.tags,
+ parentTagArraysByChildName
+ )
+ }
}
+
return gdoc
}
@@ -292,6 +304,14 @@ export async function getPublishedGdocBaseObjectBySlug(
[gdoc.id]
)
gdoc.tags = tags
+ if (tags.length) {
+ const parentTagArraysByChildName =
+ await getParentTagArraysByChildName(knex)
+ gdoc.breadcrumbs = getBestBreadcrumbs(
+ gdoc.tags,
+ parentTagArraysByChildName
+ )
+ }
}
return gdoc
}
@@ -575,7 +595,7 @@ export function getDbEnrichedGdocFromOwidGdoc(
gdoc: OwidGdoc | GdocBase
): DbEnrichedPostGdoc {
const enrichedGdoc = {
- breadcrumbs: gdoc.breadcrumbs,
+ manualBreadcrumbs: gdoc.manualBreadcrumbs,
content: gdoc.content,
createdAt: gdoc.createdAt,
id: gdoc.id,
@@ -592,20 +612,22 @@ export function getDbEnrichedGdocFromOwidGdoc(
export async function upsertGdoc(
knex: KnexReadWriteTransaction,
gdoc: OwidGdoc | GdocBase
-): Promise {
+): Promise {
let sql = undefined
try {
const enrichedGdoc = getDbEnrichedGdocFromOwidGdoc(gdoc)
- const rawPost = serializePostsGdocsRow(enrichedGdoc)
+ const { updatedAt: _, ...rawPost } =
+ serializePostsGdocsRow(enrichedGdoc)
const query = knex
.table(PostsGdocsTableName)
.insert(rawPost)
.onConflict("id")
.merge()
sql = query.toSQL()
- const indices = await query
+ await query
await updateDerivedGdocPostsComponents(knex, gdoc.id, gdoc.content.body)
- return indices
+ const upserted = await getAndLoadGdocById(knex, gdoc.id)
+ return upserted
} catch (e) {
console.error(`Error occured in sql: ${sql}`, e)
throw e
diff --git a/db/tests/testHelpers.ts b/db/tests/testHelpers.ts
index 47e3f231e55..8340b59bf75 100644
--- a/db/tests/testHelpers.ts
+++ b/db/tests/testHelpers.ts
@@ -7,6 +7,8 @@ import {
MultiDimDataPagesTableName,
MultiDimXChartConfigsTableName,
PostsGdocsTableName,
+ TagGraphTableName,
+ TagsTableName,
UsersTableName,
VariablesTableName,
} from "@ourworldindata/types"
@@ -24,6 +26,8 @@ export const TABLES_IN_USE = [
DatasetsTableName,
PostsGdocsTableName,
UsersTableName,
+ TagGraphTableName,
+ TagsTableName,
]
export async function cleanTestDb(
diff --git a/packages/@ourworldindata/types/src/dbTypes/PostsGdocs.ts b/packages/@ourworldindata/types/src/dbTypes/PostsGdocs.ts
index 1c7bc650b22..c5e0c63933d 100644
--- a/packages/@ourworldindata/types/src/dbTypes/PostsGdocs.ts
+++ b/packages/@ourworldindata/types/src/dbTypes/PostsGdocs.ts
@@ -8,7 +8,7 @@ import { MinimalTag } from "./Tags.js"
export const PostsGdocsTableName = "posts_gdocs"
export interface DbInsertPostGdoc {
- breadcrumbs?: JsonString | null
+ manualBreadcrumbs?: JsonString | null
content: JsonString
createdAt: Date
id: string
@@ -23,10 +23,10 @@ export interface DbInsertPostGdoc {
export type DbRawPostGdoc = Required
export type DbEnrichedPostGdoc = Omit<
DbRawPostGdoc,
- "content" | "breadcrumbs" | "published"
+ "content" | "manualBreadcrumbs" | "published"
> & {
content: OwidGdocContent
- breadcrumbs: BreadcrumbItem[] | null
+ manualBreadcrumbs: BreadcrumbItem[] | null
published: boolean
}
@@ -62,7 +62,7 @@ export function parsePostsGdocsRow(row: DbRawPostGdoc): DbEnrichedPostGdoc {
return {
...row,
content: parsePostGdocContent(row.content),
- breadcrumbs: parsePostsGdocsBreadcrumbs(row.breadcrumbs),
+ manualBreadcrumbs: parsePostsGdocsBreadcrumbs(row.manualBreadcrumbs),
published: !!row.published,
}
}
@@ -77,10 +77,17 @@ export function parsePostsGdocsWithTagsRow(
}
export function serializePostsGdocsRow(row: DbEnrichedPostGdoc): DbRawPostGdoc {
+ // Kind of awkward, but automatic breadcrumbs are part of OwidGdocBaseInterface,
+ // but not part of the DB schema. So we remove them here.
+ if ("breadcrumbs" in row) {
+ delete row.breadcrumbs
+ }
return {
...row,
content: serializePostGdocContent(row.content),
- breadcrumbs: serializePostsGdocsBreadcrumbs(row.breadcrumbs),
+ manualBreadcrumbs: serializePostsGdocsBreadcrumbs(
+ row.manualBreadcrumbs
+ ),
published: row.published ? 1 : 0,
}
}
diff --git a/packages/@ourworldindata/types/src/dbTypes/TagsGraph.ts b/packages/@ourworldindata/types/src/dbTypes/TagsGraph.ts
deleted file mode 100644
index 3e9aa929bd0..00000000000
--- a/packages/@ourworldindata/types/src/dbTypes/TagsGraph.ts
+++ /dev/null
@@ -1,8 +0,0 @@
-/** the entity in the `tags` table */
-export const TagsGraphTableName = "tag_graph"
-export interface DbInsertTagGraph {
- parentId: number
- childId: number
- weight?: number
-}
-export type DbPlainTag = Required
diff --git a/packages/@ourworldindata/types/src/domainTypes/ContentGraph.ts b/packages/@ourworldindata/types/src/domainTypes/ContentGraph.ts
index 34cf5a8fd8c..29c3d5a3bf8 100644
--- a/packages/@ourworldindata/types/src/domainTypes/ContentGraph.ts
+++ b/packages/@ourworldindata/types/src/domainTypes/ContentGraph.ts
@@ -12,6 +12,8 @@ export interface CategoryWithEntries {
subcategories?: CategoryWithEntries[]
}
+export const TagGraphTableName = "tag_graph"
+
export type DbInsertTagGraphNode = {
parentId: number
childId: number
diff --git a/packages/@ourworldindata/types/src/gdocTypes/Gdoc.ts b/packages/@ourworldindata/types/src/gdocTypes/Gdoc.ts
index 318c5d1e5b2..06d27be4e5c 100644
--- a/packages/@ourworldindata/types/src/gdocTypes/Gdoc.ts
+++ b/packages/@ourworldindata/types/src/gdocTypes/Gdoc.ts
@@ -97,7 +97,7 @@ export interface OwidGdocBaseInterface {
updatedAt: Date | null
revisionId: string | null
publicationContext: OwidGdocPublicationContext
- breadcrumbs: BreadcrumbItem[] | null
+ manualBreadcrumbs: BreadcrumbItem[] | null
linkedAuthors?: LinkedAuthor[]
linkedDocuments?: Record
linkedCharts?: Record
@@ -106,6 +106,7 @@ export interface OwidGdocBaseInterface {
relatedCharts?: RelatedChart[]
tags?: MinimalTag[] | null
errors?: OwidGdocErrorMessage[]
+ breadcrumbs?: BreadcrumbItem[] | null
markdown: string | null
}
diff --git a/packages/@ourworldindata/types/src/index.ts b/packages/@ourworldindata/types/src/index.ts
index a8a3f80db50..7c3f37e6213 100644
--- a/packages/@ourworldindata/types/src/index.ts
+++ b/packages/@ourworldindata/types/src/index.ts
@@ -127,6 +127,7 @@ export {
export {
TagGraphRootName,
+ TagGraphTableName,
type CategoryWithEntries,
type EntryMeta,
type FlatTagGraph,
diff --git a/packages/@ourworldindata/utils/src/Util.ts b/packages/@ourworldindata/utils/src/Util.ts
index 547e9404216..5209f0e6702 100644
--- a/packages/@ourworldindata/utils/src/Util.ts
+++ b/packages/@ourworldindata/utils/src/Util.ts
@@ -177,6 +177,7 @@ import {
GrapherInterface,
DimensionProperty,
GRAPHER_CHART_TYPES,
+ DbPlainTag,
} from "@ourworldindata/types"
import { PointVector } from "./PointVector.js"
import * as React from "react"
@@ -1930,6 +1931,19 @@ export function isFiniteWithGuard(value: unknown): value is number {
return isFinite(value as any)
}
+// Use with getParentTagArraysByChildName to collapse all paths to the child into a single array of unique parent tag names
+export function getUniqueNamesFromParentTagArrays(
+ parentTagArrays: Pick[][]
+): string[] {
+ const tagNames = new Set(
+ parentTagArrays.flatMap((parentTagArray) =>
+ parentTagArray.map((tag) => tag.name)
+ )
+ )
+
+ return [...tagNames]
+}
+
export function createTagGraph(
tagGraphByParentId: Record,
rootId: number
diff --git a/site/Breadcrumb/Breadcrumb.tsx b/site/Breadcrumb/Breadcrumb.tsx
index b8d5b92f9c2..d7460af295b 100644
--- a/site/Breadcrumb/Breadcrumb.tsx
+++ b/site/Breadcrumb/Breadcrumb.tsx
@@ -41,14 +41,13 @@ export const Breadcrumbs = ({
{items.map((item, idx) => {
const isLast = idx === items.length - 1
- const breadcrumb =
- !isLast && item.href ? (
-
- {item.label}
-
- ) : (
- {item.label}
- )
+ const breadcrumb = item.href ? (
+
+ {item.label}
+
+ ) : (
+ {item.label}
+ )
return (
diff --git a/site/gdocs/components/centered-article.scss b/site/gdocs/components/centered-article.scss
index ccce63cc548..835cf88d55f 100644
--- a/site/gdocs/components/centered-article.scss
+++ b/site/gdocs/components/centered-article.scss
@@ -490,11 +490,15 @@ h3.article-block__heading.has-supertitle {
.centered-article-header__breadcrumbs-container {
.centered-article-header__breadcrumbs {
+ @include body-3-regular;
+ @include sm-only {
+ font-size: 0.75rem;
+ }
&.breadcrumbs-white {
color: $white;
}
&.breadcrumbs-blue {
- color: $blue-100;
+ color: $blue-90;
}
// Idea here: By using this positioning, we can have the breadcrumbs
@@ -510,7 +514,6 @@ h3.article-block__heading.has-supertitle {
margin-top: var(--header-breadcrumb-margin-top);
margin-bottom: calc(-1.6em - var(--header-breadcrumb-margin-top));
- font-size: 1rem;
a {
@include owid-link-90;
color: inherit;
@@ -523,8 +526,9 @@ h3.article-block__heading.has-supertitle {
.separator {
margin: 0 0.5rem;
- vertical-align: -0.0625em;
- opacity: 0.7;
+ vertical-align: -0.05em;
+ opacity: 0.6;
+ color: inherit;
}
}
}
diff --git a/site/gdocs/pages/GdocPost.tsx b/site/gdocs/pages/GdocPost.tsx
index 64468b80906..0fc0f892c41 100644
--- a/site/gdocs/pages/GdocPost.tsx
+++ b/site/gdocs/pages/GdocPost.tsx
@@ -47,6 +47,7 @@ export function GdocPost({
publishedAt,
slug,
breadcrumbs,
+ manualBreadcrumbs,
}: OwidGdocPostInterface & {
isPreviewing?: boolean
}) {
@@ -90,7 +91,7 @@ export function GdocPost({
{isDeprecated && content["deprecation-notice"] && (