From 35db5ff31e17d3c1b85e1bb6658d3fa074a2bfb3 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Fri, 13 Oct 2023 13:37:17 +0200 Subject: [PATCH] :hammer: tweak related research query This deduplicates by url, sorts authors and only uses full chart embeds and ignores plain links to charts --- db/wpdb.ts | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/db/wpdb.ts b/db/wpdb.ts index 5df0030fc84..0be61f0b173 100644 --- a/db/wpdb.ts +++ b/db/wpdb.ts @@ -653,6 +653,7 @@ export const getRelatedChartsForVariable = async ( interface RelatedResearchQueryResult { linkTargetSlug: string + componentType: string chartSlug: string title: string postSlug: string @@ -670,6 +671,7 @@ export const getRelatedResearchAndWritingForVariable = async ( select distinct pl.target as linkTargetSlug, + pl.componentType as componentType, c.slug as chartSlug, p.title as title, p.slug as postSlug, @@ -694,6 +696,7 @@ export const getRelatedResearchAndWritingForVariable = async ( pg.id = p.gdocSuccessorId where pl.linkType = 'grapher' + and componentType = 'src' -- this filters out links in tags and keeps only embedded charts and cd.variableId = ? and cd.property in ('x', 'y') -- ignore cases where the indicator is size, color etc and p.status = 'publish' -- only use published wp charts @@ -707,6 +710,7 @@ export const getRelatedResearchAndWritingForVariable = async ( select distinct pl.target as linkTargetSlug, + pl.componentType as componentType, c.slug as chartSlug, p.content ->> '$.title' as title, p.slug as postSlug, @@ -729,6 +733,7 @@ export const getRelatedResearchAndWritingForVariable = async ( pv.url = concat('https://ourworldindata.org/', p.slug ) where pl.linkType = 'grapher' + and componentType = 'chart' -- this filters out links in tags and keeps only embedded charts and cd.variableId = ? 
and cd.property in ('x', 'y') -- ignore cases where the indicator is size, color etc and p.published = 1`, @@ -741,14 +746,17 @@ export const getRelatedResearchAndWritingForVariable = async ( // but it seemed easier to understand if we do the sort here const sorted = sortBy(combined, (post) => -post.pageviews) - return sorted.map((post) => { + const allSortedRelatedResearch = sorted.map((post) => { const parsedAuthors = JSON.parse(post.authors) // The authors in the gdocs table are just a list of strings, but in the wp_posts table // they are a list of objects with an "author" key and an "order" key. We want to normalize this so that // we can just use the same code to display the authors in both cases. - const authors = parsedAuthors.map((author: any) => - !isString(author) ? author.author : author - ) + let authors + if (parsedAuthors.length > 0 && !isString(parsedAuthors[0])) { + authors = sortBy(parsedAuthors, (author) => author.order).map( + (author: any) => author.author + ) + } else authors = parsedAuthors return { title: post.title, url: `/${post.postSlug}`, @@ -757,6 +765,10 @@ export const getRelatedResearchAndWritingForVariable = async ( imageUrl: post.thumbnail, } }) + // The queries above use distinct, but because of the information we pull in, if the same piece of research + // uses different charts that all use a single indicator, we would get duplicates for the post to link to, so + // here we deduplicate by url. The first item is retained by uniqBy; later ones are discarded. + return uniqBy(allSortedRelatedResearch, "url") } export const getRelatedArticles = async (