Skip to content

Commit

Permalink
🔨 tweak related research query
Browse files Browse the repository at this point in the history
This deduplicates results by URL, sorts authors by their order, and only uses full chart embeds, ignoring plain links to charts
  • Loading branch information
danyx23 committed Oct 13, 2023
1 parent aebfc8a commit 35db5ff
Showing 1 changed file with 16 additions and 4 deletions.
20 changes: 16 additions & 4 deletions db/wpdb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,7 @@ export const getRelatedChartsForVariable = async (

interface RelatedResearchQueryResult {
linkTargetSlug: string
componentType: string
chartSlug: string
title: string
postSlug: string
Expand All @@ -670,6 +671,7 @@ export const getRelatedResearchAndWritingForVariable = async (
select
distinct
pl.target as linkTargetSlug,
pl.componentType as componentType,
c.slug as chartSlug,
p.title as title,
p.slug as postSlug,
Expand All @@ -694,6 +696,7 @@ export const getRelatedResearchAndWritingForVariable = async (
pg.id = p.gdocSuccessorId
where
pl.linkType = 'grapher'
and componentType = 'src' -- this filters out links in tags and keeps only embedded charts
and cd.variableId = ?
and cd.property in ('x', 'y') -- ignore cases where the indicator is size, color etc
and p.status = 'publish' -- only use published wp charts
Expand All @@ -707,6 +710,7 @@ export const getRelatedResearchAndWritingForVariable = async (
select
distinct
pl.target as linkTargetSlug,
pl.componentType as componentType,
c.slug as chartSlug,
p.content ->> '$.title' as title,
p.slug as postSlug,
Expand All @@ -729,6 +733,7 @@ export const getRelatedResearchAndWritingForVariable = async (
pv.url = concat('https://ourworldindata.org/', p.slug )
where
pl.linkType = 'grapher'
and componentType = 'chart' -- this filters out links in tags and keeps only embedded charts
and cd.variableId = ?
and cd.property in ('x', 'y') -- ignore cases where the indicator is size, color etc
and p.published = 1`,
Expand All @@ -741,14 +746,17 @@ export const getRelatedResearchAndWritingForVariable = async (
// but it seemed easier to understand if we do the sort here
const sorted = sortBy(combined, (post) => -post.pageviews)

return sorted.map((post) => {
const allSortedRelatedResearch = sorted.map((post) => {
const parsedAuthors = JSON.parse(post.authors)
// The authors in the gdocs table are just a list of strings, but in the wp_posts table
// they are a list of objects with an "author" key and an "order" key. We want to normalize this so that
// we can just use the same code to display the authors in both cases.
const authors = parsedAuthors.map((author: any) =>
!isString(author) ? author.author : author
)
let authors
if (parsedAuthors.length > 0 && !isString(parsedAuthors[0])) {
authors = sortBy(parsedAuthors, (author) => author.order).map(
(author: any) => author.author
)
} else authors = parsedAuthors
return {
title: post.title,
url: `/${post.postSlug}`,
Expand All @@ -757,6 +765,10 @@ export const getRelatedResearchAndWritingForVariable = async (
imageUrl: post.thumbnail,
}
})
// The queries above use `distinct`, but each row also carries chart-level columns (link target, chart slug),
// so a piece of research that embeds several different charts using the same indicator still yields one row
// per chart. We therefore deduplicate by url here; uniqBy keeps the first occurrence and discards later ones.
return uniqBy(allSortedRelatedResearch, "url")
}

export const getRelatedArticles = async (
Expand Down

0 comments on commit 35db5ff

Please sign in to comment.