From 0585e2ef013aad054b07c594a1af39bd0dabf795 Mon Sep 17 00:00:00 2001 From: Deadbeef Date: Tue, 24 Oct 2023 23:25:34 +0800 Subject: [PATCH] update pagelinks queries to use linktarget --- dbreps2/src/enwiki/boteditcount.rs | 5 +++-- dbreps2/src/enwiki/editcount.rs | 7 ++++--- dbreps2/src/enwiki/linkedmiscapitalizations.rs | 5 +++-- dbreps2/src/enwiki/linkedmisspellings.rs | 5 +++-- dbreps2/src/enwiki/templatedisambigs.rs | 13 +++++++------ dbreps2/src/general/linkedredlinkedcats.rs | 5 +++-- dbreps2/src/general/userlinksinarticles.rs | 3 ++- 7 files changed, 25 insertions(+), 18 deletions(-) diff --git a/dbreps2/src/enwiki/boteditcount.rs b/dbreps2/src/enwiki/boteditcount.rs index c546e250..db721c13 100644 --- a/dbreps2/src/enwiki/boteditcount.rs +++ b/dbreps2/src/enwiki/boteditcount.rs @@ -40,11 +40,12 @@ SELECT FROM page JOIN pagelinks ON pl_from = page_id +JOIN linktarget on pl_target_id = lt_id JOIN user -ON user_name = REPLACE(pl_title, "_", " ") +ON user_name = REPLACE(lt_title, "_", " ") WHERE page_title = ? AND page_namespace = 4 -AND pl_namespace IN (2,3); +AND lt_namespace IN (2,3); "#, (page,), |(user_name, user_editcount)| (user_name, user_editcount), diff --git a/dbreps2/src/enwiki/editcount.rs b/dbreps2/src/enwiki/editcount.rs index 4cc6b931..b19e842d 100644 --- a/dbreps2/src/enwiki/editcount.rs +++ b/dbreps2/src/enwiki/editcount.rs @@ -32,16 +32,17 @@ async fn get_user_list(conn: &mut Conn, page: &str) -> Result> { r#" /* editcount.rs SLOW_OK */ SELECT DISTINCT - pl_title + lt_title FROM page JOIN pagelinks ON pl_from = page_id +JOIN linktarget on pl_target_id = lt_id WHERE page_title = ? AND page_namespace = 4 -AND pl_namespace IN (2,3); +AND lt_namespace IN (2,3); "#, (page,), - |(pl_title,)| pl_title, + |(lt_title,)| lt_title, ) .await?; Ok(rows diff --git a/dbreps2/src/enwiki/linkedmiscapitalizations.rs b/dbreps2/src/enwiki/linkedmiscapitalizations.rs index a93ae9b6..4ebd9f79 100644 --- a/dbreps2/src/enwiki/linkedmiscapitalizations.rs +++ b/dbreps2/src/enwiki/linkedmiscapitalizations.rs @@ -47,8 +47,9 @@ SELECT FROM page AS p1 JOIN categorylinks ON p1.page_id = cl_from - JOIN pagelinks ON p1.page_title = pl_title - AND pl_namespace = 0 + JOIN linktarget on p1.page_title = lt_title + AND lt_namespace = 0 + JOIN pagelinks ON pl_target_id = lt_id JOIN page AS p2 ON pl_from = p2.page_id AND p2.page_namespace = 0 WHERE diff --git a/dbreps2/src/enwiki/linkedmisspellings.rs b/dbreps2/src/enwiki/linkedmisspellings.rs index 2b381709..5f896742 100644 --- a/dbreps2/src/enwiki/linkedmisspellings.rs +++ b/dbreps2/src/enwiki/linkedmisspellings.rs @@ -47,8 +47,9 @@ SELECT FROM page AS p1 JOIN categorylinks ON p1.page_id = cl_from - JOIN pagelinks ON p1.page_title = pl_title - AND pl_namespace = 0 + JOIN linktarget on p1.page_title = lt_title + AND lt_namespace = 0 + JOIN pagelinks ON pl_target_id = lt_id JOIN page AS p2 ON pl_from = p2.page_id AND p2.page_namespace = 0 WHERE diff --git a/dbreps2/src/enwiki/templatedisambigs.rs b/dbreps2/src/enwiki/templatedisambigs.rs index 867dfd5b..bea4a027 100644 --- a/dbreps2/src/enwiki/templatedisambigs.rs +++ b/dbreps2/src/enwiki/templatedisambigs.rs @@ -44,7 +44,7 @@ impl Report for TemplateDisambigs { /* templatedisambigs.rs SLOW_OK */ SELECT pltmp.page_title AS template_title, - pltmp.pl_title AS disambiguation_title, + pltmp.lt_title AS disambiguation_title, ( SELECT COUNT(*) @@ -60,21 +60,22 @@ FROM SELECT page_namespace, page_title, - pl_namespace, - pl_title + lt_namespace, + lt_title FROM page JOIN pagelinks ON pl_from = page_id + JOIN linktarget ON pl_target_id = lt_id WHERE page_namespace = 10 - AND pl_namespace = 0 + AND lt_namespace = 0 LIMIT 1000000 ) AS pltmp JOIN page AS pg2 /* removes red links */ - ON pltmp.pl_namespace = pg2.page_namespace - AND pltmp.pl_title = pg2.page_title + ON pltmp.lt_namespace = pg2.page_namespace + AND pltmp.lt_title = pg2.page_title WHERE EXISTS ( SELECT diff --git a/dbreps2/src/general/linkedredlinkedcats.rs b/dbreps2/src/general/linkedredlinkedcats.rs index 9c0dcf84..072fd34d 100644 --- a/dbreps2/src/general/linkedredlinkedcats.rs +++ b/dbreps2/src/general/linkedredlinkedcats.rs @@ -45,9 +45,10 @@ SELECT cl_to, COUNT(*) FROM + /* FIXME when categorylinks gets normalized as well */ categorylinks - JOIN pagelinks ON pl_title = cl_to - AND pl_namespace = 14 + JOIN linktarget ON lt_title = cl_to AND lt_namespace = 14 + JOIN pagelinks ON pl_target_id = lt_id JOIN page AS p1 ON pl_from = p1.page_id AND p1.page_namespace IN (0, 6, 10, 12, 14, 100) LEFT JOIN page AS p2 ON cl_to = p2.page_title diff --git a/dbreps2/src/general/userlinksinarticles.rs b/dbreps2/src/general/userlinksinarticles.rs index 1af9af3d..f21815fd 100644 --- a/dbreps2/src/general/userlinksinarticles.rs +++ b/dbreps2/src/general/userlinksinarticles.rs @@ -45,9 +45,10 @@ SELECT FROM page JOIN pagelinks ON pl_from = page_id + JOIN linktarget ON pl_target_id = lt_id WHERE pl_from_namespace = 0 - AND pl_namespace IN (2, 3) + AND lt_namespace IN (2, 3) AND NOT EXISTS ( SELECT 1 FROM templatelinks JOIN linktarget ON tl_target_id = lt_id