forked from derpibooru/philomena
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'philo/master'
- Loading branch information
Showing
29 changed files
with
565 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,3 +41,6 @@ npm-debug.log | |
|
||
# Intellij IDEA | ||
.idea | ||
|
||
# Index dumps | ||
*.jsonl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
all: comments galleries images posts reports tags | ||
|
||
comments: | ||
$(MAKE) -f comments.mk | ||
|
||
galleries: | ||
$(MAKE) -f galleries.mk | ||
|
||
images: | ||
$(MAKE) -f images.mk | ||
|
||
posts: | ||
$(MAKE) -f posts.mk | ||
|
||
reports: | ||
$(MAKE) -f reports.mk | ||
|
||
tags: | ||
$(MAKE) -f tags.mk | ||
|
||
clean: | ||
rm -f ./*.jsonl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
DATABASE ?= philomena | ||
ELASTICDUMP ?= elasticdump | ||
.ONESHELL: | ||
|
||
all: import_es | ||
|
||
import_es: dump_jsonl | ||
$(ELASTICDUMP) --input=comments.jsonl --output=http://localhost:9200/ --output-index=comments --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc)" | ||
|
||
dump_jsonl: metadata authors tags | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_comments.jsonb_object_agg(object) from temp_comments.comment_search_json group by comment_id) to stdout;' > comments.jsonl | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_comments cascade;' | ||
sed -i comments.jsonl -e 's/\\\\/\\/g' | ||
|
||
metadata: comment_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_comments.comment_search_json (comment_id, object) select c.id, jsonb_build_object( | ||
'id', c.id, | ||
'posted_at', c.created_at, | ||
'ip', c.ip, | ||
'fingerprint', c.fingerprint, | ||
'image_id', c.image_id, | ||
'user_id', c.user_id, | ||
'anonymous', c.anonymous, | ||
'body', c.body, | ||
'hidden_from_users', (c.hidden_from_users or i.hidden_from_users) | ||
) from comments c inner join images i on c.image_id=i.id; | ||
SQL | ||
|
||
authors: comment_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_comments.comment_search_json (comment_id, object) select c.id, jsonb_build_object('author', (case when c.anonymous='t' then null else u.name end)) from comments c left join users u on c.user_id=u.id; | ||
SQL | ||
|
||
tags: comment_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
create unlogged table temp_comments.image_tags (image_id bigint not null, tags jsonb not null); | ||
insert into temp_comments.image_tags (image_id, tags) select it.image_id, jsonb_agg(it.tag_id) from image_taggings it group by it.image_id; | ||
insert into temp_comments.comment_search_json (comment_id, object) select c.id, jsonb_build_object('image_tag_ids', it.tags) from comments c inner join temp_comments.image_tags it on c.image_id=it.image_id; | ||
SQL | ||
|
||
comment_search_json: | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
drop schema if exists temp_comments cascade; | ||
create schema temp_comments; | ||
create unlogged table temp_comments.comment_search_json (comment_id bigint not null, object jsonb not null); | ||
create or replace aggregate temp_comments.jsonb_object_agg(jsonb) (sfunc = 'jsonb_concat', stype = jsonb, initcond='{}'); | ||
SQL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
DATABASE ?= philomena | ||
ELASTICDUMP ?= elasticdump | ||
.ONESHELL: | ||
|
||
all: import_es | ||
|
||
import_es: dump_jsonl | ||
$(ELASTICDUMP) --input=galleries.jsonl --output=http://localhost:9200/ --output-index=galleries --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc)" | ||
|
||
dump_jsonl: metadata subscribers images | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_galleries.jsonb_object_agg(object) from temp_galleries.gallery_search_json group by gallery_id) to stdout;' > galleries.jsonl | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_galleries cascade;' | ||
sed -i galleries.jsonl -e 's/\\\\/\\/g' | ||
|
||
metadata: gallery_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_galleries.gallery_search_json (gallery_id, object) select g.id, jsonb_build_object( | ||
'id', g.id, | ||
'image_count', g.image_count, | ||
'updated_at', g.updated_at, | ||
'created_at', g.created_at, | ||
'title', lower(g.title), | ||
'creator', lower(u.name), | ||
'description', g.description | ||
) from galleries g left join users u on g.creator_id=u.id; | ||
SQL | ||
|
||
subscribers: gallery_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_galleries.gallery_search_json (gallery_id, object) select gallery_id, json_build_object('watcher_ids', jsonb_agg(user_id), 'watcher_count', count(*)) from gallery_subscriptions group by gallery_id; | ||
SQL | ||
|
||
images: gallery_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_galleries.gallery_search_json (gallery_id, object) select gallery_id, json_build_object('image_ids', jsonb_agg(image_id)) from gallery_interactions group by gallery_id; | ||
SQL | ||
|
||
gallery_search_json: | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
drop schema if exists temp_galleries cascade; | ||
create schema temp_galleries; | ||
create unlogged table temp_galleries.gallery_search_json (gallery_id bigint not null, object jsonb not null); | ||
create or replace aggregate temp_galleries.jsonb_object_agg(jsonb) (sfunc = 'jsonb_concat', stype = jsonb, initcond='{}'); | ||
SQL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
DATABASE ?= philomena | ||
ELASTICDUMP ?= elasticdump | ||
.ONESHELL: | ||
|
||
all: import_es | ||
|
||
import_es: dump_jsonl | ||
$(ELASTICDUMP) --input=images.jsonl --output=http://localhost:9200/ --output-index=images --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc)" | ||
|
||
dump_jsonl: metadata true_uploaders uploaders deleters galleries tags hides upvotes downvotes faves tag_names | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_images.jsonb_object_agg(object) from temp_images.image_search_json group by image_id) to stdout;' > images.jsonl | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_images cascade;' | ||
sed -i images.jsonl -e 's/\\\\/\\/g' | ||
|
||
metadata: image_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_images.image_search_json (image_id, object) select id, jsonb_build_object( | ||
'anonymous', anonymous, | ||
'aspect_ratio', nullif(image_aspect_ratio, 'NaN'::float8), | ||
'comment_count', comments_count, | ||
'created_at', created_at, | ||
'deletion_reason', deletion_reason, | ||
'description', description, | ||
'downvotes', downvotes_count, | ||
'duplicate_id', duplicate_id, | ||
'faves', faves_count, | ||
'file_name', image_name, | ||
'fingerprint', fingerprint, | ||
'first_seen_at', first_seen_at, | ||
'height', image_height, | ||
'hidden_from_users', hidden_from_users, | ||
'id', id, | ||
'ip', ip, | ||
'mime_type', image_mime_type, | ||
'orig_sha512_hash', image_orig_sha512_hash, | ||
'original_format', image_format, | ||
'pixels', cast(image_width as bigint)*cast(image_height as bigint), | ||
'score', score, | ||
'size', image_size, | ||
'sha512_hash', image_sha512_hash, | ||
'source_url', lower(source_url), | ||
'updated_at', updated_at, | ||
'upvotes', upvotes_count, | ||
'width', image_width, | ||
'wilson_score', temp_images.wilson_995(upvotes_count, downvotes_count) | ||
) from images; | ||
SQL | ||
|
||
true_uploaders: image_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_images.image_search_json (image_id, object) select i.id, jsonb_build_object('true_uploader_id', u.id, 'true_uploader', u.name) from images i left join users u on u.id = i.user_id; | ||
SQL | ||
|
||
uploaders: image_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_images.image_search_json (image_id, object) select i.id, jsonb_build_object('uploader_id', (case when i.anonymous = 't' then null else u.id end), 'uploader', (case when i.anonymous = 't' then null else lower(u.name) end)) from images i left join users u on u.id = i.user_id; | ||
SQL | ||
|
||
deleters: image_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_images.image_search_json (image_id, object) select i.id, jsonb_build_object('deleted_by_user_id', u.id, 'deleted_by_user', lower(u.name)) from images i left join users u on u.id = i.deleted_by_id; | ||
SQL | ||
|
||
galleries: image_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_images.image_search_json (image_id, object) select gi.image_id, jsonb_build_object('gallery_interactions', jsonb_agg(jsonb_build_object('gallery_id', gi.gallery_id, 'position', gi.position))) from gallery_interactions gi group by image_id; | ||
SQL | ||
|
||
tags: image_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_images.image_search_json (image_id, object) select it.image_id, jsonb_build_object('tag_ids', jsonb_agg(it.tag_id), 'tag_count', count(*)) from image_taggings it group by image_id; | ||
SQL | ||
|
||
hides: image_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_images.image_search_json (image_id, object) select ih.image_id, jsonb_build_object('hidden_by_ids', jsonb_agg(ih.user_id), 'hidden_by', jsonb_agg(lower(u.name))) from image_hides ih inner join users u on u.id = ih.user_id group by image_id; | ||
SQL | ||
|
||
downvotes: image_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('downvoted_by_ids', jsonb_agg(iv.user_id), 'downvoted_by', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = false group by image_id; | ||
SQL | ||
|
||
upvotes: image_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('upvoted_by_ids', jsonb_agg(iv.user_id), 'upvoted_by', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = true group by image_id; | ||
SQL | ||
|
||
faves: image_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_images.image_search_json (image_id, object) select if.image_id, jsonb_build_object('faved_by_ids', jsonb_agg(if.user_id), 'faved_by', jsonb_agg(lower(u.name))) from image_faves if inner join users u on u.id = if.user_id group by image_id; | ||
SQL | ||
|
||
tag_names: tags_with_aliases | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_images.image_search_json (image_id, object) select image_id, jsonb_build_object('namespaced_tags', jsonb_build_object('name', jsonb_agg(lower(tag_name)))) from temp_images.tags_with_aliases group by image_id; | ||
SQL | ||
|
||
tags_with_aliases: image_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
create unlogged table if not exists temp_images.tags_with_aliases (image_id bigint not null, tag_name text not null); | ||
truncate temp_images.tags_with_aliases; | ||
insert into temp_images.tags_with_aliases (image_id, tag_name) select it.image_id, t.name from image_taggings it inner join tags t on t.id = it.tag_id; | ||
insert into temp_images.tags_with_aliases (image_id, tag_name) select it.image_id, t.name from image_taggings it left outer join tags t on t.aliased_tag_id = it.tag_id where t.name is not null; | ||
SQL | ||
|
||
image_search_json: | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
drop schema if exists temp_images cascade; | ||
create schema temp_images; | ||
create unlogged table temp_images.image_search_json (image_id bigint not null, object jsonb not null); | ||
create function temp_images.wilson_995(succ bigint, fail bigint) returns double precision as ' | ||
declare | ||
n double precision; | ||
p_hat double precision; | ||
z double precision; | ||
z2 double precision; | ||
begin | ||
if succ <= 0 then | ||
return 0; | ||
end if; | ||
|
||
n := succ + fail; | ||
p_hat := succ / n; | ||
z := 2.57583; | ||
z2 := 6.634900189; | ||
|
||
return (p_hat + z2 / (2 * n) - z * sqrt((p_hat * (1 - p_hat) + z2 / (4 * n)) / n)) / (1 + z2 / n); | ||
end | ||
' language plpgsql; | ||
create aggregate temp_images.jsonb_object_agg(jsonb) (sfunc = 'jsonb_concat', stype = jsonb, initcond='{}'); | ||
SQL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
DATABASE ?= philomena | ||
ELASTICDUMP ?= elasticdump | ||
.ONESHELL: | ||
|
||
all: import_es | ||
|
||
import_es: dump_jsonl | ||
$(ELASTICDUMP) --input=posts.jsonl --output=http://localhost:9200/ --output-index=posts --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc)" | ||
|
||
dump_jsonl: metadata authors | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_posts.jsonb_object_agg(object) from temp_posts.post_search_json group by post_id) to stdout;' > posts.jsonl | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_posts cascade;' | ||
sed -i posts.jsonl -e 's/\\\\/\\/g' | ||
|
||
metadata: post_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_posts.post_search_json (post_id, object) select p.id, jsonb_build_object( | ||
'id', p.id, | ||
'topic_id', p.topic_id, | ||
'body', p.body, | ||
'subject', t.title, | ||
'ip', p.ip, | ||
'user_agent', p.user_agent, | ||
'referrer', p.referrer, | ||
'fingerprint', p.fingerprint, | ||
'topic_position', p.topic_position, | ||
'forum_id', t.forum_id, | ||
'user_id', p.user_id, | ||
'anonymous', p.anonymous, | ||
'created_at', p.created_at, | ||
'updated_at', p.updated_at, | ||
'deleted', p.hidden_from_users, | ||
'destroyed_content', p.destroyed_content, | ||
'access_level', f.access_level | ||
) from posts p inner join topics t on t.id=p.topic_id inner join forums f on f.id=t.forum_id; | ||
SQL | ||
|
||
authors: post_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_posts.post_search_json (post_id, object) select p.id, jsonb_build_object('author', (case when p.anonymous='t' then null else u.name end)) from posts p left join users u on p.user_id=u.id; | ||
SQL | ||
|
||
post_search_json: | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
drop schema if exists temp_posts cascade; | ||
create schema temp_posts; | ||
create unlogged table temp_posts.post_search_json (post_id bigint not null, object jsonb not null); | ||
create or replace aggregate temp_posts.jsonb_object_agg(jsonb) (sfunc = 'jsonb_concat', stype = jsonb, initcond='{}'); | ||
SQL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
DATABASE ?= philomena | ||
ELASTICDUMP ?= elasticdump | ||
.ONESHELL: | ||
|
||
all: import_es | ||
|
||
import_es: dump_jsonl | ||
$(ELASTICDUMP) --input=reports.jsonl --output=http://localhost:9200/ --output-index=reports --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc)" | ||
|
||
dump_jsonl: metadata image_ids comment_image_ids | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_reports.jsonb_object_agg(object) from temp_reports.report_search_json group by report_id) to stdout;' > reports.jsonl | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_reports cascade;' | ||
sed -i reports.jsonl -e 's/\\\\/\\/g' | ||
|
||
metadata: report_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_reports.report_search_json (report_id, object) select r.id, jsonb_build_object( | ||
'id', r.id, | ||
'created_at', r.created_at, | ||
'ip', r.ip, | ||
'state', r.state, | ||
'user', lower(u.name), | ||
'user_id', r.user_id, | ||
'admin', lower(a.name), | ||
'admin_id', r.admin_id, | ||
'reportable_type', r.reportable_type, | ||
'reportable_id', r.reportable_id, | ||
'fingerprint', r.fingerprint, | ||
'open', r.open, | ||
'reason', r.reason | ||
) from reports r left join users u on r.user_id=u.id left join users a on r.admin_id=a.id; | ||
SQL | ||
|
||
image_ids: report_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_reports.report_search_json (report_id, object) select r.id, jsonb_build_object('image_id', r.reportable_id) from reports r where r.reportable_type = 'Image'; | ||
SQL | ||
|
||
comment_image_ids: report_search_json | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
insert into temp_reports.report_search_json (report_id, object) select r.id, jsonb_build_object('image_id', c.image_id) from reports r inner join comments c on c.id = r.reportable_id where r.reportable_type = 'Comment'; | ||
SQL | ||
|
||
report_search_json: | ||
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL | ||
drop schema if exists temp_reports cascade; | ||
create schema temp_reports; | ||
create unlogged table temp_reports.report_search_json (report_id bigint not null, object jsonb not null); | ||
create or replace aggregate temp_reports.jsonb_object_agg(jsonb) (sfunc = 'jsonb_concat', stype = jsonb, initcond='{}'); | ||
SQL |
Oops, something went wrong.