diff --git a/test/site_test.py b/test/site_test.py index ab642ac..444bada 100644 --- a/test/site_test.py +++ b/test/site_test.py @@ -146,6 +146,7 @@ def as_timestamp(dt): ('date_from', data.draw(st.one_of(st.just(''), st.dates().map(str)))), ('time_to', data.draw(st.one_of(st.just(''), st.datetimes().map(as_timestamp)))), ('time_from', data.draw(st.one_of(st.just(''), st.datetimes().map(as_timestamp)))), + ('updated_since', data.draw(st.one_of(st.just(''), st.datetimes().map(as_timestamp)))), ('q', data.draw(st.one_of(players, substrings(st.sampled_from(names))))), ] + [('steamid64', steamid) for steamid in data.draw(st.lists(players))]) @@ -245,30 +246,39 @@ def get(**params): 'matchid', 'time', 'title', + 'updated', } + updated_pivot = 0 for log in logs: logid = log['logid'] assert logid is not None assert set(log.keys()) == valid_keys + unupdated = True if logid in linked_demos: assert log['demoid'] == linked_demos[logid] + unupdated = False else: assert log['demoid'] is None if logid in linked_matches: assert (log['league'], log['matchid']) == linked_matches[logid] + unupdated = False else: assert log['league'] is None assert log['matchid'] is None if logid in duplicates: assert log['duplicate_of'] == duplicates[logid] + unupdated = False else: assert log['duplicate_of'] is None + if unupdated: + updated_pivot = max(updated_pivot, log['updated']) + assert logs == sorted(logs, key=lambda log: log['logid'], reverse=True) def paged(limit=10): @@ -331,3 +341,6 @@ def paged(limit=10): for log in get(time_to=1573016400): assert log['time'] <= 1573016400 + + for log in get(updated_since=updated_pivot): + assert log['updated'] > updated_pivot diff --git a/trends/importer/link_demos.py b/trends/importer/link_demos.py index 34fc1ec..2358ca1 100644 --- a/trends/importer/link_demos.py +++ b/trends/importer/link_demos.py @@ -41,7 +41,9 @@ def link_logs(args, c): cur.execute("SELECT count(*) from linked;"); count = cur.fetchone()[0] cur.execute("""UPDATE log - SET demoid = 
linked.demoid + SET + demoid = linked.demoid, + updated = extract(EPOCH FROM now())::BIGINT FROM linked WHERE log.logid = linked.logid;""") cur.execute("COMMIT;") diff --git a/trends/importer/link_matches.py b/trends/importer/link_matches.py index fe92870..d8cc5a1 100644 --- a/trends/importer/link_matches.py +++ b/trends/importer/link_matches.py @@ -86,6 +86,7 @@ def link_matches(args, c): cur.execute("""UPDATE log SET league = log_matches.league, matchid = log_matches.matchid, + updated = extract(EPOCH FROM now())::BIGINT, team1_is_red = log_matches.team1_is_red FROM log_matches WHERE log.logid = log_matches.logid;""") diff --git a/trends/importer/logs.py b/trends/importer/logs.py index 5799083..5ca6635 100644 --- a/trends/importer/logs.py +++ b/trends/importer/logs.py @@ -108,12 +108,13 @@ def import_log(c, logid, log): info['uploader_playerid'] = c.fetchone()[0] c.execute("""INSERT INTO log ( logid, time, duration, title, mapid, red_score, blue_score, ad_scoring, - uploader, uploader_nameid + uploader, uploader_nameid, updated ) VALUES ( %(logid)s, %(date)s, %(duration)s, %(title)s, (SELECT mapid FROM map WHERE map = %(map)s), %(red_score)s, %(blue_score)s, %(AD_scoring)s, %(uploader_playerid)s, - (SELECT nameid FROM name WHERE name = %(uploader_name)s) + (SELECT nameid FROM name WHERE name = %(uploader_name)s), + extract(EPOCH FROM now())::BIGINT );""", info) c.execute("INSERT INTO log_json (logid, data) VALUES (%s, %s)", (logid, log)) diff --git a/trends/migrations/log_updated.sql b/trends/migrations/log_updated.sql new file mode 100644 index 0000000..24af96c --- /dev/null +++ b/trends/migrations/log_updated.sql @@ -0,0 +1,17 @@ +BEGIN; +ALTER TABLE log ADD updated BIGINT; +CREATE TEMP TABLE new AS SELECT + logid, + greatest(log.time, demo.time, match.fetched) AS updated +FROM log +LEFT JOIN demo USING (demoid) +LEFT JOIN match USING (league, matchid); +UPDATE log SET + updated = new.updated +FROM new +WHERE log.logid = new.logid; +ALTER TABLE log ALTER updated SET
NOT NULL; +ALTER TABLE log ADD CHECK (updated >= time); +CREATE INDEX log_updated ON log (updated, time); +COMMIT; +VACUUM VERBOSE ANALYZE log; diff --git a/trends/schema.sql b/trends/schema.sql index 9fe323f..edfcbc2 100644 --- a/trends/schema.sql +++ b/trends/schema.sql @@ -440,6 +440,7 @@ CREATE TABLE IF NOT EXISTS log ( league LEAGUE, matchid INT, team1_is_red BOOL, + updated BIGINT NOT NULL, FOREIGN KEY (league, matchid) REFERENCES match (league, matchid), CHECK ((uploader ISNULL) = (uploader_nameid ISNULL)), -- All duplicates must be newer (and have larger logids) than what they are duplicates of @@ -468,12 +469,10 @@ CREATE OR REPLACE VIEW log_nodups AS SELECT FROM log WHERE duplicate_of ISNULL; --- For log search CREATE INDEX IF NOT EXISTS log_title ON log USING gin (title gin_trgm_ops); - --- To filter by date CREATE INDEX IF NOT EXISTS log_time ON log (time); - +CREATE INDEX IF NOT EXISTS log_updated on log (updated, time); +CREATE INDEX IF NOT EXISTS log_map ON log (mapid); CREATE INDEX IF NOT EXISTS log_match ON log (league, matchid); CREATE MATERIALIZED VIEW IF NOT EXISTS map_popularity AS diff --git a/trends/site/common.py b/trends/site/common.py index ec14eb3..bedaad0 100644 --- a/trends/site/common.py +++ b/trends/site/common.py @@ -7,20 +7,43 @@ last_modified def logs_last_modified(): + filters = get_filter_params() + filter_clauses = get_filter_clauses(filters, 'title', 'formatd', 'mapid', 'time', 'logid', + 'updated', 'league') + db = get_db() cur = db.cursor() - cur.execute("SELECT max(time) FROM log;") + if filters['date_to_ts'] is not None: + # Postgres doesn't use log_time to filter on date_to_ts so we get a bad plan if date_to_ts + # is too far in the past (as we end up doing a full scan of log_updated). Give the planner a + # hint that it should use log_time instead. We pay the price by always reading 1000 rows.
+ query = f"""WITH log AS MATERIALIZED (SELECT + updated + FROM log + WHERE TRUE + {filter_clauses} + ORDER BY updated DESC + LIMIT 1000 + ) SELECT max(updated) FROM log;""" + else: + query = f"""SELECT max(updated) + FROM log + WHERE TRUE + {filter_clauses};""" + + cur.execute(query, filters) return last_modified(cur.fetchone()[0]) def get_logs(view): limit, offset = get_pagination() filters = get_filter_params() filter_clauses = get_filter_clauses(filters, 'title', 'format', 'map', 'time', 'logid', - league='log.league') + 'updated', league='log.league') order, order_clause = get_order({ 'logid': "logid", 'duration': "duration", 'date': "time", + 'updated': "updated", }, 'logid') if view == 'players': @@ -73,6 +96,7 @@ def get_logs(view): logs.execute(f"""SELECT logid, time, + updated, duration, title, map, diff --git a/trends/site/player.py b/trends/site/player.py index e0d4d13..d817da3 100644 --- a/trends/site/player.py +++ b/trends/site/player.py @@ -24,6 +24,7 @@ def get_overview(): if not last_active: flask.abort(404) + # FIXME: this is not really accurate... 
if resp := last_modified(last_active): return resp break diff --git a/trends/site/root.py b/trends/site/root.py index c1459eb..020d18a 100644 --- a/trends/site/root.py +++ b/trends/site/root.py @@ -177,6 +177,7 @@ def log(logids): logs.execute("""SELECT logid, time, + updated, title, map, format, @@ -197,7 +198,7 @@ def log(logids): logids = tuple(log['logid'] for log in logs) if not logids: flask.abort(404) - if resp := last_modified(max(log['time'] for log in logs)): + if resp := last_modified(max(log['updated'] for log in logs)): return resp params = { 'logids': logids, 'llogids': list(logids) } diff --git a/trends/site/team.py b/trends/site/team.py index fa69212..2c7b3b4 100644 --- a/trends/site/team.py +++ b/trends/site/team.py @@ -44,6 +44,7 @@ def get_comp(): args['teamid'] = flask.g.team['teamid'] return flask.redirect(flask.url_for(flask.request.endpoint, **args), 301) + # FIXME: not quite accurate when we display logs if resp := last_modified(flask.g.team['fetched']): return resp diff --git a/trends/site/templates/api.html b/trends/site/templates/api.html index 48fcdb4..2a83929 100644 --- a/trends/site/templates/api.html +++ b/trends/site/templates/api.html @@ -226,6 +226,15 @@
+ Filter the results to those updated after the specified UNIX time. + For example, supplying {{ pre("updated_since=1646334000".format(p)) }} to the + {{ apiref('logs') }} endpoint would + filter results to logs + updated after 2022-03-03 19:00:00Z. +
+This section documents members common across multiple responses. @@ -324,6 +333,8 @@