From 605d2aeb7bdf1e8b958ee0cde6b6cfd29ac3adb2 Mon Sep 17 00:00:00 2001 From: barry Date: Mon, 12 Aug 2024 18:58:26 -0400 Subject: [PATCH] Fix issue with auth token not refreshing --- .../celery/task_logic/scheduled_task_logic.py | 2 +- redditrepostsleuth/ingestsvc/ingestsvc.py | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py b/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py index 17d0539..d437f11 100644 --- a/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py +++ b/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py @@ -101,7 +101,7 @@ def run_update_top_reposts(uow: UnitOfWork) -> None: def update_top_reposters(uow: UnitOfWork, post_type_id: int, day_range: int = None) -> None: log.info('Getting top repostors for post type %s with range %s', post_type_id, day_range) - range_query = "SELECT author, COUNT(*) c FROM repost WHERE detected_at > NOW() - INTERVAL :days DAY AND post_type_id=:posttype AND author is not NULL AND author!= '[deleted]' GROUP BY author HAVING c > 10 ORDER BY c DESC" + range_query = "SELECT author, COUNT(*) c FROM repost WHERE detected_at > NOW() - INTERVAL :days DAY AND post_type_id=:posttype AND author is not NULL AND author!= '[deleted]' GROUP BY author HAVING c > 10 ORDER BY c DESC LIMIT 100000" all_time_query = "SELECT author, COUNT(*) c FROM repost WHERE post_type_id=:posttype AND author is not NULL AND author!= '[deleted]' GROUP BY author HAVING c > 10 ORDER BY c DESC LIMIT 100000" if day_range: query = range_query diff --git a/redditrepostsleuth/ingestsvc/ingestsvc.py b/redditrepostsleuth/ingestsvc/ingestsvc.py index 170688e..e259f41 100644 --- a/redditrepostsleuth/ingestsvc/ingestsvc.py +++ b/redditrepostsleuth/ingestsvc/ingestsvc.py @@ -226,7 +226,7 @@ def get_auth_headers(reddit: Reddit) -> dict: :param reddit: :return: """ - reddit.user.me() + list(reddit.subreddit('all').new(limit=1)) # Force praw to make a req so we can steal the token return {**HEADERS, **{'Authorization': f'Bearer {reddit.auth._reddit._core._authorizer.access_token}'}} async def main() -> None: @@ -300,16 +300,17 @@ async def main() -> None: newest_id = res_data['data']['children'][-1]['data']['id'] - - saved_ids = [x['id'] for x in posts_to_save] - missing_ids_in_this_req = list(set(ids_to_get).difference(saved_ids)) - missed_ids += [base36decode(x) for x in missing_ids_in_this_req] time.sleep(request_delay) - log.info('Missed IDs: %s', len(missed_ids)) - if len(missed_ids) > missed_id_retry_count: - await ingest_sequence(missed_ids, alt_headers=auth_headers) - missed_ids = [] + # saved_ids = [x['id'] for x in posts_to_save] + # missing_ids_in_this_req = list(set(ids_to_get).difference(saved_ids)) + # missed_ids += [base36decode(x) for x in missing_ids_in_this_req] + + + # log.info('Missed IDs: %s', len(missed_ids)) + # if len(missed_ids) > missed_id_retry_count: + # await ingest_sequence(missed_ids, alt_headers=auth_headers) + # missed_ids = [] if __name__ == '__main__': run(main()) \ No newline at end of file