From 8e745f1428a66bdfa7844e95b66816c5689c53d9 Mon Sep 17 00:00:00 2001 From: ntorba <32570754+ntorba@users.noreply.github.com> Date: Thu, 10 Nov 2022 14:56:35 -0500 Subject: [PATCH 1/2] implement a function for home timeline reverse chrono --- test_twarc2.py | 14 +++++ twarc/client2.py | 141 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 153 insertions(+), 2 deletions(-) diff --git a/test_twarc2.py b/test_twarc2.py index 715fe86d..65a2ba08 100644 --- a/test_twarc2.py +++ b/test_twarc2.py @@ -301,6 +301,20 @@ def atest_timeline(): assert found >= 200 +def atest_home_timeline_reverse_chrono(): + """ + Test the reverse chronological home timeline endpoint. + + """ + # get the first pages of the home timeline for user ID 12 (@jack) + found = 0 + for pages, tweets in enumerate(T.timeline_reverse_chrono(12)): + found += len(tweets["data"]) + if pages == 3: + break + assert found >= 200 + + def atest_timeline_username(): """ Test the user timeline endpoints with username. diff --git a/twarc/client2.py b/twarc/client2.py index a6a0058c..d9b59150 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -44,6 +44,7 @@ def __init__( bearer_token=None, connection_errors=0, metadata=True, + user_auth=False, ): """ Instantiate a Twarc2 instance to talk to the Twitter V2+ API. 
@@ -80,10 +81,12 @@ def __init__( self.metadata = metadata self.bearer_token = None - if bearer_token: + if access_token and user_auth: + self.access_token = access_token + self.auth_type = "user" + elif bearer_token: self.bearer_token = bearer_token self.auth_type = "application" - elif consumer_key and consumer_secret: if access_token and access_token_secret: self.consumer_key = consumer_key @@ -1328,6 +1331,136 @@ def timeline( pagination_token=pagination_token, ) + def _timeline_reverse_chrono( + self, + user_id, + timeline_type, + since_id, + until_id, + start_time, + end_time, + exclude_retweets, + exclude_replies, + max_results=None, + expansions=None, + tweet_fields=None, + user_fields=None, + media_fields=None, + poll_fields=None, + place_fields=None, + pagination_token=None, + ): + """ + Helper function for the reverse chronological home timeline + + Calls [GET /2/users/:id/timelines/reverse_chronological](https://developer.twitter.com/en/docs/twitter-api/tweets/timelines/api-reference/get-users-id-reverse-chronological) + and yields each page of results that contains `data`. + + Args: + user_id (int): ID of the user whose home timeline is requested. + timeline_type (str): not used by this helper; the endpoint is always the reverse chronological timeline + since_id (int): results with a Tweet ID greater than (newer) than specified + until_id (int): results with a Tweet ID less than (older) than specified + start_time (datetime): oldest UTC timestamp from which the Tweets will be provided + end_time (datetime): newest UTC timestamp from which the Tweets will be provided + exclude_retweets (boolean): remove retweets from timeline + exclude_replies (boolean): remove replies from timeline + Returns: + generator[dict]: A generator, dict for each page of results. 
+ """ + + url = ( + f"https://api.twitter.com/2/users/{user_id}/timelines/reverse_chronological" + ) + + params = self._prepare_params( + since_id=since_id, + until_id=until_id, + start_time=start_time, + end_time=end_time, + max_results=max_results, + expansions=expansions, + tweet_fields=tweet_fields, + user_fields=user_fields, + media_fields=media_fields, + poll_fields=poll_fields, + place_fields=place_fields, + pagination_token=pagination_token, + ) + + excludes = [] + if exclude_retweets: + excludes.append("retweets") + if exclude_replies: + excludes.append("replies") + if len(excludes) > 0: + params["exclude"] = ",".join(excludes) + + for response in self.get_paginated(url, params=params): + # can return without 'data' if there are no results + if "data" in response: + yield response + else: + log.info(f"Retrieved an empty page of results for timeline {user_id}") + + log.info(f"No more results for timeline {user_id}.") + + def timeline_reverse_chrono( + self, + user, + since_id=None, + until_id=None, + start_time=None, + end_time=None, + exclude_retweets=False, + exclude_replies=False, + max_results=100, + expansions=None, + tweet_fields=None, + user_fields=None, + media_fields=None, + poll_fields=None, + place_fields=None, + pagination_token=None, + ): + """ + Retrieve the reverse chronological home timeline of the given user. + + Calls [GET /2/users/:id/timelines/reverse_chronological](https://developer.twitter.com/en/docs/twitter-api/tweets/timelines/api-reference/get-users-id-reverse-chronological) + + Args: + user (int): ID of the user. 
+ since_id (int): results with a Tweet ID greater than (newer) than specified + until_id (int): results with a Tweet ID less than (older) than specified + start_time (datetime): oldest UTC timestamp from which the Tweets will be provided + end_time (datetime): newest UTC timestamp from which the Tweets will be provided + exclude_retweets (boolean): remove retweets from timeline results + exclude_replies (boolean): remove replies from timeline results + max_results (int): the maximum number of Tweets to retrieve. Between 1 and 100. + + Returns: + generator[dict]: A generator, dict for each page of results. + """ + user_id = self._ensure_user_id(user) + return self._timeline_reverse_chrono( + user_id=user_id, + timeline_type="tweets", + since_id=since_id, + until_id=until_id, + start_time=start_time, + end_time=end_time, + exclude_retweets=exclude_retweets, + exclude_replies=exclude_replies, + max_results=max_results, + expansions=expansions, + tweet_fields=tweet_fields, + user_fields=user_fields, + media_fields=media_fields, + poll_fields=poll_fields, + place_fields=place_fields, + pagination_token=pagination_token, + ) + def mentions( self, user, @@ -1712,6 +1845,10 @@ def connect(self): client_id=self.consumer_key, client_secret=self.consumer_secret, ) + elif self.auth_type == "user" and self.access_token: + self.client = requests.Session() + auth = f"Bearer {self.access_token}" + self.client.headers.update({"Authorization": auth}) else: log.info("creating user auth client") log.debug("client_id: %s", self.consumer_key) From dfbc8d73d6c0ced20f6fbb953bd1457eb7cc9c60 Mon Sep 17 00:00:00 2001 From: ntorba <32570754+ntorba@users.noreply.github.com> Date: Fri, 11 Nov 2022 09:12:15 -0500 Subject: [PATCH 2/2] adding docs for new arg --- twarc/client2.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/twarc/client2.py b/twarc/client2.py index d9b59150..54ecaee1 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -59,6 +59,8 @@ def __init__( 3. 
If `consumer_key`, `consumer_secret`, `access_token` and `access_token_secret` are all passed, then user authentication is used instead. + 4. If `access_token` and `user_auth=True` are passed, then user + authentication (OAuth 2.0) is used instead. Args: consumer_key (str): @@ -75,6 +77,8 @@ def __init__( Number of retries for GETs metadata (bool): Append `__twarc` metadata to results. + user_auth (bool): + Use user authentication (OAuth 2.0) with user access_token instead of app authentication """ self.api_version = "2" self.connection_errors = connection_errors