Skip to content

Commit

Permalink
Merge pull request #37 from y-pakorn/feat/get-article
Browse files Browse the repository at this point in the history
Add `getArticle` function to scraper.
  • Loading branch information
wtfsayo authored Jan 5, 2025
2 parents 8c07898 + d71542a commit 0fa3988
Show file tree
Hide file tree
Showing 6 changed files with 173 additions and 20 deletions.
2 changes: 2 additions & 0 deletions src/api-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ const endpoints = {
'https://twitter.com/i/api/graphql/eSSNbhECHHWWALkkQq-YTA/Likes?variables=%7B%22userId%22%3A%222244196397%22%2C%22count%22%3A20%2C%22includePromotedContent%22%3Afalse%2C%22withClientEventToken%22%3Afalse%2C%22withBirdwatchNotes%22%3Afalse%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Atrue%7D&features=%7B%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22c9s_tweet_anatomy_moderator_badge_enabled%22%3Atrue%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22responsive_web_twitter_article_tweet_consumption_enabled%22%3Atrue%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Atrue%2C%22rweb_video_timestamps_enabled%22%3Atrue%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%22%3Atrue%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D',
TweetDetail:
'https://twitter.com/i/api/graphql/xOhkmRac04YFZmOzU9PJHg/TweetDetail?variables=%7B%22focalTweetId%22%3A%221237110546383724547%22%2C%22with_rux_injections%22%3Afalse%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withQuickPromoteEligibilityTweetFields%22%3Atrue%2C%22withBirdwatchNotes%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Atrue%7D&features=%7B%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22responsive_web_twitter_article_tweet_consumption_enabled%22%3Afalse%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Atrue%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%22%3Atrue%2C%22responsive_web_media_download_video_enabled%22%3Afalse%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D&fieldToggles=%7B%22withArticleRichContentState%22%3Afalse%7D',
TweetDetailArticle:
'https://twitter.com/i/api/graphql/GtcBtFhtQymrpxAs5MALVA/TweetDetail?variables=%7B%22focalTweetId%22%3A%221765884209527394325%22%2C%22with_rux_injections%22%3Atrue%2C%22rankingMode%22%3A%22Relevance%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withQuickPromoteEligibilityTweetFields%22%3Atrue%2C%22withBirdwatchNotes%22%3Atrue%2C%22withVoice%22%3Atrue%7D&features=%7B%22profile_label_improvements_pcf_label_in_post_enabled%22%3Afalse%2C%22rweb_tipjar_consumption_enabled%22%3Atrue%2C%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22premium_content_api_read_enabled%22%3Afalse%2C%22communities_web_enable_tweet_community_results_fetch%22%3Atrue%2C%22c9s_tweet_anatomy_moderator_badge_enabled%22%3Atrue%2C%22responsive_web_grok_analyze_button_fetch_trends_enabled%22%3Atrue%2C%22responsive_web_grok_analyze_post_followups_enabled%22%3Afalse%2C%22responsive_web_grok_share_attachment_enabled%22%3Atrue%2C%22articles_preview_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22responsive_web_twitter_article_tweet_consumption_enabled%22%3Atrue%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22creator_subscriptions_quote_tweet_preview_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Atrue%2C%22rweb_video_timestamps_enabled%22%3Atrue%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%2
2%3Atrue%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D&fieldToggles=%7B%22withArticleRichContentState%22%3Atrue%2C%22withArticlePlainText%22%3Afalse%2C%22withGrokAnalyze%22%3Afalse%2C%22withDisallowedReplyControls%22%3Afalse%7D',
TweetResultByRestId:
'https://twitter.com/i/api/graphql/DJS3BdhUhcaEpZ7B7irJDg/TweetResultByRestId?variables=%7B%22tweetId%22%3A%221237110546383724547%22%2C%22withCommunity%22%3Afalse%2C%22includePromotedContent%22%3Afalse%2C%22withVoice%22%3Afalse%7D&features=%7B%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22responsive_web_twitter_article_tweet_consumption_enabled%22%3Afalse%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Atrue%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%22%3Atrue%2C%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22responsive_web_media_download_video_enabled%22%3Afalse%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D',
ListTweets:
Expand Down
34 changes: 28 additions & 6 deletions src/scraper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,13 @@ import {
retweet,
createCreateNoteTweetRequest,
createCreateLongTweetRequest,
getArticle,
} from './tweets';
import { parseTimelineTweetsV2, TimelineV2 } from './timeline-v2';
import {
parseTimelineTweetsV2,
TimelineArticle,
TimelineV2,
} from './timeline-v2';
import { fetchHomeTimeline } from './timeline-home';
import { fetchFollowingTimeline } from './timeline-following';
import {
Expand All @@ -75,9 +80,17 @@ import {
fetchAudioSpaceById,
fetchAuthenticatePeriscope,
fetchBrowseSpaceTopics,
fetchCommunitySelectQuery, fetchLiveVideoStreamStatus, fetchLoginTwitterToken
fetchCommunitySelectQuery,
fetchLiveVideoStreamStatus,
fetchLoginTwitterToken,
} from './spaces';
import {AudioSpace, Community, LiveVideoStreamStatus, LoginTwitterTokenResponse, Subtopic} from './types/spaces';
import {
AudioSpace,
Community,
LiveVideoStreamStatus,
LoginTwitterTokenResponse,
Subtopic,
} from './types/spaces';

const twUrl = 'https://twitter.com';
const UserTweetsUrl =
Expand Down Expand Up @@ -945,7 +958,7 @@ export class Scraper {
* @returns The status of the Audio Space stream.
*/
public async getAudioSpaceStreamStatus(
mediaKey: string,
mediaKey: string,
): Promise<LiveVideoStreamStatus> {
return await fetchLiveVideoStreamStatus(mediaKey, this.auth);
}
Expand All @@ -958,7 +971,7 @@ export class Scraper {
* @returns The status of the Audio Space stream.
*/
public async getAudioSpaceStatus(
audioSpaceId: string,
audioSpaceId: string,
): Promise<LiveVideoStreamStatus> {
const audioSpace = await this.getAudioSpaceById(audioSpaceId);

Expand All @@ -984,7 +997,7 @@ export class Scraper {
* @returns The response containing the cookie and user information.
*/
public async loginTwitterToken(
jwt: string,
jwt: string,
): Promise<LoginTwitterTokenResponse> {
return await fetchLoginTwitterToken(jwt, this.auth);
}
Expand All @@ -999,4 +1012,13 @@ export class Scraper {

return loginResponse.cookie;
}

/**
 * Fetches an article (long form tweet) attached to a tweet.
 *
 * Forwards to the `getArticle` helper imported from `./tweets`, passing this
 * scraper's auth.
 *
 * @param id The ID of the tweet that carries the article. The helper uses it
 * as the focal tweet of the request and matches it against the tweet ID of
 * each parsed article, so this is the tweet's ID rather than the ID that
 * appears in an article URL (http://x.com/i/article/id).
 * @returns The {@link TimelineArticle} object, or `null` if the response
 * contained no article for that tweet. The promise rejects if the underlying
 * request is unsuccessful.
 */
public getArticle(id: string): Promise<TimelineArticle | null> {
return getArticle(id, this.auth);
}
}
49 changes: 49 additions & 0 deletions src/timeline-v1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,50 @@ export interface SearchResultRaw {
legacy?: LegacyTweetRaw;
}

/**
 * Raw article payload as it appears under
 * `TimelineResultRaw.article.article_results.result`.
 *
 * Every field is optional; `parseArticle` substitutes empty strings for the
 * ones it reads when they are missing.
 */
export interface TimelineArticleResultRaw {
id?: string;
// Article identifier; surfaced as `TimelineArticle.articleId` by `parseArticle`.
rest_id?: string;
// Surfaced as `TimelineArticle.title`.
title?: string;
// Surfaced as `TimelineArticle.previewText`.
preview_text?: string;
cover_media?: {
media_id?: string;
media_info?: {
// Surfaced as `TimelineArticle.coverMediaUrl`.
original_img_url?: string;
original_img_height?: number;
original_img_width?: number;
};
};
content_state?: {
// Content blocks; `parseArticle` joins their `text` values with a blank line
// between them to build `TimelineArticle.text`.
blocks?: {
key?: string;
data?: string;
text?: string;
// Presumably spans of `text` (offset/length) that point at an `entityMap`
// entry by key — TODO confirm against a real payload.
entityRanges?: {
key?: number;
length?: number;
offset?: number;
}[];
}[];
};
// NOTE(review): declared here as a sibling of `content_state`; confirm whether
// the API actually nests it inside `content_state` next to `blocks`.
entityMap?: {
key?: string;
value?: {
type?: string; // LINK, MEDIA, TWEET
mutability?: string;
// Presumably `url` is set for LINK, `tweetId` for TWEET and `mediaItems` for
// MEDIA entities — TODO confirm.
data?: {
entityKey?: string;
url?: string;
tweetId?: string;
mediaItems?: {
localMediaId?: string;
mediaCategory?: string;
mediaId?: string;
}[];
};
};
}[];
}

export interface TimelineResultRaw {
rest_id?: string;
__typename?: string;
Expand All @@ -97,6 +141,11 @@ export interface TimelineResultRaw {
};
};
};
article?: {
article_results?: {
result?: TimelineArticleResultRaw;
};
};
quoted_status_result?: {
result?: TimelineResultRaw;
};
Expand Down
38 changes: 38 additions & 0 deletions src/timeline-v2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -423,3 +423,41 @@ export function parseThreadedConversation(

return tweets;
}

/**
 * Parsed article (long form tweet) as produced by `parseArticle`.
 */
export interface TimelineArticle {
// `rest_id` of the tweet that carries the article.
id: string;
// `rest_id` of the article itself; empty string when the payload has none.
articleId: string;
// Article title; empty string when the payload has none.
title: string;
// Article preview text; empty string when the payload has none.
previewText: string;
// Original cover image URL; absent when the article has no cover media info.
coverMediaUrl?: string;
// Text of all content blocks joined with a blank line between them; empty
// string when the article has no blocks.
text: string;
}

/**
 * Extracts every article attached to a tweet in a threaded conversation.
 *
 * Walks all entries of all instructions under
 * `data.threaded_conversation_with_injections_v2` and emits one
 * {@link TimelineArticle} per entry whose tweet result has both a `rest_id`
 * and an article result. Entries missing either one are ignored.
 *
 * @param conversation The raw threaded conversation response.
 * @returns The parsed articles in the order they were encountered; empty when
 * the response has no instructions or no tweet carries an article.
 */
export function parseArticle(
  conversation: ThreadedConversation,
): TimelineArticle[] {
  const parsed: TimelineArticle[] = [];
  const instructions =
    conversation.data?.threaded_conversation_with_injections_v2?.instructions ??
    [];

  for (const { entries } of instructions) {
    for (const entry of entries ?? []) {
      const tweetResult = entry.content?.itemContent?.tweet_results?.result;
      const tweetId = tweetResult?.rest_id;
      const rawArticle = tweetResult?.article?.article_results?.result;

      if (tweetId && rawArticle) {
        const blocks = rawArticle.content_state?.blocks;
        // Blocks without text contribute an empty string to the join.
        const body = blocks
          ? blocks.map((block) => block.text).join('\n\n')
          : '';

        parsed.push({
          id: tweetId,
          articleId: rawArticle.rest_id || '',
          coverMediaUrl: rawArticle.cover_media?.media_info?.original_img_url,
          previewText: rawArticle.preview_text || '',
          text: body,
          title: rawArticle.title || '',
        });
      }
    }
  }

  return parsed;
}
43 changes: 29 additions & 14 deletions src/tweets.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { getScraper } from './test-utils';
import { QueryTweetsResponse } from './timeline-v1';
import { Mention, Tweet } from './tweets';
import { Mention, Tweet, getTweetAnonymous } from './tweets';
import fs from 'fs';
import path from 'path';

Expand Down Expand Up @@ -462,10 +462,10 @@ test('scraper can send a tweet with image and video', async () => {

// Read test image and video files from the test-assets directory
const imageBuffer = fs.readFileSync(
path.join(__dirname, '../test-assets/test-image.jpeg')
path.join(__dirname, '../test-assets/test-image.jpeg'),
);
const videoBuffer = fs.readFileSync(
path.join(__dirname, '../test-assets/test-video.mp4')
path.join(__dirname, '../test-assets/test-video.mp4'),
);

// Prepare media data array with both image and video
Expand Down Expand Up @@ -502,10 +502,10 @@ test('scraper can quote tweet with image and video', async () => {

// Read test image and video files from the test-assets directory
const imageBuffer = fs.readFileSync(
path.join(__dirname, '../test-assets/test-image.jpeg')
path.join(__dirname, '../test-assets/test-image.jpeg'),
);
const videoBuffer = fs.readFileSync(
path.join(__dirname, '../test-assets/test-video.mp4')
path.join(__dirname, '../test-assets/test-video.mp4'),
);

// Prepare media data array with both image and video
Expand All @@ -531,13 +531,11 @@ test('scraper can quote tweet with media', async () => {

// Read test image file
const imageBuffer = fs.readFileSync(
path.join(__dirname, '../test-assets/test-image.jpeg')
path.join(__dirname, '../test-assets/test-image.jpeg'),
);

// Prepare media data with the image
const mediaData = [
{ data: imageBuffer, mediaType: 'image/jpeg' },
];
const mediaData = [{ data: imageBuffer, mediaType: 'image/jpeg' }];

// Send a quote tweet with the image attachment
const response = await scraper.sendQuoteTweet(quoteText, quotedTweetId, {
Expand All @@ -555,13 +553,11 @@ test('sendTweetWithMedia successfully sends a tweet with media', async () => {

// Read a test image file
const imageBuffer = fs.readFileSync(
path.join(__dirname, '../test-assets/test-image.jpeg')
path.join(__dirname, '../test-assets/test-image.jpeg'),
);

// Prepare media data with the image
const mediaData = [
{ data: imageBuffer, mediaType: 'image/jpeg' },
];
const mediaData = [{ data: imageBuffer, mediaType: 'image/jpeg' }];

// Send a tweet with the image attachment
const result = await scraper.sendTweet(draftText, undefined, mediaData);
Expand Down Expand Up @@ -593,4 +589,23 @@ test('scraper can follow user', async () => {

// Test should not throw an error
await expect(scraper.followUser(username)).resolves.not.toThrow();
}, 30000);
}, 30000);

// Verifies that `getTweet` only exposes an article as a link: the tweet text
// holds a t.co short link and the first expanded URL points at the article.
test('scraper cannot get article using getTweet', async () => {
  const scraper = await getScraper();
  // Tweet introducing articles on X; the article it links to is
  // http://x.com/i/article/1765821414056120320
  const tweet = await scraper.getTweet('1765884209527394325');

  expect(tweet).not.toBeNull();
  // Dots are escaped so that only the literal hosts match; an unescaped `.`
  // would accept any character in that position.
  expect(tweet?.text).toMatch(/https?:\/\/t\.co\//);
  expect(tweet?.urls[0]).toMatch(/https?:\/\/x\.com\/i\/article\//);
}, 30000);

// Verifies that `getArticle` returns the article content for a tweet that
// carries one.
test('scraper can get article using getArticle', async () => {
  // ID of the tweet introducing articles on X. The article it carries lives at
  // http://x.com/i/article/1765821414056120320
  const articleTweetId = '1765884209527394325';

  const scraper = await getScraper();
  const fetched = await scraper.getArticle(articleTweetId);

  expect(fetched).not.toBeNull();
  expect(fetched?.title).toMatch(/Introducing Articles on X/);
}, 30000);
27 changes: 27 additions & 0 deletions src/tweets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import {
parseTimelineEntryItemContentRaw,
ThreadedConversation,
parseThreadedConversation,
parseArticle,
TimelineArticle,
} from './timeline-v2';
import { getTweetTimeline } from './timeline-async';
import { apiRequestFactory } from './api-data';
Expand Down Expand Up @@ -1479,3 +1481,28 @@ export async function createCreateLongTweetRequest(

return response;
}

/**
 * Fetches the article (long form tweet) attached to a tweet.
 *
 * Builds a TweetDetailArticle request with `id` as the focal tweet, parses
 * every article out of the returned conversation and picks the first one
 * whose tweet ID equals `id`.
 *
 * @param id The ID of the tweet that carries the article.
 * @param auth The authentication context used to perform the request.
 * @returns The matching {@link TimelineArticle}, or `null` when the response
 * has no value or contains no article for that tweet.
 * @throws Rethrows the error reported by `requestApi` when the request is
 * unsuccessful.
 */
export async function getArticle(
  id: string,
  auth: TwitterAuth,
): Promise<TimelineArticle | null> {
  const request = apiRequestFactory.createTweetDetailArticleRequest();
  request.variables.focalTweetId = id;

  const response = await requestApi<ThreadedConversation>(
    request.toRequestUrl(),
    auth,
  );
  if (!response.success) {
    throw response.err;
  }

  const conversation = response.value;
  if (!conversation) {
    return null;
  }

  // The conversation can contain other tweets; keep only the focal one.
  for (const candidate of parseArticle(conversation)) {
    if (candidate.id === id) {
      return candidate;
    }
  }
  return null;
}

0 comments on commit 0fa3988

Please sign in to comment.