// tiktok.ts — forked from webrecorder/browsertrix-behaviors
// 105 lines (92 loc) · 3.48 KB
/**
 * XPath expressions for the TikTok page elements the behaviors in this file
 * look up. Entries starting with `//` are evaluated without a context node,
 * entries starting with `.//` are evaluated relative to a comment item, and
 * the bare `div[...]` entries are handed to `iterChildMatches` together
 * with a parent node.
 */
const Q = {
  // Root container looked up by TikTokVideoBehavior.run.
  commentList: "//div[contains(@class, 'CommentListContainer')]",
  // Individual comment entries iterated under the comment list.
  commentItem: "div[contains(@class, 'CommentItemContainer')]",
  // Control clicked first to open the replies of a comment.
  viewMoreReplies: ".//p[contains(@class, 'ReplyActionText')]",
  // Follow-up "view more" control; only matches when it has non-empty text.
  viewMoreThread: ".//p[starts-with(@data-e2e, 'view-more') and string-length(text()) > 0]",
  // Root container looked up by TikTokProfileBehavior.run.
  profileVideoList: "//div[starts-with(@data-e2e, 'user-post-item-list')]",
  // Individual video tiles iterated under the profile video list.
  profileVideoItem: "div[contains(@class, 'DivItemContainerV2')]",
  // Passed to HistoryState#goBack after a video opened from a profile.
  backButton: "button[contains(@class, 'StyledCloseIconContainer')]",
};

/**
 * Sentinel value for the `breadth` option. While `breadth` is this symbol,
 * `breadthComplete` never reports completion, so reply threads are expanded
 * without a limit. It is the default `breadth` returned by both `init()`s.
 */
export const BREADTH_ALL = Symbol("BREADTH_ALL");
/**
 * Behavior for a single TikTok video page: walks the comment list, scrolls
 * every comment into view and, subject to the `breadth` option, clicks the
 * "view more" controls of each comment's reply thread.
 *
 * All page access goes through the helper functions found on `ctx.Lib`.
 */
export class TikTokVideoBehavior {
  static id = "TikTokVideo";

  /**
   * @returns {{state: {comments: number}, opts: {breadth: symbol}}}
   *   Initial counters and default options (unlimited breadth).
   */
  static init() {
    const state = { comments: 0 };
    const opts = { breadth: BREADTH_ALL };
    return { state, opts };
  }

  /**
   * @returns {boolean} True when the current page URL looks like
   *   `tiktok.com/@<user>/video/<digits>`.
   */
  static isMatch() {
    const videoUrlPattern = /https:\/\/(www\.)?tiktok\.com\/@.+\/video\/\d+\/?.*/;
    return window.location.href.match(videoUrlPattern) !== null;
  }

  /**
   * Decides whether thread expansion has reached the configured limit.
   *
   * @param {object} ctx - Behavior context; only `ctx.opts.breadth` is read.
   * @param {number} iter - Number of expansion steps already counted.
   * @returns {boolean} Always false for BREADTH_ALL, otherwise
   *   `breadth <= iter`.
   */
  breadthComplete({ opts: { breadth } }, iter) {
    if (breadth === BREADTH_ALL) {
      return false;
    }
    return breadth <= iter;
  }

  /**
   * Repeatedly waits for the next "view more" control inside `parentNode`
   * and clicks it, yielding a state update after every click. Stops when
   * `waitUntilNode` returns nothing or the breadth limit is reached.
   *
   * @param {object} ctx - Behavior context.
   * @param {*} parentNode - Node the "view more" lookup is scoped to.
   * @param {*} [prev=null] - Previously clicked control, forwarded to
   *   `waitUntilNode` as its third argument.
   * @param {number} [iter=0] - Expansion steps counted so far.
   */
  async* crawlThread(ctx, parentNode, prev = null, iter = 0) {
    const { waitUntilNode, scrollAndClick, getState } = ctx.Lib;
    let lastClicked = prev;
    let depth = iter;
    for (;;) {
      // The wait happens before the breadth check on every step.
      const control = await waitUntilNode(Q.viewMoreThread, parentNode, lastClicked);
      const finished = !control || this.breadthComplete(ctx, depth);
      if (finished) {
        return;
      }
      await scrollAndClick(control, 500);
      yield getState(ctx, "View more replies", "comments");
      lastClicked = control;
      depth += 1;
    }
  }

  /**
   * Opens the replies of one comment (if it has a replies control) and then
   * keeps expanding the thread via `crawlThread`, starting at step 1.
   *
   * @param {object} ctx - Behavior context.
   * @param {*} item - Comment node to expand.
   */
  async* expandThread(ctx, item) {
    const { xpathNode, scrollAndClick, getState } = ctx.Lib;
    const repliesControl = xpathNode(Q.viewMoreReplies, item);
    if (repliesControl) {
      await scrollAndClick(repliesControl, 500);
      yield getState(ctx, "View comment", "comments");
      yield* this.crawlThread(ctx, item, null, 1);
    }
  }

  /**
   * Entry point: iterates the comment items, yielding a state update per
   * comment and per expansion click, then a final completion message.
   *
   * @param {object} ctx - Behavior context.
   */
  async* run(ctx) {
    const { xpathNode, iterChildMatches, scrollIntoView, getState } = ctx.Lib;
    const listRoot = xpathNode(Q.commentList);
    for await (const comment of iterChildMatches(Q.commentItem, listRoot)) {
      scrollIntoView(comment);
      yield getState(ctx, "View comment", "comments");
      // A breadth of 0 (or less) disables thread expansion entirely.
      if (!this.breadthComplete(ctx, 0)) {
        yield* this.expandThread(ctx, comment);
      }
    }
    yield getState(ctx, "TikTok Video Behavior Complete");
  }
}
/**
 * Behavior for a TikTok profile page: iterates the profile's video tiles,
 * opens each one and, when the page history changes as a result, runs
 * `TikTokVideoBehavior` on the opened video before navigating back.
 *
 * All page access goes through the helper functions found on `ctx.Lib`.
 */
export class TikTokProfileBehavior {
  static id = "TikTokProfile";

  /**
   * @returns {boolean} True when the current page URL is a profile URL,
   *   i.e. `tiktok.com/@<handle>` optionally followed by `/`, a query
   *   string and/or a fragment — but no further path segment.
   */
  static isMatch() {
    // Handles may contain letters, digits, underscores and periods, and
    // profile links commonly carry a query string directly after the handle
    // (`@name?lang=en`), so both forms must be accepted. The trailing `$`
    // keeps `/@name/video/<id>` URLs from matching.
    const pathRegex = /https:\/\/(www\.)?tiktok\.com\/@[\w.]+\/?([?#].*)?$/;
    return !!window.location.href.match(pathRegex);
  }

  /**
   * @returns {{state: {videos: number, comments: number}, opts: {breadth: symbol}}}
   *   Initial counters and default options (unlimited breadth).
   */
  static init() {
    return {
      state: { videos: 0, comments: 0 },
      opts: { breadth: BREADTH_ALL }
    };
  }

  /**
   * Clicks the first link inside a video tile and, if that changed the
   * history state, runs the video behavior and then goes back.
   *
   * @param {object} ctx - Behavior context.
   * @param {*} item - Video tile node; skipped when it contains no `<a>`.
   */
  async* openVideo(ctx, item) {
    const { HistoryState, xpathNode, sleep } = ctx.Lib;
    const link = xpathNode(".//a", item);
    if (!link) return;
    // HistoryState receives the click as a callback; its `changed` flag is
    // read afterwards (presumably it compares history before/after — see
    // the HistoryState implementation in the behaviors library).
    const viewState = new HistoryState(() => link.click());
    await sleep(500);
    if (viewState.changed) {
      const videoBehavior = new TikTokVideoBehavior();
      yield* videoBehavior.run(ctx);
      await sleep(500);
      await viewState.goBack(Q.backButton);
    }
  }

  /**
   * Entry point: iterates the profile's video tiles, yielding a state
   * update per tile plus everything the per-video behavior yields, then a
   * final completion message.
   *
   * @param {object} ctx - Behavior context.
   */
  async* run(ctx) {
    const { xpathNode, iterChildMatches, scrollIntoView, getState, sleep } = ctx.Lib;
    const profileVideoList = xpathNode(Q.profileVideoList);
    const profileVideos = iterChildMatches(Q.profileVideoItem, profileVideoList);
    for await (const item of profileVideos) {
      scrollIntoView(item);
      yield getState(ctx, "View video", "videos");
      yield* this.openVideo(ctx, item);
      await sleep(500);
    }
    yield getState(ctx, "TikTok Profile Behavior Complete");
  }
}