-
Notifications
You must be signed in to change notification settings - Fork 765
/
Copy pathpuppeteer-controller.ts
150 lines (124 loc) · 5.13 KB
/
puppeteer-controller.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import { tryCancel } from '@apify/timeout';
import type { Cookie } from '@crawlee/types';
import type Puppeteer from 'puppeteer';
import type * as PuppeteerTypes from 'puppeteer';
import { BrowserController } from '../abstract-classes/browser-controller';
import { anonymizeProxySugar } from '../anonymize-proxy';
import { log } from '../logger';
/**
 * Options accepted by `PuppeteerController` when a page is opened in its own
 * browser context: Puppeteer's `BrowserContextOptions` plus optional proxy
 * credentials.
 */
export interface PuppeteerNewPageOptions extends PuppeteerTypes.BrowserContextOptions {
/** User name passed to `anonymizeProxySugar` together with `proxyServer`. */
proxyUsername?: string;
/** Password passed to `anonymizeProxySugar` together with `proxyServer`. */
proxyPassword?: string;
}
/**
 * Delay, in milliseconds, of the timer that `_kill` arms before sending
 * `SIGKILL` to the browser process.
 */
const PROCESS_KILL_TIMEOUT_MILLIS = 5000;
/**
 * `BrowserController` implementation for Puppeteer browsers. It opens pages
 * (optionally inside a dedicated browser context with its own proxy), closes
 * or force-kills the browser, and reads and writes page cookies.
 */
export class PuppeteerController extends BrowserController<
    typeof Puppeteer,
    PuppeteerTypes.LaunchOptions,
    PuppeteerTypes.Browser,
    PuppeteerNewPageOptions
> {
    /**
     * Splits a proxy URL into the per-page proxy options understood by `_newPage`.
     *
     * @param proxyUrl Proxy URL; its user name and password are percent-decoded.
     * @param pageOptions Page options; only `proxyBypassList` is read from it.
     * @returns An empty object when `proxyUrl` is falsy, otherwise an object with
     *   `proxyServer`, `proxyUsername`, `proxyPassword` and `proxyBypassList`.
     * @throws TypeError when `proxyUrl` cannot be parsed as a URL.
     */
    normalizeProxyOptions(proxyUrl: string | undefined, pageOptions: any): Record<string, unknown> {
        if (!proxyUrl) {
            return {};
        }

        const url = new URL(proxyUrl);

        // `URL.origin` is the literal string 'null' for schemes the URL standard does not
        // treat as special (e.g. `socks5:`). Rebuild the server address from its parts in
        // that case so such proxies do not end up with an unusable 'null' server.
        const hasOpaqueOrigin = url.origin === 'null';
        const proxyServer = hasOpaqueOrigin && url.host ? `${url.protocol}//${url.host}` : url.origin;

        return {
            proxyServer,
            proxyUsername: decodeURIComponent(url.username),
            proxyPassword: decodeURIComponent(url.password),
            proxyBypassList: pageOptions?.proxyBypassList,
        };
    }

    /**
     * Opens a new page.
     *
     * Without `contextOptions` the page is created directly on the browser. With
     * `contextOptions` a new browser context is created for the page; when a
     * `proxyServer` is set it is first passed through `anonymizeProxySugar`, and the
     * returned cleanup callback runs once the page closes or page creation fails.
     *
     * @param contextOptions Options for the dedicated browser context. The object is
     *   not modified; a shallow copy is adjusted instead.
     * @returns The newly created page.
     * @throws Error when `contextOptions` is provided but `useIncognitoPages` is not
     *   enabled on the launch context.
     */
    protected async _newPage(contextOptions?: PuppeteerNewPageOptions): Promise<PuppeteerTypes.Page> {
        if (contextOptions === undefined) {
            const plainPage = await this.browser.newPage();
            tryCancel();

            plainPage.once('close', () => {
                this.activePages--;
            });

            return plainPage;
        }

        if (!this.launchContext.useIncognitoPages) {
            throw new Error(
                'A new page can be created with provided context only when using incognito pages or experimental containers.',
            );
        }

        // Work on a shallow copy: the proxy fields are rewritten below and the caller's
        // object must stay reusable for further pages.
        const options: PuppeteerNewPageOptions = { ...contextOptions };

        let closeProxy: () => Promise<unknown> = async () => {};

        if (options.proxyServer) {
            const [anonymizedProxyUrl, close] = await anonymizeProxySugar(
                options.proxyServer,
                options.proxyUsername,
                options.proxyPassword,
            );

            if (anonymizedProxyUrl) {
                // The anonymized URL replaces the original server, so the credentials
                // are no longer passed on to the browser context.
                options.proxyServer = anonymizedProxyUrl;
                delete options.proxyUsername;
                delete options.proxyPassword;
            }

            closeProxy = close;
        }

        // Runs the proxy cleanup and logs a failure instead of propagating it, so it can
        // neither mask the original error nor surface as an unhandled rejection.
        const releaseProxy = async (): Promise<void> => {
            try {
                await closeProxy();
            } catch (error: any) {
                log.exception(error, 'Failed to close anonymized proxy.');
            }
        };

        try {
            // @ts-expect-error not exposed on type level
            const { CdpBrowser } = await import('puppeteer');
            // Pick whichever context factory this Puppeteer version exposes.
            const oldPuppeteerVersion = !CdpBrowser || 'createIncognitoBrowserContext' in CdpBrowser.prototype;
            const method = oldPuppeteerVersion ? 'createIncognitoBrowserContext' : 'createBrowserContext';

            const context = (await (this.browser as any)[method](options)) as PuppeteerTypes.BrowserContext;
            tryCancel();

            const page = await context.newPage();
            tryCancel();

            /*
            // DO NOT USE YET! DOING SO DISABLES CACHE WHICH IS 50% PERFORMANCE HIT!
            if (options.proxyUsername || options.proxyPassword) {
                await page.authenticate({
                    username: options.proxyUsername ?? '',
                    password: options.proxyPassword ?? '',
                });
                tryCancel();
            }
            */

            page.once('close', async () => {
                this.activePages--;

                try {
                    await context.close();
                } catch (error: any) {
                    log.exception(error, 'Failed to close context.');
                } finally {
                    await releaseProxy();
                }
            });

            return page;
        } catch (error) {
            await releaseProxy();
            throw error;
        }
    }

    /** Closes the browser via `browser.close()`. */
    protected async _close(): Promise<void> {
        await this.browser.close();
    }

    /**
     * Closes the browser and arms a timer that sends `SIGKILL` to the browser process
     * after `PROCESS_KILL_TIMEOUT_MILLIS`. The timer is cleared only when
     * `browser.close()` resolves; if it rejects, the rejection is logged at debug level
     * and the timer stays armed. Returns early when the browser has no process handle.
     */
    protected async _kill(): Promise<void> {
        const browserProcess = this.browser.process();

        if (!browserProcess) {
            log.debug('Browser was connected using the `puppeteer.connect` method no browser to kill.');
            return;
        }

        const timeout = setTimeout(() => {
            // This is here because users reported that it happened
            // that error `TypeError: Cannot read property 'kill' of null` was thrown.
            // Likely Chrome process wasn't started due to some error ...
            browserProcess?.kill('SIGKILL');
        }, PROCESS_KILL_TIMEOUT_MILLIS);

        try {
            await this.browser.close();
            clearTimeout(timeout);
        } catch (error) {
            log.debug('Browser was already killed.', { error });
        }
    }

    /**
     * Returns the cookies reported by `page.cookies()`.
     *
     * @param page Page to read the cookies from.
     */
    protected async _getCookies(page: PuppeteerTypes.Page): Promise<Cookie[]> {
        return page.cookies();
    }

    /**
     * Sets the given cookies on the page via `page.setCookie()`.
     *
     * @param page Page to set the cookies on.
     * @param cookies Cookies to set.
     */
    protected async _setCookies(page: PuppeteerTypes.Page, cookies: Cookie[]): Promise<void> {
        return page.setCookie(...cookies);
    }
}