-
Notifications
You must be signed in to change notification settings - Fork 748
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore: add
cheerio-impit-ts
e2e test
- Loading branch information
Showing
8 changed files
with
167 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"actorSpecification": 1, | ||
"name": "test-cheerio-impit-ts", | ||
"version": "0.0", | ||
"buildTag": "latest", | ||
"env": null | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{ | ||
"root": true, | ||
"extends": "../../.eslintrc.json", | ||
"parserOptions": { | ||
"project": "./test/e2e/cheerio-impit-ts/actor/tsconfig.json", | ||
"ecmaVersion": 2022 | ||
}, | ||
"rules": { | ||
"no-empty-function": "off", | ||
"@typescript-eslint/no-explicit-any": "off", | ||
"no-constant-condition": "off" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
.idea | ||
.DS_Store | ||
node_modules | ||
package-lock.json | ||
apify_storage | ||
crawlee_storage | ||
storage | ||
main.d.ts | ||
main.d.ts.map | ||
main.js | ||
main.js.map |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Two-stage image for the cheerio-impit-ts e2e actor.
# Stage 1 ("builder"): install dependencies and compile main.ts.
FROM node:20 AS builder | ||
|
||
# Build inputs: the local @crawlee packages referenced via `file:` in package.json,
# the package manifests, the TypeScript config and the actor source.
COPY /packages ./packages | ||
COPY /package*.json ./ | ||
COPY /tsconfig.json ./ | ||
COPY /main.ts ./ | ||
# Install, update, then run the `build` script (which is `tsc` per package.json).
# NOTE(review): npm documents `--only=prod` / `--no-optional` as deprecated aliases of
# `--omit=dev` / `--omit=optional` - confirm before modernising.
# NOTE(review): typescript is a devDependency; presumably the unflagged `npm update`
# is what makes `tsc` available for the build - confirm.
RUN npm --quiet set progress=false \ | ||
&& npm install --only=prod --no-optional --no-audit \ | ||
&& npm update \ | ||
&& npm run build | ||
|
||
# Stage 2: runtime image.
# NOTE(review): the base is curl-impersonate although this actor uses the impit client -
# confirm this base image is intended.
FROM lwthiker/curl-impersonate | ||
|
||
# Bring over the builder's /usr/local/bin and npm module directory (the stage below runs
# `node` and `npm`), plus the installed modules, local packages, manifests and compiled main.js.
COPY --from=builder /usr/local/bin /usr/local/bin | ||
COPY --from=builder /usr/local/lib/node_modules/npm /usr/local/lib/node_modules/npm | ||
COPY --from=builder /node_modules ./node_modules | ||
COPY --from=builder /packages ./packages | ||
COPY --from=builder /package*.json ./ | ||
COPY --from=builder /main.js ./ | ||
COPY /.actor ./.actor | ||
|
||
# Print diagnostic information at build time; `|| true` keeps a failing `npm list`
# from aborting the build.
RUN echo "Installed NPM packages:" \ | ||
&& (npm list --only=prod --no-optional --all || true) \ | ||
&& echo "Node.js version:" \ | ||
&& node --version \ | ||
&& echo "NPM version:" \ | ||
&& npm --version | ||
|
||
# run compiled code | ||
CMD npm run start:prod |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import { CheerioCrawler, Dictionary } from '@crawlee/cheerio'; | ||
import { Actor } from 'apify'; | ||
import { ImpitHttpClient, Browser } from '@crawlee/impit-client'; | ||
|
||
// Initialise the Actor SDK. When STORAGE_IMPLEMENTATION is 'LOCAL', the storage client
// from '@apify/storage-local' is injected; otherwise the SDK defaults are used.
const wantsLocalStorage = process.env.STORAGE_IMPLEMENTATION === 'LOCAL';

if (!wantsLocalStorage) {
    await Actor.init();
} else {
    // @ts-ignore
    const { ApifyStorageLocal } = await import('@apify/storage-local');
    // @ts-ignore
    await Actor.init({ storage: new ApifyStorageLocal() });
}
|
||
// Crawler under test: a CheerioCrawler configured with the impit HTTP client set to
// the Firefox browser profile.
const crawler = new CheerioCrawler({
    httpClient: new ImpitHttpClient({ browser: Browser.Firefox }),

    /**
     * For each crawled page, issues three extra requests through `context.sendRequest`
     * and stores their bodies together with the page body and title in the dataset.
     */
    async requestHandler(context) {
        const uuidEndpoint = 'https://httpbin.org/uuid';

        // No responseType given here; the companion test parses this body with JSON.parse.
        const textResponse = await context.sendRequest({
            url: uuidEndpoint,
        });

        // Same endpoint, this time requesting a JSON response type.
        const jsonResponse = await context.sendRequest({
            url: uuidEndpoint,
            responseType: 'json',
        });

        // Endpoint whose JSON body carries the 'user-agent' key read below.
        const userAgentResponse = await context.sendRequest<Dictionary>({
            url: 'https://httpbin.org/user-agent',
            responseType: 'json',
        });

        await context.pushData({
            body: context.body,
            title: context.$('title').text(),
            userAgent: userAgentResponse.body['user-agent'],
            uuidTextResponse: textResponse.body,
            uuidJsonResponse: jsonResponse.body,
        });
    },
});
|
||
// Crawl the single start URL, then shut the Actor down.
const startUrls = ['https://httpbin.org/'];
await crawler.run(startUrls);

// The `exit` option is driven by Actor.isAtHome(), evaluated after the crawl has finished.
const exitProcess = Actor.isAtHome();
await Actor.exit({ exit: exitProcess });
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
{ | ||
"name": "test-cheerio-impit-ts", | ||
"version": "0.0.1", | ||
"description": "Cheerio Crawler Test - Impit HTTP client", | ||
"dependencies": { | ||
"apify": "next", | ||
"@apify/storage-local": "^2.1.3", | ||
"@crawlee/basic": "file:./packages/basic-crawler", | ||
"@crawlee/browser-pool": "file:./packages/browser-pool", | ||
"@crawlee/http": "file:./packages/http-crawler", | ||
"@crawlee/cheerio": "file:./packages/cheerio-crawler", | ||
"@crawlee/core": "file:./packages/core", | ||
"@crawlee/memory-storage": "file:./packages/memory-storage", | ||
"@crawlee/types": "file:./packages/types", | ||
"@crawlee/utils": "file:./packages/utils", | ||
"@crawlee/impit-client": "file:./packages/impit-client" | ||
}, | ||
"overrides": { | ||
"apify": { | ||
"@crawlee/core": "file:./packages/core", | ||
"@crawlee/types": "file:./packages/types", | ||
"@crawlee/utils": "file:./packages/utils" | ||
} | ||
}, | ||
"devDependencies": { | ||
"@apify/tsconfig": "^0.1.0", | ||
"typescript": "^5.0.0" | ||
}, | ||
"scripts": { | ||
"start": "tsc && node main.js", | ||
"start:prod": "node main.js", | ||
"build": "tsc" | ||
}, | ||
"type": "module", | ||
"license": "ISC" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"extends": "@apify/tsconfig", | ||
"compilerOptions": { | ||
"module": "ES2022", | ||
"target": "ES2022", | ||
"lib": ["DOM"], | ||
"skipLibCheck": true, | ||
"incremental": false | ||
}, | ||
"include": ["./**/*.ts"] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import { initialize, getActorTestDir, runActor, expect } from '../tools.mjs'; | ||
|
||
// E2E test for the cheerio-impit-ts actor: runs the actor and checks the single
// dataset item it pushes.
const testActorDirname = getActorTestDir(import.meta.url);
await initialize(testActorDirname);

const { stats, datasetItems } = await runActor(testActorDirname);

await expect(stats.requestsFinished > 0, 'All requests finished');
await expect(datasetItems.length === 1, 'A dataset item was pushed');

const [result] = datasetItems;

// Matches a Firefox-style user agent: a Gecko/<8 digits> token followed by Firefox/<version>.
const firefoxUserAgentPattern = /Gecko\/\d{8} Firefox\/\d{2}/;

// Every `expect` is awaited, matching the two calls above, so no assertion promise is
// left floating and each check is settled before the next statement runs.
await expect(result.body.length > 1000, 'HTML response is not empty');
await expect(result.title === 'httpbin.org', 'HTML title is correct');

// Log the reported user agent so it is visible in the test output.
console.log(result.userAgent);
await expect(firefoxUserAgentPattern.test(result.userAgent), 'Impit correctly spoofs Firefox');
await expect(result.uuidJsonResponse.uuid !== undefined, 'JSON response contains UUID');
await expect(JSON.parse(result.uuidTextResponse).uuid !== undefined, 'Text response contains UUID');