Skip to content

Commit

Permalink
chore: add cheerio-impit-ts e2e test
Browse files Browse the repository at this point in the history
  • Loading branch information
barjin committed Jan 20, 2025
1 parent 32f9a2b commit 200a029
Show file tree
Hide file tree
Showing 8 changed files with 167 additions and 0 deletions.
7 changes: 7 additions & 0 deletions test/e2e/cheerio-impit-ts/actor/.actor/actor.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"actorSpecification": 1,
"name": "test-cheerio-impit-ts",
"version": "0.0",
"buildTag": "latest",
"env": null
}
13 changes: 13 additions & 0 deletions test/e2e/cheerio-impit-ts/actor/.eslintrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"root": true,
"extends": "../../.eslintrc.json",
"parserOptions": {
"project": "./test/e2e/cheerio-impit-ts/actor/tsconfig.json",
"ecmaVersion": 2022
},
"rules": {
"no-empty-function": "off",
"@typescript-eslint/no-explicit-any": "off",
"no-constant-condition": "off"
}
}
11 changes: 11 additions & 0 deletions test/e2e/cheerio-impit-ts/actor/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.idea
.DS_Store
node_modules
package-lock.json
apify_storage
crawlee_storage
storage
main.d.ts
main.d.ts.map
main.js
main.js.map
30 changes: 30 additions & 0 deletions test/e2e/cheerio-impit-ts/actor/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Stage 1 ("builder"): install the actor's dependencies and compile main.ts.
FROM node:20 AS builder

# Copy the local packages (referenced as `file:./packages/...` dependencies),
# the package manifests, the TypeScript config and the actor entry point.
COPY /packages ./packages
COPY /package*.json ./
COPY /tsconfig.json ./
COPY /main.ts ./
# Install, update, then run the `build` script.
# NOTE(review): the install step is limited to production dependencies while the
# `build` script invokes `tsc`, which is declared as a devDependency; presumably
# `npm update` pulls it in - confirm.
RUN npm --quiet set progress=false \
&& npm install --only=prod --no-optional --no-audit \
&& npm update \
&& npm run build

# Stage 2: runtime image based on lwthiker/curl-impersonate.
FROM lwthiker/curl-impersonate

# Carry over the contents of /usr/local/bin and the npm module directory from the
# builder stage (presumably this is where node:20 keeps the node and npm
# executables - confirm), followed by the installed modules, the local packages,
# the manifests and the compiled main.js.
COPY --from=builder /usr/local/bin /usr/local/bin
COPY --from=builder /usr/local/lib/node_modules/npm /usr/local/lib/node_modules/npm
COPY --from=builder /node_modules ./node_modules
COPY --from=builder /packages ./packages
COPY --from=builder /package*.json ./
COPY --from=builder /main.js ./
# The actor specification comes from the build context, not from the builder stage.
COPY /.actor ./.actor

# Print diagnostics into the build log; `|| true` keeps the build going even when
# `npm list` exits with a non-zero status.
RUN echo "Installed NPM packages:" \
&& (npm list --only=prod --no-optional --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version

# Run the compiled code through the `start:prod` npm script.
CMD npm run start:prod
41 changes: 41 additions & 0 deletions test/e2e/cheerio-impit-ts/actor/main.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// The scoped `@crawlee/*` packages are imported before `apify` to satisfy the lint import-order rule.
import { CheerioCrawler, Dictionary } from '@crawlee/cheerio';
import { ImpitHttpClient, Browser } from '@crawlee/impit-client';
import { Actor } from 'apify';

Check failure on line 3 in test/e2e/cheerio-impit-ts/actor/main.ts

View workflow job for this annotation

GitHub Actions / Lint

`@crawlee/impit-client` import should occur before import of `apify`

// Initialise the Actor SDK. When STORAGE_IMPLEMENTATION is 'LOCAL', a storage
// instance from '@apify/storage-local' is passed in; otherwise no options are given.
const useLocalStorage = process.env.STORAGE_IMPLEMENTATION === 'LOCAL';
if (!useLocalStorage) {
    await Actor.init();
} else {
    // @ts-ignore
    await Actor.init({ storage: new (await import('@apify/storage-local')).ApifyStorageLocal() });
}

// HTTP client under test, configured with the Firefox browser option.
const httpClient = new ImpitHttpClient({ browser: Browser.Firefox });

const crawler = new CheerioCrawler({
    httpClient,
    /**
     * Issues three extra requests through `sendRequest` (UUID as text, UUID as
     * JSON, and the user-agent endpoint as JSON) and stores their bodies next
     * to the crawled page's body and title.
     */
    requestHandler: async (ctx) => {
        const uuidUrl = 'https://httpbin.org/uuid';

        // No responseType given for the first call; the second asks for JSON.
        const textReply = await ctx.sendRequest({ url: uuidUrl });
        const jsonReply = await ctx.sendRequest({ url: uuidUrl, responseType: 'json' });

        const uaReply = await ctx.sendRequest<Dictionary>({
            url: 'https://httpbin.org/user-agent',
            responseType: 'json',
        });

        await ctx.pushData({
            body: ctx.body,
            title: ctx.$('title').text(),
            userAgent: uaReply.body['user-agent'],
            uuidTextResponse: textReply.body,
            uuidJsonResponse: jsonReply.body,
        });
    },
});

const startUrls = ['https://httpbin.org/'];
await crawler.run(startUrls);

// The `exit` option is set from Actor.isAtHome().
const atHome = Actor.isAtHome();
await Actor.exit({ exit: atHome });
36 changes: 36 additions & 0 deletions test/e2e/cheerio-impit-ts/actor/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"name": "test-cheerio-impit-ts",
"version": "0.0.1",
"description": "Cheerio Crawler Test - Impit HTTP client",
"dependencies": {
"apify": "next",
"@apify/storage-local": "^2.1.3",
"@crawlee/basic": "file:./packages/basic-crawler",
"@crawlee/browser-pool": "file:./packages/browser-pool",
"@crawlee/http": "file:./packages/http-crawler",
"@crawlee/cheerio": "file:./packages/cheerio-crawler",
"@crawlee/core": "file:./packages/core",
"@crawlee/memory-storage": "file:./packages/memory-storage",
"@crawlee/types": "file:./packages/types",
"@crawlee/utils": "file:./packages/utils",
"@crawlee/impit-client": "file:./packages/impit-client"
},
"overrides": {
"apify": {
"@crawlee/core": "file:./packages/core",
"@crawlee/types": "file:./packages/types",
"@crawlee/utils": "file:./packages/utils"
}
},
"devDependencies": {
"@apify/tsconfig": "^0.1.0",
"typescript": "^5.0.0"
},
"scripts": {
"start": "tsc && node main.js",
"start:prod": "node main.js",
"build": "tsc"
},
"type": "module",
"license": "ISC"
}
11 changes: 11 additions & 0 deletions test/e2e/cheerio-impit-ts/actor/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"extends": "@apify/tsconfig",
"compilerOptions": {
"module": "ES2022",
"target": "ES2022",
"lib": ["DOM"],
"skipLibCheck": true,
"incremental": false
},
"include": ["./**/*.ts"]
}
18 changes: 18 additions & 0 deletions test/e2e/cheerio-impit-ts/test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { initialize, getActorTestDir, runActor, expect } from '../tools.mjs';

// Locate this test's actor directory from the module URL and initialize it.
const testActorDirname = getActorTestDir(import.meta.url);
await initialize(testActorDirname);

// Run the actor and collect its run statistics and pushed dataset items.
const { stats, datasetItems } = await runActor(testActorDirname);

await expect(stats.requestsFinished > 0, 'All requests finished');
await expect(datasetItems.length === 1, 'A dataset item was pushed');

const result = datasetItems[0];

// Every `expect` call is awaited, consistent with the checks above, so each
// assertion settles before the next expression is evaluated.
await expect(result.body.length > 1000, 'HTML response is not empty');
await expect(result.title === 'httpbin.org', 'HTML title is correct');
// Log the reported user agent so a failed spoofing check can be diagnosed from the output.
console.log(result.userAgent);
await expect(/Gecko\/\d{8} Firefox\/\d{2}/.test(result.userAgent), 'Impit correctly spoofs Firefox');
await expect(result.uuidJsonResponse.uuid !== undefined, 'JSON response contains UUID');
// The text response is parsed here to confirm that it also carries a UUID.
await expect(JSON.parse(result.uuidTextResponse).uuid !== undefined, 'Text response contains UUID');

0 comments on commit 200a029

Please sign in to comment.