From 106f5e0af226c9dded6bf0cba7622899bbe6624f Mon Sep 17 00:00:00 2001 From: Inokentii Mazhara Date: Mon, 24 Jun 2024 19:46:31 +0300 Subject: [PATCH 1/5] TW-1459 Try to add Cyren API with custom categories --- .env.dist | 1 + .gitignore | 3 + CustomCategoryDefinition.ini | 11 ++ CustomCategoryIndex.idx | 2 + Dockerfile | 2 +- Dockerfile.ctwsd | 14 ++ README.md | 20 ++- cc_delta_1 | 0 cc_snapshot_1.txt | 2 + fuflo1.js | 116 ++++++++++++++++ package.json | 1 + setup_ctwsd.sh | 20 +++ src/config.ts | 3 +- src/index.ts | 15 ++ src/utils/MutexProtectedData.ts | 4 +- src/utils/PromisifiedSemaphore.ts | 8 +- src/utils/SingleQueryDataProvider.ts | 4 +- src/utils/cyren-api.ts | 198 +++++++++++++++++++++++++++ src/utils/express-helpers.ts | 29 +++- src/utils/schemas.ts | 4 + yarn.lock | 5 + 21 files changed, 446 insertions(+), 16 deletions(-) create mode 100644 CustomCategoryDefinition.ini create mode 100644 CustomCategoryIndex.idx create mode 100644 Dockerfile.ctwsd create mode 100644 cc_delta_1 create mode 100644 cc_snapshot_1.txt create mode 100644 fuflo1.js create mode 100755 setup_ctwsd.sh create mode 100644 src/utils/cyren-api.ts diff --git a/.env.dist b/.env.dist index 5f4b337..ced22c9 100644 --- a/.env.dist +++ b/.env.dist @@ -10,3 +10,4 @@ THREE_ROUTE_API_AUTH_TOKEN= REDIS_URL= ADMIN_USERNAME= ADMIN_PASSWORD= +CTWSD_CONTAINER_ADDRESS= diff --git a/.gitignore b/.gitignore index 9fb2dbd..52c847d 100644 --- a/.gitignore +++ b/.gitignore @@ -105,3 +105,6 @@ dist # IDE .idea + +# ctwsd +ctwsd*/ diff --git a/CustomCategoryDefinition.ini b/CustomCategoryDefinition.ini new file mode 100644 index 0000000..a8e35e3 --- /dev/null +++ b/CustomCategoryDefinition.ini @@ -0,0 +1,11 @@ +[501] +Name = LCC501 +Description = Local Custom Category 501 + +[502] +Name = LCC502 +Description = Local Custom Category 502 + +[503] +Name = LCC503 +Description = Local Custom Category 503 \ No newline at end of file diff --git a/CustomCategoryIndex.idx b/CustomCategoryIndex.idx new file mode 100644 index 0000000..aa4121a --- /dev/null +++ b/CustomCategoryIndex.idx @@ -0,0 +1,2 @@ +/opt/tmp/cc_snapshot_1.txt +/opt/tmp/cc_delta_1 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index e4444be..e59b560 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM node:14 +FROM node:20 WORKDIR /usr/src/app COPY package.json yarn.lock ./ RUN yarn diff --git a/Dockerfile.ctwsd b/Dockerfile.ctwsd new file mode 100644 index 0000000..faa6443 --- /dev/null +++ b/Dockerfile.ctwsd @@ -0,0 +1,14 @@ +FROM --platform=linux/amd64 ubuntu:22.04 +WORKDIR /usr/src/app +ARG CTWSD_FTP_URL +ARG CTWSD_FTP_USER +ARG CTWSD_FTP_PASS +ARG CTWSD_SERVER_ADDRESS +ARG CTWSD_LICENSE_KEY +RUN mkdir -p /opt/tmp +COPY ./setup_ctwsd.sh /usr/src/app/ +COPY ./cc_* ./CustomCategoryDefinition.ini ./CustomCategoryIndex.idx /opt/tmp/ +RUN CTWSD_FTP_URL=${CTWSD_FTP_URL} CTWSD_FTP_USER=${CTWSD_FTP_USER} CTWSD_FTP_PASS=${CTWSD_FTP_PASS} \ + CTWSD_LICENSE_KEY=${CTWSD_LICENSE_KEY} CTWSD_SERVER_ADDRESS=${CTWSD_SERVER_ADDRESS} ./setup_ctwsd.sh +EXPOSE 8080 +CMD [ "./ctwsd/bin/ctwsd", "-i", "-c", "./ctwsd/bin/ctwsd.conf" ] diff --git a/README.md b/README.md index 2c21d2e..766ff4d 100644 --- a/README.md +++ b/README.md @@ -26,11 +26,11 @@ Use `yarn run build` for building. ## Building and running using Docker -Follow the instructions on [Building your image](https://nodejs.org/en/docs/guides/nodejs-docker-webapp/#building-your-image) and [Running the image](https://nodejs.org/en/docs/guides/nodejs-docker-webapp/#run-the-image). Change the port number in both `.env` and `Dockerfile` if the webapp should be run on another port than 3000. +Follow the instructions on [Building your image](https://nodejs.org/en/docs/guides/nodejs-docker-webapp/#building-your-image) and [Running the image](https://nodejs.org/en/docs/guides/nodejs-docker-webapp/#run-the-image). Change the port number in both `.env` and `Dockerfile` if the webapp should be run on another port than 3000. You also need to build and run a container with `ctwsd`, see "Building and running `ctwsd` host". ## Running with pm2 -You can run the built backend using `pm2 restart templewallet-backend.json`. +You can run the built backend using `pm2 restart templewallet-backend.json`. You also need to build and run a container with `ctwsd`, see "Building and running `ctwsd` host". ## Upstreaming using nginx @@ -67,3 +67,19 @@ location /api/exchange-rates/ { ``` Replace 3000 with the respective port number if the backend is listening on a different one. Restart nginx using `sudo systemctl restart nginx` after changes are saved. + +## Building and running `ctwsd` host + +1. Build the image with command +``` +docker build \ + --build-arg CTWSD_FTP_URL= \ + --build-arg CTWSD_FTP_USER= \ + --build-arg CTWSD_FTP_PASS= \ + --build-arg CTWSD_SERVER_ADDRESS= \ + --build-arg CTWSD_LICENSE_KEY=: \ + --file Dockerfile.ctwsd . +``` +OEM id is an arbitrary alphanumeric string with up to 35 characters. +2. Get image ID with command `docker image ls` +3. Start a container with the new image using command `docker container run -d -p :8080 ` diff --git a/cc_delta_1 b/cc_delta_1 new file mode 100644 index 0000000..e69de29 diff --git a/cc_snapshot_1.txt b/cc_snapshot_1.txt new file mode 100644 index 0000000..7dee448 --- /dev/null +++ b/cc_snapshot_1.txt @@ -0,0 +1,2 @@ +i www.ynet.co.il 503 +i www.walla.co.il 501,502 diff --git a/fuflo1.js b/fuflo1.js new file mode 100644 index 0000000..fb5d0de --- /dev/null +++ b/fuflo1.js @@ -0,0 +1,116 @@ +const sites = [ + "www.youtube.com", + "www.coingecko.com", + "etherscan.io", + /* "www.dextools.io", + "coinmarketcap.com", + "dexscreener.com", + "neuralwriter.com", */ + "bscscan.com", + "www.chess.com", + // "www.photopea.com", + "tzkt.io", + /* "polygonscan.com", + "www.freepik.com", */ + "rollercoin.com", + /* "mobalytics.gg", + "www.solitalian.it", + "stackoverflow.com", + "ezgif.com", + "udn.com", + "mail01.orange.fr", + "www.mlb.com", + "solscan.io", + "www.leagueofgraphs.com", + "captcha.bot", + "www.op.gg", + "www.sahibinden.com", + "www.fmkorea.com", + "www.sozcu.com.tr", + "www.dailymotion.com", + "www.leboncoin.fr", + "www.espn.com", + "www.geckoterminal.com", */ + "claimbits.net", + /* "quizlet.com", + "onlinezen.online", + "www.yahoo.com", + "www.pixilart.com", + "www.thingiverse.com", + "www.speedtest.net", + "www.quora.com", + "www.geny.com", */ + "www.w3schools.com", + /* "www.merriam-webster.com", + "universitieshub.store", + "eksisozluk.com", + "industi.online", + "slickdeals.net", + "socialcounts.org", + "www.ivoox.com", + "temp-mail.org", + "tw.news.yahoo.com", + "tw.nextapple.com", + "www.gazzetta.it", + "w2g.tv", + "devlopweb.online", + "www.dailymail.co.uk", + "www.dcard.tw", + "www.geeksforgeeks.org", + "basescan.org", + "pomofocus.io", + "tw.yahoo.com", + "deepai.org", + "5tars.io", + "as.com", + "blogchain.eu.org", + "vnexpress.net", + "tureng.com", + "autofaucet.dutchycorp.space", + "www.infobae.com", + "www.cricbuzz.com", + "www.cnn.com", + "dgb.lol", + "u.gg", + "inconvertiblemoney.online", + "www.procyclingstats.com", */ + "firefaucet.win", + "www.blockchain.com", + /* "www.marca.com", + "www.zillow.com", + "www.draftkings.com", + "www.javatpoint.com", + "www.publish0x.com", + "www.arkadium.com", + "kiddyearner.com", + "t.17track.net", + "www.coindesk.com", + "www.programiz.com", + "www.pixels.tips", + "genius.com", + "tw.stock.yahoo.com", + "www.ebay.com", + "poki.com", + "www.olx.ua", + "www.kleinanzeigen.de", + "www.ynet.co.il", + "www.news.com.au", + "earn-pepe.com", + "www.slideshare.net", */ + "www.chinatimes.com" +]; + +const axios = require('axios'); + +(async () => { + for (let i = 0; i < sites.length; i++) { + const site = sites[i]; + const t0 = Date.now(); + try { + const response = await axios.get(`http://localhost:3001/api/get-ad-category`, { params: { url: `https://${site}` } }); + console.log(site, `${Date.now() - t0}ms`, response.data.join(', ')); + } catch (e) { + console.error(site, e); + } + } +})(); diff --git a/package.json b/package.json index e7a064d..9354e00 100644 --- a/package.json +++ b/package.json @@ -33,6 +33,7 @@ "pino-http": "^5.5.0", "pino-pretty": "^4.7.1", "qs": "^6.10.3", + "rate-limiter-flexible": "^5.0.3", "semaphore": "^1.1.0", "semver": "^7.6.0", "swagger-jsdoc": "^6.2.8", diff --git a/setup_ctwsd.sh b/setup_ctwsd.sh new file mode 100755 index 0000000..e088774 --- /dev/null +++ b/setup_ctwsd.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +CONFDIR=/opt/tmp +ESCAPED_CONFDIR=$(echo $CONFDIR | sed 's/\//\\\//g') +DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y wget tar +wget $CTWSD_FTP_URL --user=$CTWSD_FTP_USER --password=$CTWSD_FTP_PASS --no-check-certificate -O ./ctwsd.tar.gz +tar -xzf ctwsd.tar.gz +rm ctwsd.tar.gz +mv ./ctwsd* ./ctwsd + +sed -i "s/ServerAddress \= xxxxxxxxx/ServerAddress = ${CTWSD_SERVER_ADDRESS}/" ./ctwsd/bin/ctwsd.conf +sed -i "s/LicenseKey \= xxxxxxxxx/LicenseKey = ${CTWSD_LICENSE_KEY}/" ./ctwsd/bin/ctwsd.conf +sed -i "s/#LocalCustomCategories-Enabled=0/LocalCustomCategories-Enabled=1/" ./ctwsd/bin/ctwsd.conf +sed -i "s/#CustomCategoriesCacheMaxEntries=10000/CustomCategoriesCacheMaxEntries=10000/" ./ctwsd/bin/ctwsd.conf +sed -i ${sed_mac_arg:+""} \ + "s/#LocalCustomCategories-Uri=/LocalCustomCategories-Uri=${ESCAPED_CONFDIR}\/CustomCategoryIndex.idx/" \ + ./ctwsd/bin/ctwsd.conf +sed -i ${sed_mac_arg:+""} \ + "s/#LocalCustomCategoriesDefinitionFileURI=/LocalCustomCategoriesDefinitionFileURI=${ESCAPED_CONFDIR}\/CustomCategoryDefinition.ini/" \ + ./ctwsd/bin/ctwsd.conf diff --git a/src/config.ts b/src/config.ts index f5f3a20..094a540 100644 --- a/src/config.ts +++ b/src/config.ts @@ -13,7 +13,8 @@ export const EnvVars = { REDIS_URL: getEnv('REDIS_URL'), ADMIN_USERNAME: getEnv('ADMIN_USERNAME'), ADMIN_PASSWORD: getEnv('ADMIN_PASSWORD'), - COVALENT_API_KEY: getEnv('COVALENT_API_KEY') + COVALENT_API_KEY: getEnv('COVALENT_API_KEY'), + CTWSD_CONTAINER_ADDRESS: getEnv('CTWSD_CONTAINER_ADDRESS') }; for (const name in EnvVars) { diff --git a/src/index.ts b/src/index.ts index 8b1d9ff..47fa0d4 100644 --- a/src/index.ts +++ b/src/index.ts @@ -30,11 +30,14 @@ import { getAliceBobEstimationPayload } from './utils/alice-bob/get-alice-bob-es import { getAliceBobOrderInfo } from './utils/alice-bob/get-alice-bob-order-info'; import { getAliceBobPairInfo } from './utils/alice-bob/get-alice-bob-pair-info'; import { getAliceBobPairsInfo } from './utils/alice-bob/get-alice-bob-pairs-info'; +import { getSiteCategories } from './utils/cyren-api'; import { CodedError } from './utils/errors'; +import { withCodedExceptionHandler, withQueryParamsValidation } from './utils/express-helpers'; import { coinGeckoTokens } from './utils/gecko-tokens'; import { getExternalApiErrorPayload, isDefined, isNonEmptyString } from './utils/helpers'; import logger from './utils/logger'; import { getSignedMoonPayUrl } from './utils/moonpay/get-signed-moonpay-url'; +import { adCategoryQueryParamsSchema } from './utils/schemas'; import { getSigningNonce } from './utils/signing-nonce'; import SingleQueryDataProvider from './utils/SingleQueryDataProvider'; import { tezExchangeRateProvider } from './utils/tezos'; @@ -66,6 +69,7 @@ const app = express(); app.use(pinoHttp(PINO_LOGGER)); app.use(cors()); app.use(bodyParser.json()); +app.use(bodyParser.urlencoded({ extended: true })); const androidApp = firebaseAdmin.initializeApp( { @@ -335,6 +339,17 @@ app.get('/api/advertising-info', (_req, res) => { app.use('/api/slise-ad-rules', adRulesRouter); +app.get( + '/api/get-ad-category', + withCodedExceptionHandler( + withQueryParamsValidation(adCategoryQueryParamsSchema, async (req, res) => { + const categories = await getSiteCategories(req.query.url); + + res.status(200).send(categories); + }) + ) +); + app.use('/api/evm', evmRouter); app.post('/api/magic-square-quest/start', async (req, res) => { diff --git a/src/utils/MutexProtectedData.ts b/src/utils/MutexProtectedData.ts index 5140731..d2cab85 100644 --- a/src/utils/MutexProtectedData.ts +++ b/src/utils/MutexProtectedData.ts @@ -7,8 +7,8 @@ export default class MutexProtectedData { this.mutex = new PromisifiedSemaphore(); } - exec(task: () => void | Promise) { - return this.mutex.exec(task); + exec(task: (data: T, setData: (value: T) => void) => U | Promise) { + return this.mutex.exec(() => task(this.data, value => void (this.data = value))); } setData(newData: T) { diff --git a/src/utils/PromisifiedSemaphore.ts b/src/utils/PromisifiedSemaphore.ts index af05643..a7ac435 100644 --- a/src/utils/PromisifiedSemaphore.ts +++ b/src/utils/PromisifiedSemaphore.ts @@ -15,12 +15,12 @@ export default class PromisifiedSemaphore { return this.semaphore.available(n); } - exec(task: () => void | Promise, n = 1) { - return new Promise((resolve, reject) => { + exec(task: () => T | Promise, n = 1) { + return new Promise((resolve, reject) => { this.semaphore.take(n, async () => { try { - await task(); - resolve(); + const value = await task(); + resolve(value); } catch (e) { reject(e); } finally { diff --git a/src/utils/SingleQueryDataProvider.ts b/src/utils/SingleQueryDataProvider.ts index 49bf65c..9e7b463 100644 --- a/src/utils/SingleQueryDataProvider.ts +++ b/src/utils/SingleQueryDataProvider.ts @@ -42,7 +42,9 @@ export default class SingleQueryDataProvider { ) { this.fetchMutex = new PromisifiedSemaphore(); this.readyMutex = new PromisifiedSemaphore(); - this.state = new MutexProtectedData({ error: new Error('This error should not be displayed') }); + this.state = new MutexProtectedData>({ + error: new Error('This error should not be displayed') + }); this.init(); } diff --git a/src/utils/cyren-api.ts b/src/utils/cyren-api.ts new file mode 100644 index 0000000..c1f2bc6 --- /dev/null +++ b/src/utils/cyren-api.ts @@ -0,0 +1,198 @@ +import axios from 'axios'; +import { StatusCodes } from 'http-status-codes'; +import { RateLimiterMemory, RateLimiterRedis } from 'rate-limiter-flexible'; + +import { EnvVars } from '../config'; +import { redisClient } from '../redis'; +import { CodedError } from './errors'; +import MutexProtectedData from './MutexProtectedData'; +import SingleQueryDataProvider from './SingleQueryDataProvider'; + +type ResponseErrorBlock = [[string]]; +type ResponseDataBlock = [string, string][]; +type ResponseBlock = ResponseErrorBlock | ResponseDataBlock; +type ParsedCyrenApiErrorResponse = [ResponseDataBlock, ResponseErrorBlock]; +type ParsedCyrenApiSuccessResponse = [ResponseDataBlock, ...ResponseDataBlock[]]; +type ParsedCyrenApiResponse = ParsedCyrenApiErrorResponse | ParsedCyrenApiSuccessResponse; + +interface SiteCategory { + id: number; + name: string; + description: string; +} + +const isErrorResponse = (response: ParsedCyrenApiResponse): response is ParsedCyrenApiErrorResponse => + response[1][0].length === 1; + +const secondlyRateLimiter = new RateLimiterMemory({ + points: 50, + duration: 1, + keyPrefix: 'secondlyRateLimiter' +}); + +const dailyRateLimiter = new RateLimiterRedis({ + storeClient: redisClient, + keyPrefix: 'dailyRateLimiter', + points: 5000, + duration: 60 * 60 * 24 +}); + +const cyrenApi = axios.create({ + baseURL: `${EnvVars.CTWSD_CONTAINER_ADDRESS}`, + headers: { + Accept: '*/*', + 'Accept-Language': 'en-us', + 'User-Agent': 'Cyren HTTP Client', + 'Content-Type': 'text/plain' + } +}); + +const requestIdProxy = new MutexProtectedData(0); +const getRequestId = () => + requestIdProxy.exec((data, setData) => { + setData((data + 1) % Number.MAX_SAFE_INTEGER); + + return data; + }); + +const parseCyrenApiResponse = (response: string) => { + let remainingLines = response.split('\n').map(line => line.trim()); + const blocks: ResponseBlock[] = []; + while (remainingLines.length > 0) { + const emptyLineIndex = remainingLines.indexOf(''); + const blockLines = emptyLineIndex === -1 ? remainingLines : remainingLines.slice(0, emptyLineIndex); + const block = blockLines.map((line): [string] | [string, string] => { + if (line.startsWith('x-ctch')) { + const separatorIndex = line.indexOf(':'); + + return [line.slice(0, separatorIndex).trim(), line.slice(separatorIndex + 1).trim()]; + } + + return [line]; + }) as ResponseBlock; + if (block.length > 0) { + blocks.push(block); + } + remainingLines = emptyLineIndex === -1 ? [] : remainingLines.slice(emptyLineIndex + 1); + } + + return blocks as ParsedCyrenApiResponse; +}; + +const categoriesProvider = new SingleQueryDataProvider(Infinity, async () => { + const response = await cyrenApi.post( + '/ctwsd/websec', + `x-ctch-request-id: ${await getRequestId()} +x-ctch-request-type: getcatlist +x-ctch-pver: 1.0 +` + ); + const parsedResponse = parseCyrenApiResponse(response.data); + + if (isErrorResponse(parsedResponse)) { + throw new Error(`Failed to fetch categories: ${parsedResponse[1][0]}`); + } + + const [, defaultCategoriesBlock, customCategoriesBlock = [['x-ctch-custom-cat-count', '0']]] = parsedResponse; + + const [defaultCategories, customCategories] = [defaultCategoriesBlock, customCategoriesBlock].map( + ([counterLine, ...restLines]) => { + const categoriesCount = Number.parseInt(counterLine[1]); + + const categories: SiteCategory[] = []; + for (let i = 0; i < categoriesCount; i++) { + const category = { + id: 0, + name: '', + description: '' + }; + const dataLines = restLines.slice(i * 3, (i + 1) * 3); + dataLines.forEach(([name, value]) => { + switch (name) { + case 'x-ctch-cat-id': + category.id = Number.parseInt(value); + break; + case 'x-ctch-cat-name': + category.name = value; + break; + default: + category.description = value; + } + }); + categories.push(category); + } + + return Object.fromEntries(categories.map(category => [category.id, category])); + } + ); + + return { defaultCategories, customCategories }; +}); + +export const getSiteCategories = async (url: string) => { + const { data: categories, error } = await categoriesProvider.getState(); + + if (error) { + throw error; + } + + try { + await secondlyRateLimiter.consume('twbackend', 1); + await dailyRateLimiter.consume('twbackend', 1); + + const response = await cyrenApi.post( + '/ctwsd/websec', + `x-ctch-request-id: ${await getRequestId()} +x-ctch-request-type: classifyurl +x-ctch-pver: 1.0 + +x-ctch-url: ${url}` + ); + const parsedResponse = parseCyrenApiResponse(response.data); + + if (isErrorResponse(parsedResponse)) { + throw new Error(`Failed to classify URL: ${parsedResponse[1][0]}`); + } + + const { defaultCategories, customCategories } = categories; + const [, categoriesBlock] = parsedResponse; + const siteCategories: string[] = []; + let cacheWasUsed = true; + + categoriesBlock.forEach(([name, value]) => { + switch (name) { + case 'x-ctch-categories': + siteCategories.push( + ...value + .split(',') + .map(categoryId => defaultCategories[categoryId]?.name) + .filter(Boolean) + ); + break; + case 'x-ctch-flags': + cacheWasUsed = Number.parseInt(value) % 2 === 0; + break; + default: + siteCategories.push( + ...value + .split(',') + .map(categoryId => customCategories[categoryId]?.name) + .filter(Boolean) + ); + } + }); + + if (cacheWasUsed) { + await secondlyRateLimiter.reward('twbackend', 1); + await dailyRateLimiter.reward('twbackend', 1); + } + + return siteCategories; + } catch (e) { + if (e instanceof Error) { + throw e; + } + + throw new CodedError(StatusCodes.TOO_MANY_REQUESTS, 'Too Many Requests'); + } +}; diff --git a/src/utils/express-helpers.ts b/src/utils/express-helpers.ts index 9ad300f..ebbcdfc 100644 --- a/src/utils/express-helpers.ts +++ b/src/utils/express-helpers.ts @@ -1,4 +1,5 @@ import { NextFunction, Request, RequestHandler, Response, Router } from 'express'; +import { ParsedQs } from 'qs'; import { ArraySchema as IArraySchema, ObjectSchema as IObjectSchema, Schema, ValidationError } from 'yup'; import { basicAuth } from '../middlewares/basic-auth.middleware'; @@ -13,11 +14,7 @@ interface ObjectStorageMethods { removeValues: (keys: string[]) => Promise; } -type TypedBodyRequestHandler = ( - req: Request, unknown, T>, - res: Response, - next: NextFunction -) => void; +type TypedBodyRequestHandler = (req: Request, res: Response, next: NextFunction) => void; export const withBodyValidation = (schema: Schema, handler: TypedBodyRequestHandler): RequestHandler => @@ -35,6 +32,28 @@ export const withBodyValidation = return handler(req, res, next); }; +type TypedQueryParamsRequestHandler = ( + req: Request, + res: Response, + next: NextFunction +) => void; + +export const withQueryParamsValidation = + (schema: Schema, handler: TypedQueryParamsRequestHandler): RequestHandler => + async (req, res, next) => { + try { + req.query = await schema.validate(req.query); + } catch (error) { + if (error instanceof ValidationError) { + return res.status(400).send({ error: error.message }); + } + + throw error; + } + + return handler(req as Request & { query: ParsedQs & T }, res, next); + }; + interface EvmQueryParams { walletAddress: string; chainId: string; diff --git a/src/utils/schemas.ts b/src/utils/schemas.ts index 21e0253..0b4df57 100644 --- a/src/utils/schemas.ts +++ b/src/utils/schemas.ts @@ -231,3 +231,7 @@ export const replaceUrlsBlacklistDictionarySchema: IObjectSchema = objectSchema() + .shape({ url: stringSchema().url().required() }) + .required(); diff --git a/yarn.lock b/yarn.lock index ac2c2e5..f78c1d0 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4089,6 +4089,11 @@ range-parser@~1.2.1: resolved "https://registry.yarnpkg.com/range-parser/-/range-parser-1.2.1.tgz#3cf37023d199e1c24d1a55b84800c2f3e6468031" integrity sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg== +rate-limiter-flexible@^5.0.3: + version "5.0.3" + resolved "https://registry.yarnpkg.com/rate-limiter-flexible/-/rate-limiter-flexible-5.0.3.tgz#bfbfd7585e09073ebe22d177126116862b1024ae" + integrity sha512-lWx2y8NBVlTOLPyqs+6y7dxfEpT6YFqKy3MzWbCy95sTTOhOuxufP2QvRyOHpfXpB9OUJPbVLybw3z3AVAS5fA== + raw-body@2.5.1: version "2.5.1" resolved "https://registry.yarnpkg.com/raw-body/-/raw-body-2.5.1.tgz#fe1b1628b181b700215e5fd42389f98b71392857" From d65c892bec9fd6d95a99af90208c8c253ffeb627 Mon Sep 17 00:00:00 2001 From: Inokentii Mazhara Date: Tue, 25 Jun 2024 09:11:07 +0300 Subject: [PATCH 2/5] TW-1459 Try to add 'Crypto' category --- CustomCategoryDefinition.ini | 12 ++--------- cc_snapshot_1.txt | 39 ++++++++++++++++++++++++++++++++++-- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/CustomCategoryDefinition.ini b/CustomCategoryDefinition.ini index a8e35e3..0111117 100644 --- a/CustomCategoryDefinition.ini +++ b/CustomCategoryDefinition.ini @@ -1,11 +1,3 @@ [501] -Name = LCC501 -Description = Local Custom Category 501 - -[502] -Name = LCC502 -Description = Local Custom Category 502 - -[503] -Name = LCC503 -Description = Local Custom Category 503 \ No newline at end of file +Name = Crypto +Description = Cryptocurrencies, blockchain and so on \ No newline at end of file diff --git a/cc_snapshot_1.txt b/cc_snapshot_1.txt index 7dee448..f72e451 100644 --- a/cc_snapshot_1.txt +++ b/cc_snapshot_1.txt @@ -1,2 +1,37 @@ -i www.ynet.co.il 503 -i www.walla.co.il 501,502 +ieh ether|bsc|blockchain|bitcoin|btc|^(www\.)?dex|litecoin|ripple|crypto|solana|altcoin|stablecoin|arbitrum|xtz|dapp|airdrop 501 +i autofaucet.dutchycorp.space 501 +i basescan.org 501 +i bitdegree.org 501 +i blockchair.com 501 +i blockworks.co 501 +i coin360.com 501 +i coinarbitragebot.com 501 +i coincarp.com 501 +i coincheckup.com 501 +i coincodex.com 501 +i coincost.net 501 +i coinedition.com 501 +i coingape.com 501 +i coingolive.com 501 +i coinlisting.info 501 +i coinmarketcap.com 501 +i coinpaprika.com 501 +i coinpedia.org 501 +i coinspeaker.com 501 +i cointelegraph.com 501 +i cryptfaucet.com 501 +i decrypt.co 501 +i earn-pepe.com 501 +i eth-converter.com 501 +i nft.news 501 +i polygonscan.com 501 +i rollercoin.com 501 +i solscan.io 501 +i theblock.co 501 +i thecoinrise.com 501 +i tzkt.io 501 +i u.today 501 +i walletinvestor.com 501 +i www.coindesk.com 501 +i www.coingecko.com 501 +i www.geckoterminal.com 501 \ No newline at end of file From 6e2af7f8451dee7754ff278b1e9e7f1b48c06f82 Mon Sep 17 00:00:00 2001 From: Inokentii Mazhara Date: Tue, 25 Jun 2024 10:43:58 +0300 Subject: [PATCH 3/5] TW-1459 Finish the setup of CTWSD demo --- .gitignore | 3 - CustomCategoryDefinition.ini | 3 - Dockerfile.ctwsd | 2 +- ctwsd-config/CustomCategoryDefinition.ini | 3 + .../CustomCategoryIndex.idx | 0 cc_delta_1 => ctwsd-config/cc_delta_1 | 0 .../cc_snapshot_1.txt | 0 src/utils/cyren-api.ts | 152 +++++++++++++----- 8 files changed, 112 insertions(+), 51 deletions(-) delete mode 100644 CustomCategoryDefinition.ini create mode 100644 ctwsd-config/CustomCategoryDefinition.ini rename CustomCategoryIndex.idx => ctwsd-config/CustomCategoryIndex.idx (100%) rename cc_delta_1 => ctwsd-config/cc_delta_1 (100%) rename cc_snapshot_1.txt => ctwsd-config/cc_snapshot_1.txt (100%) diff --git a/.gitignore b/.gitignore index 52c847d..9fb2dbd 100644 --- a/.gitignore +++ b/.gitignore @@ -105,6 +105,3 @@ dist # IDE .idea - -# ctwsd -ctwsd*/ diff --git a/CustomCategoryDefinition.ini b/CustomCategoryDefinition.ini deleted file mode 100644 index 0111117..0000000 --- a/CustomCategoryDefinition.ini +++ /dev/null @@ -1,3 +0,0 @@ -[501] -Name = Crypto -Description = Cryptocurrencies, blockchain and so on \ No newline at end of file diff --git a/Dockerfile.ctwsd b/Dockerfile.ctwsd index faa6443..82a7b75 100644 --- a/Dockerfile.ctwsd +++ b/Dockerfile.ctwsd @@ -7,7 +7,7 @@ ARG CTWSD_SERVER_ADDRESS ARG CTWSD_LICENSE_KEY RUN mkdir -p /opt/tmp COPY ./setup_ctwsd.sh /usr/src/app/ -COPY ./cc_* ./CustomCategoryDefinition.ini ./CustomCategoryIndex.idx /opt/tmp/ +COPY ./ctwsd-config/* /opt/tmp/ RUN CTWSD_FTP_URL=${CTWSD_FTP_URL} CTWSD_FTP_USER=${CTWSD_FTP_USER} CTWSD_FTP_PASS=${CTWSD_FTP_PASS} \ CTWSD_LICENSE_KEY=${CTWSD_LICENSE_KEY} CTWSD_SERVER_ADDRESS=${CTWSD_SERVER_ADDRESS} ./setup_ctwsd.sh EXPOSE 8080 diff --git a/ctwsd-config/CustomCategoryDefinition.ini b/ctwsd-config/CustomCategoryDefinition.ini new file mode 100644 index 0000000..47219c2 --- /dev/null +++ b/ctwsd-config/CustomCategoryDefinition.ini @@ -0,0 +1,3 @@ +[501] +Name = Crypto +Description = Cryptocurrencies, blockchain, and so on \ No newline at end of file diff --git a/CustomCategoryIndex.idx b/ctwsd-config/CustomCategoryIndex.idx similarity index 100% rename from CustomCategoryIndex.idx rename to ctwsd-config/CustomCategoryIndex.idx diff --git a/cc_delta_1 b/ctwsd-config/cc_delta_1 similarity index 100% rename from cc_delta_1 rename to ctwsd-config/cc_delta_1 diff --git a/cc_snapshot_1.txt b/ctwsd-config/cc_snapshot_1.txt similarity index 100% rename from cc_snapshot_1.txt rename to ctwsd-config/cc_snapshot_1.txt diff --git a/src/utils/cyren-api.ts b/src/utils/cyren-api.ts index c1f2bc6..96b90d1 100644 --- a/src/utils/cyren-api.ts +++ b/src/utils/cyren-api.ts @@ -1,10 +1,14 @@ import axios from 'axios'; +import { readFile } from 'fs/promises'; import { StatusCodes } from 'http-status-codes'; +import path from 'path'; import { RateLimiterMemory, RateLimiterRedis } from 'rate-limiter-flexible'; import { EnvVars } from '../config'; import { redisClient } from '../redis'; import { CodedError } from './errors'; +import { isDefined } from './helpers'; +import logger from './logger'; import MutexProtectedData from './MutexProtectedData'; import SingleQueryDataProvider from './SingleQueryDataProvider'; @@ -21,6 +25,9 @@ interface SiteCategory { description: string; } +const iniFileCategoryIdRegex = /^\[(\d+)\]/; +const iniFilePropertyRegex = /^(\w+)\s*=\s*(.+)/; + const isErrorResponse = (response: ParsedCyrenApiResponse): response is ParsedCyrenApiErrorResponse => response[1][0].length === 1; @@ -56,24 +63,29 @@ const getRequestId = () => }); const parseCyrenApiResponse = (response: string) => { - let remainingLines = response.split('\n').map(line => line.trim()); const blocks: ResponseBlock[] = []; - while (remainingLines.length > 0) { - const emptyLineIndex = remainingLines.indexOf(''); - const blockLines = emptyLineIndex === -1 ? remainingLines : remainingLines.slice(0, emptyLineIndex); - const block = blockLines.map((line): [string] | [string, string] => { - if (line.startsWith('x-ctch')) { - const separatorIndex = line.indexOf(':'); - - return [line.slice(0, separatorIndex).trim(), line.slice(separatorIndex + 1).trim()]; - } + let currentBlock: ([string] | [string, string])[] = []; + + response.split('\n').forEach(line => { + const trimmedLine = line.trim(); + + if (trimmedLine === '' && currentBlock.length > 0) { + blocks.push(currentBlock as ResponseDataBlock); + currentBlock = []; + } else if (trimmedLine.startsWith('x-ctch')) { + const separatorIndex = trimmedLine.indexOf(':'); + const parsedBlockLine: [string, string] = [ + trimmedLine.slice(0, separatorIndex).trim(), + trimmedLine.slice(separatorIndex + 1).trim() + ]; - return [line]; - }) as ResponseBlock; - if (block.length > 0) { - blocks.push(block); + currentBlock.push(parsedBlockLine); + } else { + currentBlock.push([trimmedLine]); } - remainingLines = emptyLineIndex === -1 ? [] : remainingLines.slice(emptyLineIndex + 1); + }); + if (currentBlock.length > 0) { + blocks.push(currentBlock as ResponseDataBlock); } return blocks as ParsedCyrenApiResponse; @@ -93,43 +105,95 @@ x-ctch-pver: 1.0 throw new Error(`Failed to fetch categories: ${parsedResponse[1][0]}`); } - const [, defaultCategoriesBlock, customCategoriesBlock = [['x-ctch-custom-cat-count', '0']]] = parsedResponse; - - const [defaultCategories, customCategories] = [defaultCategoriesBlock, customCategoriesBlock].map( - ([counterLine, ...restLines]) => { - const categoriesCount = Number.parseInt(counterLine[1]); - - const categories: SiteCategory[] = []; - for (let i = 0; i < categoriesCount; i++) { - const category = { - id: 0, - name: '', - description: '' - }; - const dataLines = restLines.slice(i * 3, (i + 1) * 3); - dataLines.forEach(([name, value]) => { - switch (name) { - case 'x-ctch-cat-id': - category.id = Number.parseInt(value); - break; - case 'x-ctch-cat-name': - category.name = value; - break; - default: - category.description = value; - } - }); - categories.push(category); + // TODO: use the block with custom categories and remove parsing ini file after they are fixed + const [, defaultCategoriesBlock] = parsedResponse; + const [counterLine, ...restLines] = defaultCategoriesBlock; + + const categoriesCount = Number.parseInt(counterLine[1]); + + const categories: SiteCategory[] = []; + for (let i = 0; i < categoriesCount; i++) { + const category = { + id: 0, + name: '', + description: '' + }; + const dataLines = restLines.slice(i * 3, (i + 1) * 3); + dataLines.forEach(([name, value]) => { + switch (name) { + case 'x-ctch-cat-id': + category.id = Number.parseInt(value); + break; + case 'x-ctch-cat-name': + category.name = value; + break; + default: + category.description = value; } + }); + categories.push(category); + } - return Object.fromEntries(categories.map(category => [category.id, category])); - } + const defaultCategories = Object.fromEntries(categories.map(category => [category.id, category])); + + const rawCustomCategoryDefinitions = await readFile( + path.resolve(__dirname, '../../ctwsd-config/CustomCategoryDefinition.ini'), + { encoding: 'utf-8' } ); + const customCategories: Record = {}; + let nextCategory: SiteCategory = { + id: -1, + name: '', + description: '' + }; + rawCustomCategoryDefinitions.split('\n').forEach(line => { + const trimmedLine = line.trim(); + + if (trimmedLine === '' && nextCategory.id !== -1 && nextCategory.name !== '') { + customCategories[nextCategory.id] = nextCategory; + nextCategory = { + id: -1, + name: '', + description: '' + }; + + return; + } + + const categoryIdExecResult = iniFileCategoryIdRegex.exec(trimmedLine); + if (isDefined(categoryIdExecResult)) { + nextCategory.id = Number.parseInt(categoryIdExecResult[1]); + + return; + } + + const propertyExecResult = iniFilePropertyRegex.exec(trimmedLine); + if (propertyExecResult) { + switch (propertyExecResult[1]) { + case 'Name': + nextCategory.name = propertyExecResult[2]; + break; + case 'Description': + nextCategory.description = propertyExecResult[2]; + break; + default: + logger.warn(`Unknown property in custom category definition: ${propertyExecResult[1]}`); + } + } + }); + if (nextCategory.id !== -1 && nextCategory.name !== '') { + customCategories[nextCategory.id] = nextCategory; + } return { defaultCategories, customCategories }; }); export const getSiteCategories = async (url: string) => { + // TODO: remove this after the issue with HTTPS URLs is fixed + if (url.startsWith('https://')) { + url = url.slice(8); + } + const { data: categories, error } = await categoriesProvider.getState(); if (error) { From e183cce51b3ed08c2664ebe8b5f3e96958aaf75e Mon Sep 17 00:00:00 2001 From: Inokentii Mazhara Date: Tue, 25 Jun 2024 11:32:32 +0300 Subject: [PATCH 4/5] TW-1459 Remove an unused script --- fuflo1.js | 116 ------------------------------------------------------ 1 file changed, 116 deletions(-) delete mode 100644 fuflo1.js diff --git a/fuflo1.js b/fuflo1.js deleted file mode 100644 index fb5d0de..0000000 --- a/fuflo1.js +++ /dev/null @@ -1,116 +0,0 @@ -const sites = [ - "www.youtube.com", - "www.coingecko.com", - "etherscan.io", - /* "www.dextools.io", - "coinmarketcap.com", - "dexscreener.com", - "neuralwriter.com", */ - "bscscan.com", - "www.chess.com", - // "www.photopea.com", - "tzkt.io", - /* "polygonscan.com", - "www.freepik.com", */ - "rollercoin.com", - /* "mobalytics.gg", - "www.solitalian.it", - "stackoverflow.com", - "ezgif.com", - "udn.com", - "mail01.orange.fr", - "www.mlb.com", - "solscan.io", - "www.leagueofgraphs.com", - "captcha.bot", - "www.op.gg", - "www.sahibinden.com", - "www.fmkorea.com", - "www.sozcu.com.tr", - "www.dailymotion.com", - "www.leboncoin.fr", - "www.espn.com", - "www.geckoterminal.com", */ - "claimbits.net", - /* "quizlet.com", - "onlinezen.online", - "www.yahoo.com", - "www.pixilart.com", - "www.thingiverse.com", - "www.speedtest.net", - "www.quora.com", - "www.geny.com", */ - "www.w3schools.com", - /* "www.merriam-webster.com", - "universitieshub.store", - "eksisozluk.com", - "industi.online", - "slickdeals.net", - "socialcounts.org", - "www.ivoox.com", - "temp-mail.org", - "tw.news.yahoo.com", - "tw.nextapple.com", - "www.gazzetta.it", - "w2g.tv", - "devlopweb.online", - "www.dailymail.co.uk", - "www.dcard.tw", - "www.geeksforgeeks.org", - "basescan.org", - "pomofocus.io", - "tw.yahoo.com", - "deepai.org", - "5tars.io", - "as.com", - "blogchain.eu.org", - "vnexpress.net", - "tureng.com", - "autofaucet.dutchycorp.space", - "www.infobae.com", - "www.cricbuzz.com", - "www.cnn.com", - "dgb.lol", - "u.gg", - "inconvertiblemoney.online", - "www.procyclingstats.com", */ - "firefaucet.win", - "www.blockchain.com", - /* "www.marca.com", - "www.zillow.com", - "www.draftkings.com", - "www.javatpoint.com", - "www.publish0x.com", - "www.arkadium.com", - "kiddyearner.com", - "t.17track.net", - "www.coindesk.com", - "www.programiz.com", - "www.pixels.tips", - "genius.com", - "tw.stock.yahoo.com", - "www.ebay.com", - "poki.com", - "www.olx.ua", - "www.kleinanzeigen.de", - "www.ynet.co.il", - "www.news.com.au", - "earn-pepe.com", - "www.slideshare.net", */ - "www.chinatimes.com" -]; - -const axios = require('axios'); - -(async () => { - for (let i = 0; i < sites.length; i++) { - const site = sites[i]; - const t0 = Date.now(); - try { - const response = await axios.get(`http://localhost:3001/api/get-ad-category`, { params: { url: `https://${site}` } }); - console.log(site, `${Date.now() - t0}ms`, response.data.join(', ')); - } catch (e) { - console.error(site, e); - } - } -})(); From 948017d5703a5f96dbc89346bf70b01819d29104 Mon Sep 17 00:00:00 2001 From: Inokentii Mazhara Date: Tue, 25 Jun 2024 13:40:52 +0300 Subject: [PATCH 5/5] TW-1459 Simplify Docker file for CTWSD container --- Dockerfile.ctwsd | 4 +--- ctwsd-config/CustomCategoryIndex.idx | 4 ++-- setup_ctwsd.sh | 27 ++++++++++++++------------- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/Dockerfile.ctwsd b/Dockerfile.ctwsd index 82a7b75..4113499 100644 --- a/Dockerfile.ctwsd +++ b/Dockerfile.ctwsd @@ -5,9 +5,7 @@ ARG CTWSD_FTP_USER ARG CTWSD_FTP_PASS ARG CTWSD_SERVER_ADDRESS ARG CTWSD_LICENSE_KEY -RUN mkdir -p /opt/tmp -COPY ./setup_ctwsd.sh /usr/src/app/ -COPY ./ctwsd-config/* /opt/tmp/ +COPY ./setup_ctwsd.sh ./ctwsd-config/* /usr/src/app/ RUN CTWSD_FTP_URL=${CTWSD_FTP_URL} CTWSD_FTP_USER=${CTWSD_FTP_USER} CTWSD_FTP_PASS=${CTWSD_FTP_PASS} \ CTWSD_LICENSE_KEY=${CTWSD_LICENSE_KEY} CTWSD_SERVER_ADDRESS=${CTWSD_SERVER_ADDRESS} ./setup_ctwsd.sh EXPOSE 8080 diff --git a/ctwsd-config/CustomCategoryIndex.idx b/ctwsd-config/CustomCategoryIndex.idx index aa4121a..e1cc893 100644 --- a/ctwsd-config/CustomCategoryIndex.idx +++ b/ctwsd-config/CustomCategoryIndex.idx @@ -1,2 +1,2 @@ -/opt/tmp/cc_snapshot_1.txt -/opt/tmp/cc_delta_1 \ No newline at end of file +/usr/src/app/cc_snapshot_1.txt +/usr/src/app/cc_delta_1 \ No newline at end of file diff --git a/setup_ctwsd.sh b/setup_ctwsd.sh index e088774..796e4c2 100755 --- a/setup_ctwsd.sh +++ b/setup_ctwsd.sh @@ -1,20 +1,21 @@ #!/bin/bash -CONFDIR=/opt/tmp -ESCAPED_CONFDIR=$(echo $CONFDIR | sed 's/\//\\\//g') -DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y wget tar +CC_CONFDIR=/usr/src/app +ESCAPED_CC_CONFDIR=$(echo $CC_CONFDIR | sed 's/\//\\\//g') +DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y wget wget $CTWSD_FTP_URL --user=$CTWSD_FTP_USER --password=$CTWSD_FTP_PASS --no-check-certificate -O ./ctwsd.tar.gz tar -xzf ctwsd.tar.gz rm ctwsd.tar.gz mv ./ctwsd* ./ctwsd -sed -i "s/ServerAddress \= xxxxxxxxx/ServerAddress = ${CTWSD_SERVER_ADDRESS}/" ./ctwsd/bin/ctwsd.conf -sed -i "s/LicenseKey \= xxxxxxxxx/LicenseKey = ${CTWSD_LICENSE_KEY}/" ./ctwsd/bin/ctwsd.conf -sed -i "s/#LocalCustomCategories-Enabled=0/LocalCustomCategories-Enabled=1/" ./ctwsd/bin/ctwsd.conf -sed -i "s/#CustomCategoriesCacheMaxEntries=10000/CustomCategoriesCacheMaxEntries=10000/" ./ctwsd/bin/ctwsd.conf -sed -i ${sed_mac_arg:+""} \ - "s/#LocalCustomCategories-Uri=/LocalCustomCategories-Uri=${ESCAPED_CONFDIR}\/CustomCategoryIndex.idx/" \ - ./ctwsd/bin/ctwsd.conf -sed -i ${sed_mac_arg:+""} \ - "s/#LocalCustomCategoriesDefinitionFileURI=/LocalCustomCategoriesDefinitionFileURI=${ESCAPED_CONFDIR}\/CustomCategoryDefinition.ini/" \ - ./ctwsd/bin/ctwsd.conf +CONF_FILE_PATH=./ctwsd/bin/ctwsd.conf +CC_CACHE_MAX_ENTRIES=10000 +sed -i "s/ServerAddress \= xxxxxxxxx/ServerAddress = ${CTWSD_SERVER_ADDRESS}/" $CONF_FILE_PATH +sed -i "s/LicenseKey \= xxxxxxxxx/LicenseKey = ${CTWSD_LICENSE_KEY}/" $CONF_FILE_PATH +sed -i "s/#LocalCustomCategories-Enabled=0/LocalCustomCategories-Enabled=1/" $CONF_FILE_PATH +sed -i "s/#CustomCategoriesCacheMaxEntries=10000/CustomCategoriesCacheMaxEntries=${CC_CACHE_MAX_ENTRIES}/" \ + $CONF_FILE_PATH +sed -i "s/#LocalCustomCategories-Uri=/LocalCustomCategories-Uri=${ESCAPED_CC_CONFDIR}\/CustomCategoryIndex.idx/" \ + $CONF_FILE_PATH +sed -i "s/#LocalCustomCategoriesDefinitionFileURI=/LocalCustomCategoriesDefinitionFileURI=\ +${ESCAPED_CC_CONFDIR}\/CustomCategoryDefinition.ini/" $CONF_FILE_PATH