From 0db008233f27d459fbbfbef11e57f664be843596 Mon Sep 17 00:00:00 2001 From: Carlos del Prado Date: Tue, 24 Nov 2020 16:57:21 +0000 Subject: [PATCH] doc: poc documentation --- .gitignore | 117 ++++++++++++++++++++++++++++++ README.md | 52 ++++++++++++- utils/json-generator/main.js | 20 +++++ utils/json-generator/package.json | 12 +++ 4 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 utils/json-generator/main.js create mode 100644 utils/json-generator/package.json diff --git a/.gitignore b/.gitignore index 2106a33..d4b4c56 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,120 @@ project/plugins/project/ .bsp .DS_Store data/* + +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +*.lcov + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) +web_modules/ + +# TypeScript cache +*.tsbuildinfo + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Microbundle cache +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variables file +.env +.env.test + +# parcel-bundler cache (https://parceljs.org/) +.cache +.parcel-cache + +# Next.js build output +.next 
+out + +# Nuxt.js build / generate output +.nuxt +dist + +# Gatsby files +.cache/ +# Comment in the public line in if your project uses Gatsby and not Next.js +# https://nextjs.org/blog/next-9-1#public-directory-support +# public + +# vuepress build output +.vuepress/dist + +# Serverless directories +.serverless/ + +# FuseBox cache +.fusebox/ + +# DynamoDB Local files +.dynamodb/ + +# TernJS port file +.tern-port + +# Stores VSCode versions used for testing VSCode extensions +.vscode-test + +# yarn v2 +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* diff --git a/README.md b/README.md index 6115982..d028a1f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# baikal-api +# event-hub-kafka-poc POC of Producer/Consumer implementation for Azure EventHub Kafka interface. @@ -31,3 +31,53 @@ EventHub consumer/producer using Kafka interface - Consumer example: --mode=consumer --namespace=ns-test --eventhub=eh-test --sas="Endpoint=sb://ns-test.servicebus.windows.net/;SharedAccessKeyName=sas-keys;SharedAccessKey=xxx;EntityPath=eh-test" --output=/path/to/data/out.json ``` +## Getting started: EventHub Throughput POC + +1. Create a sample of data using the `utils/json-generator` util: +- Resolve util dependencies: `npm i utils/json-generator` +- Build the data sample by running: `node utils/json-generator/main.js -r 409600` +- Display util options: `node utils/json-generator/main.js help` +```bash +Usage: main.js [options] [command] + +Commands: + help Display help + +Options: + -h, --help Output usage information + -o, --out [value] Output file (defaults to "event-hub-kafka-poc/data/data.json") + -r, --records Amount of records (defaults to 409600; 409600 == 1GB) +``` + +2. Assemble the application jar: +```bash +sbt assembly +``` +The output jar will be located in `dist/event-hub-kafka-poc.jar` + +3. 
Create an EventHub with manage shared access policies at the Azure developers console:
+   - Standard EventHub:
+     - 1 TU
+     - Enable Auto-Inflate
+     - Auto-Inflate Maximum Throughput Units: 20
+     - Partition count: 32
+   - Dedicated EventHub:
+     - 1 CU
+     - Partition count: 600
+4. Run application:
+```bash
+export NAMESPACE="namespace-name"
+export EVENTHUB="eventhub-name"
+export SASK_KEY="eventhub-shared-connection-string"
+
+java -jar /opt/event-hub-kafka-poc/event-hub-kafka-poc.jar \
+  --mode=producer \
+  --namespace=${NAMESPACE} \
+  --eventhub=${EVENTHUB} \
+  --sas="${SASK_KEY}" \
+  --input=event-hub-kafka-poc/data/data.json
+```
+
+5. Check the EventHub namespace throughput metrics
+
+
diff --git a/utils/json-generator/main.js b/utils/json-generator/main.js
new file mode 100644
index 0000000..755007c
--- /dev/null
+++ b/utils/json-generator/main.js
@@ -0,0 +1,59 @@
+#!/usr/bin/env node
+
+// Generates newline-delimited JSON sample data (one faker "card" per line)
+// for the EventHub throughput POC.
+
+const fs = require('fs');
+const path = require('path');
+
+const args = require('args');
+const faker = require('faker');
+
+args
+  .option('out', 'Output file', `${__dirname}/../../data/data.json`)
+  .option('records', 'Amount of records', 409600);
+
+const options = args.parse(process.argv);
+
+// Reject NaN, fractions and negatives up front instead of silently writing
+// an empty file.
+const records = Number(options.records);
+if (!Number.isInteger(records) || records < 0) {
+  console.error(`Invalid --records value: ${options.records}`);
+  process.exit(1);
+}
+
+// data/* is git-ignored, so the target directory may be missing on a fresh
+// checkout; create it rather than failing with ENOENT.
+fs.mkdirSync(path.dirname(options.out), { recursive: true });
+
+// Append mode is kept as-is: re-running the script grows an existing file.
+const stream = fs.createWriteStream(options.out, { flags: 'a' });
+
+stream.on('error', (err) => {
+  console.error(`Failed to write ${options.out}: ${err.message}`);
+  process.exit(1);
+});
+
+/**
+ * Writes `records` JSON lines to the output stream.
+ *
+ * Pauses whenever write() reports a full internal buffer and resumes on
+ * 'drain', so the whole data set is never queued in memory at once.
+ *
+ * @returns {Promise<void>} resolves after stream.end() has been called.
+ */
+async function generate() {
+  for (let i = 0; i < records; i++) {
+    const line = `${JSON.stringify(faker.helpers.createCard())}\n`;
+    if (!stream.write(line)) {
+      await new Promise((resolve) => stream.once('drain', resolve));
+    }
+  }
+  stream.end();
+}
+
+generate().catch((err) => {
+  console.error(err);
+  process.exit(1);
+});
diff --git a/utils/json-generator/package.json b/utils/json-generator/package.json
new file mode 100644
index 0000000..9bd8d58
--- /dev/null
+++ b/utils/json-generator/package.json
@@ -0,0 +1,12 @@
+{
+  "name": "json-generator",
+  "version": "0.1.0",
+  "description": "",
+  "main": "main.js",
+  "author": "",
+  "license": "ISC",
+  "dependencies": {
+    "args": "^5.0.1",
+    "faker": "^5.1.0"
+  }
+}