-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 131d47a
Showing
21 changed files
with
12,495 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
node_modules | ||
dist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
{ | ||
"root": true, | ||
"parser": "@typescript-eslint/parser", | ||
"parserOptions": { | ||
"project": "./tsconfig.json" | ||
}, | ||
"plugins": ["@typescript-eslint", "prettier"], | ||
"extends": [ | ||
"eslint:recommended", | ||
"plugin:@typescript-eslint/eslint-recommended", | ||
"plugin:@typescript-eslint/recommended", | ||
"prettier" | ||
], | ||
"env": { | ||
"jest": true | ||
}, | ||
"rules": { | ||
"no-console": 1, // Means warning | ||
"prettier/prettier": 2, // Means error | ||
"@typescript-eslint/ban-ts-comment": "off", | ||
"@typescript-eslint/no-floating-promises": ["error"], | ||
"@typescript-eslint/no-misused-promises": ["error"], | ||
"@typescript-eslint/promise-function-async": ["error"], | ||
"@typescript-eslint/require-await": ["error"], | ||
// note you must disable the base rule as it can report incorrect errors | ||
"no-return-await": "off", | ||
"@typescript-eslint/return-await": ["error"], | ||
// Don't allow awaiting non-Promises | ||
"@typescript-eslint/await-thenable": "error" | ||
}, | ||
"ignorePatterns": ["dist", "cdk.out"] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
name: Build - CI | ||
|
||
on: | ||
push: | ||
branches: | ||
- main | ||
pull_request: | ||
branches: | ||
- main | ||
|
||
jobs: | ||
build: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: Checkout code | ||
uses: actions/checkout@v3 | ||
|
||
- name: Use Node.js 16 | ||
uses: actions/setup-node@v3 | ||
with: | ||
node-version: 16 | ||
|
||
- name: Install Modules | ||
run: npm ci | ||
|
||
- name: Build | ||
run: npm run build | ||
|
||
- name: Build Docs | ||
run: npm run build:docs | ||
|
||
- name: Lint | ||
run: npm run lint | ||
|
||
- name: Test | ||
run: npm run test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
name: Publish Docs | ||
|
||
on: | ||
release: | ||
types: [published] | ||
|
||
workflow_dispatch: | ||
|
||
jobs: | ||
build: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: Checkout code | ||
uses: actions/checkout@v3 | ||
|
||
- name: Use Node.js 16 | ||
uses: actions/setup-node@v3 | ||
with: | ||
node-version: 16 | ||
|
||
- name: Install Modules | ||
run: npm ci | ||
|
||
- name: Build Docs | ||
run: npm run build:docs | ||
|
||
- name: Deploy to GitHub Pages | ||
uses: peaceiris/actions-gh-pages@v3 | ||
with: | ||
github_token: ${{ secrets.GITHUB_TOKEN }} | ||
publish_dir: ./docs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
name: Package and Publish | ||
|
||
on: | ||
release: | ||
types: [published] | ||
|
||
jobs: | ||
build: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: Checkout code | ||
uses: actions/checkout@v3 | ||
|
||
- name: Use Node.js 16 | ||
uses: actions/setup-node@v3 | ||
with: | ||
node-version: 16 | ||
|
||
- name: Use the Release Tag Version | ||
run: | | ||
npm version from-git --allow-same-version --no-git-tag-version | ||
- name: Install Modules | ||
run: npm ci | ||
|
||
- name: Build | ||
run: npm run build | ||
|
||
- name: Lint | ||
run: npm run lint | ||
|
||
- name: Test | ||
run: npm run test | ||
|
||
- name: NPM registry authentication | ||
run: npm set //registry.npmjs.org/:_authToken ${{ secrets.NPMJSORG_PUBLISH_TOKEN }} | ||
|
||
- name: Publish with CLI to Code Artifact | ||
run: | | ||
npm publish --access public --ignore-scripts --dry-run |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
.idea/ | ||
node_modules | ||
dist/ | ||
*.tsbuildinfo | ||
*.log | ||
.nyc_output/ | ||
coverage/ | ||
.DS_Store | ||
docs/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
v16.17.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"semi": true, | ||
"trailingComma": "all", | ||
"singleQuote": true, | ||
"printWidth": 100 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
{ | ||
"editor.defaultFormatter": "esbenp.prettier-vscode", | ||
"editor.formatOnSave": true, | ||
"editor.insertSpaces": true, | ||
"editor.tabSize": 2, | ||
"files.associations": { | ||
"Dockerfile*": "dockerfile" | ||
}, | ||
"[typescript]": { | ||
"editor.defaultFormatter": "esbenp.prettier-vscode" | ||
}, | ||
"yaml.customTags": [ | ||
"!And", | ||
"!And sequence", | ||
"!If", | ||
"!If sequence", | ||
"!Not", | ||
"!Not sequence", | ||
"!Equals", | ||
"!Equals sequence", | ||
"!Or", | ||
"!Or sequence", | ||
"!FindInMap", | ||
"!FindInMap sequence", | ||
"!Base64", | ||
"!Join", | ||
"!Join sequence", | ||
"!Cidr", | ||
"!Ref", | ||
"!Sub", | ||
"!Sub sequence", | ||
"!GetAtt", | ||
"!GetAZs", | ||
"!ImportValue", | ||
"!ImportValue sequence", | ||
"!Select", | ||
"!Select sequence", | ||
"!Split", | ||
"!Split sequence" | ||
], | ||
"[xml]": { | ||
"editor.defaultFormatter": "DotJoshJohnson.xml" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Contributor Code of Conduct | ||
|
||
The Shutterstock team is committed to fostering a welcoming community. | ||
|
||
This project is governed by Shutterstock’s [Code of Conduct](https://github.com/shutterstock/code-of-conduct). All contributors and participants agree to abide by its terms. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
### Expected behavior | ||
|
||
### Actual behavior | ||
|
||
### Steps to reproduce the behavior | ||
|
||
### Additional specs (e.g. browser, version, etc.) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
The MIT License (MIT) | ||
===================== | ||
Copyright (c) `2022` `Shutterstock, Inc.` | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy of | ||
this software and associated documentation files (the "Software"), to deal in | ||
the Software without restriction, including without limitation the rights to | ||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | ||
of the Software, and to permit persons to whom the Software is furnished to do | ||
so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT) [![API Docs](https://img.shields.io/badge/API%20Docs-View%20Here-blue)](https://tech.shutterstock.com/chunker/) [![Build - CI](https://github.com/shutterstock/chunker/actions/workflows/ci.yml/badge.svg)](https://github.com/shutterstock/chunker/actions/workflows/ci.yml) [![Package and Publish](https://github.com/shutterstock/chunker/actions/workflows/publish.yml/badge.svg)](https://github.com/shutterstock/chunker/actions/workflows/publish.yml) [![Publish Docs](https://github.com/shutterstock/chunker/actions/workflows/docs.yml/badge.svg)](https://github.com/shutterstock/chunker/actions/workflows/docs.yml) | ||
|
||
# Overview | ||
|
||
`@shutterstock/chunker` calls a blocking async callback _before_ adding an item that would exceed a user-defined size limit OR when the count of items limit is reached. | ||
|
||
A common use case for `@shutterstock/chunker` is as a "batch accumulator" that gathers up items to be processed in a batch where the batch has specific count and size constraints that must be followed. For example, sending batches to an AWS Kinesis Data Stream requires that there be 500 or less records totalling 500 MB or less in size. The record count part is easy, but the record size check and handling both is more difficult. | ||
|
||
# Getting Started | ||
|
||
## Installation | ||
|
||
The package is available on npm as [@shutterstock/chunker](https://www.npmjs.com/package/@shutterstock/chunker) | ||
|
||
`npm i @shutterstock/chunker` | ||
|
||
## Importing | ||
|
||
```typescript | ||
import { Chunker } from '@shutterstock/chunker'; | ||
``` | ||
|
||
## API Documentation | ||
|
||
After installing the package, you might want to look at our [API Documentation](https://tech.shutterstock.com/chunker/) to learn about all the features available. | ||
|
||
# `Chunker` | ||
|
||
`Chunker` has a `BlockingQueue` that it uses to store items until the size or count limits are reached. When the limits are reached, the `Chunker` calls the user-provided callback with the items in the queue. The callback is expected to return a `Promise` that resolves when the items have been processed. The `Chunker` will wait for the `Promise` to resolve before continuing. | ||
|
||
See below for an example of using `Chunker` to write batches of records to an AWS Kinesis Data Stream. | ||
|
||
# Contributing | ||
|
||
## Setting up Build Environment | ||
|
||
- `nvm use` | ||
- `npm i` | ||
- `npm run build` | ||
- `npm run lint` | ||
- `npm run test` | ||
|
||
## Running Examples | ||
|
||
### aws-kinesis-writer | ||
|
||
1. Create Kinesis Data Stream using AWS Console or any other method | ||
1. Default name is `chunker-test-stream` | ||
2. 1 shard is sufficient | ||
3. 1 day retention is sufficient | ||
4. No encryption is sufficient | ||
5. On-demand throughput is sufficient | ||
2. `npm run example:aws-kinesis-writer` | ||
1. If the stream name was changed: `KINESIS_STREAM_NAME=my-stream-name npm run example:aws-kinesis-writer` | ||
3. Observe in the log output that the `enqueue` method intermittently blocks when the count or size constraints would be breached. During the block the records are written to the Kinesis Data Stream, after which the block is released and the new item is added to the next batch. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
/* eslint-disable no-console */ | ||
import * as kinesis from '@aws-sdk/client-kinesis'; | ||
import { Chunker } from '@shutterstock/chunker'; | ||
|
||
const kinesisClient = new kinesis.KinesisClient({}); | ||
const { KINESIS_STREAM_NAME = 'chunker-test-stream', RECORDS_TO_WRITE = '1500' } = process.env; | ||
const RECORDS_TO_WRITE_NUM = parseInt(RECORDS_TO_WRITE, 10); | ||
|
||
async function main() { | ||
// AWS Kinesis payloads must have 500 or less items | ||
// and must be 5 MB or less in total size. | ||
// Chunker will call the callback when the metrics are | ||
// reached so we can flush the batch to Kinesis without | ||
// ever exceeding the limits. | ||
const chunker = new Chunker({ | ||
countLimit: 500, | ||
// Only go up to 95% of the limit of the Kinesis payload size | ||
sizeLimit: 5 * 1024 * 1024 * 0.95, | ||
/** | ||
* Compute the item size | ||
* @param record The record to compute the size of | ||
* @returns | ||
*/ | ||
sizer: (record: { item: kinesis.PutRecordsRequestEntry; index: number }): number => { | ||
const { item, index } = record; | ||
const itemJSON = JSON.stringify(item); | ||
// Return the real size of the record if below 500 items | ||
if (index < 500) { | ||
return itemJSON.length; | ||
} | ||
|
||
// Lie about the record size above 500 items | ||
// to force flushes based on size | ||
// Records will be between 500 KB and 1024 KB if count is 1000 | ||
return index * 1024; | ||
}, | ||
/** | ||
* Write the items to Kinesis when called | ||
* @param records The records to write | ||
* @returns The result of the Kinesis PutRecordsCommand | ||
*/ | ||
writer: async ( | ||
records: { index: number; item: kinesis.PutRecordsRequestEntry }[], | ||
): Promise<void> => { | ||
console.log( | ||
`Writing to Kinesis - Start - Records: ${records.length}, First Index: ${ | ||
records[0].index | ||
}, Last Index: ${records[records.length - 1].index}`, | ||
); | ||
// Send the records in a batch since we know we will be under the batch limits | ||
await kinesisClient.send( | ||
new kinesis.PutRecordsCommand({ | ||
StreamName: KINESIS_STREAM_NAME, | ||
Records: records.map((record) => record.item), | ||
}), | ||
); | ||
console.log( | ||
`Writing to Kinesis - Done - Records: ${records.length}, First Index: ${ | ||
records[0].index | ||
}, Last Index: ${records[records.length - 1].index}`, | ||
); | ||
}, | ||
}); | ||
|
||
try { | ||
for (let i = 0; i < RECORDS_TO_WRITE_NUM; i++) { | ||
const niceNumberStr = `${i.toString().padStart(5, '0')}`; | ||
const partitionKey = `${(i % 100).toString().padStart(5, '0')}`; | ||
const record: kinesis.PutRecordsRequestEntry = { | ||
Data: Buffer.from(niceNumberStr, 'utf-8'), | ||
PartitionKey: partitionKey, | ||
}; | ||
|
||
console.log( | ||
`Writing to Chunker - Start - Record: ${i}, Data: ${niceNumberStr}, PartitionKey: ${partitionKey}`, | ||
); | ||
const start = Date.now(); | ||
await chunker.enqueue({ item: record, index: i }); | ||
console.log( | ||
`Writing to Chunker - Done - Record: ${i}, Data: ${niceNumberStr}, PartitionKey: ${partitionKey}, Duration: ${ | ||
Date.now() - start | ||
} ms`, | ||
); | ||
} | ||
} finally { | ||
console.log('Waiting for Chunker to finish - Start'); | ||
await chunker.onIdle(); | ||
console.log('Waiting for Chunker to finish - Done'); | ||
} | ||
} | ||
|
||
void main(); |
Oops, something went wrong.