Skip to content

Commit

Permalink
perf: Optimize apng detection algorithm
Browse files Browse the repository at this point in the history
- Reduce memory allocations by avoiding decoding buffer
- Increase algorithm performance by:
 - replacing regex matching with index search
 - going through buffer one time instead of two
  • Loading branch information
madtisa committed Aug 9, 2024
1 parent 3200f4c commit 3e50b23
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 122 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "is-apng",
"version": "1.1.0",
"version": "1.1.1",
"description": "Check if a Buffer/Uint8Array is a APNG (Animated PNG) image",
"license": "MIT",
"keywords": [
Expand Down
4 changes: 2 additions & 2 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ isApng(new Uint8Array(buffer))
#### As old-school global script tag

Url for latest version: `https://unpkg.com/is-apng`<br>
Url for specific version: `https://unpkg.com/is-apng@1.1.0/dist/index.js`
Url for specific version: `https://unpkg.com/is-apng@1.1.1/dist/index.js`

```html
<script src="https://unpkg.com/is-apng" type="text/javascript"></script>
Expand All @@ -53,7 +53,7 @@ Url for specific version: `https://unpkg.com/[email protected]/dist/index.js`
#### As module

Url for latest version: `https://unpkg.com/is-apng/dist/index.mjs`<br>
Url for specific version: `https://unpkg.com/is-apng@1.1.0/dist/index.mjs`
Url for specific version: `https://unpkg.com/is-apng@1.1.1/dist/index.mjs`

```html
<script type="module">
Expand Down
202 changes: 83 additions & 119 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,82 @@
// import { Buffer } from 'node:buffer'

/**
 * Reports whether `searchSequence` occurs in a byte buffer before `stopSequence` does.
 *
 * The buffer is walked exactly once, byte by byte, while two match cursors are
 * advanced in parallel: one for `searchSequence`, one for `stopSequence`.
 * The scan ends as soon as either sequence has been matched completely.
 * No decoding or copying of the buffer takes place (`subarray` creates a view).
 *
 * Both sequences must be non-empty and consist of unique bytes. Uniqueness
 * guarantees that after a mismatch the only position at which a new match can
 * start is the current byte itself, which keeps the matcher a simple one-pass
 * state machine.
 *
 * @param buffer
 * Bytes to scan.
 *
 * @param searchSequence
 * Byte sequence to look for.
 *
 * @param fromIndex
 * Index at which to begin the scan. It is handed to `subarray()`, so a
 * negative value is interpreted relative to the end of the buffer.
 *
 * @param stopSequence
 * Byte sequence that terminates the scan when it is matched first.
 *
 * @returns
 * `true` if `searchSequence` is completed before `stopSequence`;
 * `false` if `stopSequence` is completed first, if neither sequence is found,
 * or if `fromIndex` lies at or beyond the end of the buffer.
 *
 * @throws Error
 * If either sequence is empty or contains a repeated byte.
 */
function hasSequence(
  buffer: Buffer | Uint8Array,
  searchSequence: Uint8Array,
  fromIndex: number,
  stopSequence: Uint8Array,
): boolean {
  function validateSequence(sequence: Uint8Array): void {
    if (!sequence.length) {
      throw new Error('Sequence is empty')
    }

    // Search only unique symbols to simplify the algorithm
    if (new Set(sequence).size !== sequence.length) {
      throw new Error('Sequence must consist of unique symbols')
    }
  }

  validateSequence(searchSequence)
  validateSequence(stopSequence)

  if (fromIndex >= buffer.length) {
    return false
  }

  // A view onto the same memory; the caller's buffer is neither copied nor modified.
  const view = buffer.subarray(fromIndex)

  let matchSearchIndex = 0
  let matchStopIndex = 0
  for (let i = 0; i < view.length; i++) {
    const byte = view[i]

    if (byte === searchSequence[matchSearchIndex]) {
      matchSearchIndex++
      if (matchSearchIndex === searchSequence.length) {
        return true
      }
    } else {
      // The byte that broke a partial match may itself begin a new match
      // (e.g. the second 'a' in "aacTL"), so re-test it against the first byte
      // instead of unconditionally resetting to 0.
      matchSearchIndex = byte === searchSequence[0] ? 1 : 0
    }

    if (byte === stopSequence[matchStopIndex]) {
      matchStopIndex++
      if (matchStopIndex === stopSequence.length) {
        return false
      }
    } else {
      // Same restart rule as above, applied to the stop sequence.
      matchStopIndex = byte === stopSequence[0] ? 1 : 0
    }
  }

  return false
}

// Encoder used once at module load to turn the chunk-type names below into bytes.
const encoder = new TextEncoder()
// Byte sequences for the PNG chunk types that `isApng` passes to `hasSequence`:
// the one to search for and the one that ends the search.
const sequences = {
// 'acTL' — animation control chunk; the sequence that is searched for.
animationControlChunk: encoder.encode('acTL'),
// 'IDAT' — image data chunk; the sequence that stops the search.
imageDataChunk: encoder.encode('IDAT'),
}

export default function isApng(buffer: Buffer | Uint8Array): boolean {
if (
!buffer ||
Expand All @@ -26,130 +103,17 @@ export default function isApng(buffer: Buffer | Uint8Array): boolean {
return false
}

/**
* Returns the index of the first match of `needle` in a typed array, or -1 if there is none.
*
* Works similarly to `Array.prototype.indexOf()`, but it searches for text instead of a single value:
* the bytes of `haystack` are decoded chunk by chunk with a `TextDecoder` (UTF-8) and every
* decoded chunk is searched with a global `RegExp` built from `needle`.
*
* NOTE(review): the position is accumulated from decoded string lengths, so it presumably equals
* a byte offset only while each scanned byte decodes to exactly one character — confirm.
*
* @param haystack `Uint8Array`
* Array to search in.
*
* @param needle `string | RegExp`
* The text or pattern to locate in the array. A falsy value yields -1.
*
* @param fromIndex `number`
* The array index at which to begin the search.
*
* @param upToIndex `number`
* The array index up to which to search (used as the end argument of `subarray`).
* If omitted, search until the end.
*
* @param chunksize `number`
* Number of bytes decoded per iteration (default 1024).
*
* @returns number
* Position of the first match, corrected for `fromIndex`, or -1 if nothing was found
* or the requested range is empty.
*/
function indexOfSubstring(
haystack: Uint8Array,
needle: string | RegExp,
fromIndex: number,
upToIndex?: number,
chunksize = 1024 /* Bytes */,
) {
/**
* Adapted from: https://stackoverflow.com/a/67771214/2142071
*/

if (!needle) {
return -1
}
// Normalise to a global RegExp so string and RegExp needles share the `exec` path below.
needle = new RegExp(needle, 'g')

// The needle could get split over two chunks.
// So, at every chunk we prepend the last few characters
// of the last chunk.
const needle_length = needle.source.length
const decoder = new TextDecoder()

// Handle search offset in line with
// `Array.prototype.indexOf()` and `TypedArray.prototype.subarray()`.
const full_haystack_length = haystack.length
if (typeof upToIndex === 'undefined') {
upToIndex = full_haystack_length
}
// Nothing to search when the requested range is empty or starts past the end.
if (
fromIndex >= full_haystack_length ||
upToIndex <= 0 ||
fromIndex >= upToIndex
) {
return -1
}
haystack = haystack.subarray(fromIndex, upToIndex)

// `position` is the result; `current_index` walks the bytes of `haystack`;
// `full_length` counts the characters decoded in previous chunks;
// `needle_buffer` holds the tail of the previous chunk that is prepended to the next one.
let position = -1
let current_index = 0
let full_length = 0
let needle_buffer = ''

outer: while (current_index < haystack.length) {
const next_index = current_index + chunksize
// subarray doesn't copy
const chunk = haystack.subarray(current_index, next_index)
// `stream: true` lets the decoder carry an incomplete multi-byte character over to the next chunk.
const decoded = decoder.decode(chunk, { stream: true })

const text = needle_buffer + decoded

let match: RegExpExecArray | null
let last_index = -1
// Only the first match is needed: record its position and leave both loops.
while ((match = needle.exec(text)) !== null) {
last_index = match.index - needle_buffer.length
position = full_length + last_index
break outer
}

current_index = next_index
full_length += decoded.length

// Check that the buffer doesn't itself include the needle
// this would cause duplicate finds (we could also use a Set to avoid that).
// NOTE(review): a match exits via `break outer`, so `last_index` appears to always be -1
// here and only the second branch is taken — confirm.
const needle_index =
last_index > -1
? last_index + needle_length
: decoded.length - needle_length
needle_buffer = decoded.slice(needle_index)
}

// Correct for search offset.
if (position >= 0) {
position += fromIndex >= 0 ? fromIndex : full_haystack_length + fromIndex
}

return position
}

// APNGs have an animation control chunk ('acTL') preceding the IDATs.
// See: https://en.wikipedia.org/wiki/APNG#File_format
const arr = new Uint8Array(buffer)
const idatIdx = indexOfSubstring(arr, 'IDAT', 12)
if (idatIdx >= 12) {
const actlIdx = indexOfSubstring(arr, 'acTL', 8, idatIdx)
return actlIdx >= 8
}

return false
return hasSequence(
buffer,
sequences.animationControlChunk,
8,
sequences.imageDataChunk,
)
}

// globalThis.isApng = isApng

// (new TextEncoder()).encode('IDAT')
// Decimal: [73, 68, 65, 84]
// Hex: [0x49, 0x44, 0x41, 0x54]

// (new TextEncoder()).encode('acTL')
// Decimal: [97, 99, 84, 76]
// Hex: [0x61, 0x63, 0x54, 0x4C]

// const idatIdx = buffer.indexOf('IDAT')
// const actlIdx = buffer.indexOf('acTL')

0 comments on commit 3e50b23

Please sign in to comment.