Skip to content

Commit

Permalink
feat(text/unstable): add reverse function
Browse files Browse the repository at this point in the history
Co-authored-by: Mathias Bynens <[email protected]>
  • Loading branch information
scarf005 and mathiasbynens committed Feb 17, 2025
1 parent 2829d6d commit ed38858
Show file tree
Hide file tree
Showing 6 changed files with 255 additions and 0 deletions.
5 changes: 5 additions & 0 deletions text/_test_util.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/**
 * Builds a random string made only of lowercase ASCII letters (`a`-`z`).
 *
 * The length is `Math.floor(Math.random() * (max - min) + min)`: for integer
 * bounds it lies in `[min, max)` when `min < max`, and is exactly `min` when
 * `min === max`.
 *
 * @param min Lower bound used to pick the length.
 * @param max Upper bound used to pick the length.
 * @returns The generated string.
 */
export function generateRandomString(min: number, max: number): string {
  // Pick the length first so the random draws happen in a fixed order.
  const length = Math.floor(Math.random() * (max - min) + min);
  const randomLowercaseLetter = () => {
    // 97 is the char code of "a"; an offset of 0..25 covers "a" through "z".
    const offset = Math.floor(Math.random() * 26);
    return String.fromCharCode(offset + 97);
  };
  return Array.from({ length }, randomLowercaseLetter).join("");
}
10 changes: 10 additions & 0 deletions text/_test_util_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import { assertEquals } from "../assert/equals.ts";
import { generateRandomString } from "./_test_util.ts";

Deno.test({
  name: "generateRandomString() generates a string of the correct length",
  fn() {
    // Degenerate ranges (min === max) always yield exactly `min` characters.
    assertEquals(generateRandomString(0, 0), "");
    assertEquals(generateRandomString(10, 10).length, 10);

    // A non-degenerate range must yield a length in [min, max) and only
    // lowercase ASCII letters. Both properties hold for every draw, so the
    // loop is deterministic despite the randomness.
    for (let i = 0; i < 100; i++) {
      const value = generateRandomString(5, 10);
      assertEquals(value.length >= 5 && value.length < 10, true);
      assertEquals(/^[a-z]*$/.test(value), true);
    }
  },
});
1 change: 1 addition & 0 deletions text/deno.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"./closest-string": "./closest_string.ts",
"./compare-similarity": "./compare_similarity.ts",
"./levenshtein-distance": "./levenshtein_distance.ts",
"./unstable-reverse": "./unstable_reverse.ts",
"./unstable-slugify": "./unstable_slugify.ts",
"./to-camel-case": "./to_camel_case.ts",
"./unstable-to-constant-case": "./unstable_to_constant_case.ts",
Expand Down
90 changes: 90 additions & 0 deletions text/unstable_reverse.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Copyright 2018-2025 the Deno authors. MIT license.
// This module is browser compatible.

// Copyright Mathias Bynens <https://mathiasbynens.be/>
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

// Matches one symbol followed by one or more combining marks.
//
// Capture group 1 (the symbol) is one of, in order of precedence:
//   - a single code unit that is neither a surrogate nor in one of the
//     combining-mark ranges listed below,
//   - a high surrogate followed by a low surrogate (a surrogate pair),
//   - a high surrogate that is not followed by a low surrogate,
//   - a low surrogate together with the non-high-surrogate code unit before it
//     (or a low surrogate at the very start of the string).
//
// Capture group 2 (the marks) is a run of code units from the ranges
// U+0300-U+036F, U+1AB0-U+1AFF, U+1DC0-U+1DFF, U+20D0-U+20FF and
// U+FE20-U+FE2F.
const REGEX_SYMBOL_WITH_COMBINING_MARKS =
/([\0-\u02FF\u0370-\u1AAF\u1B00-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uE000-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF])([\u0300-\u036F\u1AB0-\u1AFF\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]+)/g;
// Matches a surrogate pair: group 1 is the high surrogate, group 2 the low one.
const REGEX_SURROGATE_PAIR = /([\uD800-\uDBFF])([\uDC00-\uDFFF])/g;

/** Options for {@linkcode reverse}. */
export type ReverseOptions = {
/**
* Whether to keep astral symbols (surrogate pairs, such as 🦕) and symbols
* followed by combining marks intact while reversing, at the cost of ~60%
* slowdown.
*
* When set to `false`, the string is reversed one UTF-16 code unit at a time
* without any such handling.
*
* Check {@link ./unstable_reverse_bench.ts} for performance comparison.
*
* @default {true}
*/
handleUnicode: boolean;
};

/**
 * Performs a Unicode-aware string reversal.
 *
 * Unless `options.handleUnicode` is `false`, surrogate pairs and symbols
 * followed by combining marks are pre-arranged so that they survive the final
 * code-unit reversal intact.
 *
 * @experimental **UNSTABLE**: New API, yet to be vetted.
 *
 * @param input The input string to be reversed.
 * @param options The options for the reverse function.
 * @returns The reversed string.
 *
 * @example Standard usage
 * ```ts
 * import { reverse } from "@std/text/unstable-reverse";
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(reverse("Hello, world!"), "!dlrow ,olleH");
 * assertEquals(reverse("🦕Deno♥"), "♥oneD🦕");
 * ```
 *
 * @example Performance optimization with disabled Unicode handling
 * ```ts
 * import { reverse } from "@std/text/unstable-reverse";
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(reverse("Hello, world!", { handleUnicode: false }), "!dlrow ,olleH");
 * ```
 */
export function reverse(
  input: string,
  options?: Partial<ReverseOptions>,
): string {
  if (options?.handleUnicode !== false) {
    // Step 1: deal with combining marks and astral symbols (surrogate pairs)
    input = input
      // Swap symbols with their combining marks so the combining marks go first
      .replace(REGEX_SYMBOL_WITH_COMBINING_MARKS, (_, $1, $2) => {
        // Reverse the combining marks so they will end up in the same order
        // later on (after another round of reversing). `$2` contains only
        // combining marks, so neither Unicode pattern can match inside it;
        // a plain code-unit reversal gives the same result without re-running
        // both regex passes.
        return reverse($2, { handleUnicode: false }) + $1;
      })
      // Swap high and low surrogates so the low surrogates go first
      .replace(REGEX_SURROGATE_PAIR, "$2$1");
  }

  // Step 2: reverse the code units in the string
  let result = "";
  for (let index = input.length; index--;) {
    result += input.charAt(index);
  }
  return result;
}
104 changes: 104 additions & 0 deletions text/unstable_reverse_bench.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Copyright 2018-2025 the Deno authors. MIT license.
import { generateRandomString } from "./_test_util.ts";
import { reverse } from "./unstable_reverse.ts";

// Benchmark candidate: split the string into an array with `split("")`,
// reverse the array in place, and join it back together.
// `split("")` separates UTF-16 code units, so surrogate pairs are not kept
// together by this approach.
function splitReverseJoin(str: string) {
return str.split("").reverse().join("");
}

// Benchmark candidate: walk the string with `for...of` and prepend every
// yielded character to the accumulator, so the last character ends up first.
function forOf(str: string) {
let reversed = "";
for (const character of str) {
reversed = character + reversed;
}
return reversed;
}

// Benchmark candidate: split the string with `split("")` and fold the pieces
// with `reduce`, prepending each one to the accumulated result.
function reduce(str: string) {
return str.split("").reduce(
(reversed, character) => character + reversed,
"",
);
}

// Benchmark candidate: spread the string into an array via its iterator,
// reverse the array in place, and join it back together.
function spreadReverseJoin(str: string) {
return [...str].reverse().join("");
}

// Benchmark candidate: index-based loop that walks the string from its last
// index down to 0 and appends each indexed element to the result.
function forLoop(str: string) {
let x = "";

for (let i = str.length - 1; i >= 0; --i) {
x += str[i];
}

return x;
}

// Shared benchmark corpus: 10000 strings, each produced by
// `generateRandomString(0, 100)`, built once at module load.
const strings = Array.from(
  { length: 10000 },
  () => generateRandomString(0, 100),
);

// Benchmark registrations. Every entry belongs to the `reverseString` group
// and runs one reversal implementation over every string in `strings`.
// The first five entries exercise the local candidate functions defined in
// this file; the last two exercise the imported `reverse` with its default
// options and with `handleUnicode: false`.
Deno.bench({
group: "reverseString",
name: "splitReverseJoin",
fn: () => {
for (let i = 0; i < strings.length; i++) {
splitReverseJoin(strings[i]!);
}
},
});
Deno.bench({
group: "reverseString",
name: "forOf",
fn: () => {
for (let i = 0; i < strings.length; i++) {
forOf(strings[i]!);
}
},
});
Deno.bench({
group: "reverseString",
name: "reduce",
fn: () => {
for (let i = 0; i < strings.length; i++) {
reduce(strings[i]!);
}
},
});
Deno.bench({
group: "reverseString",
name: "spreadReverseJoin",
fn: () => {
for (let i = 0; i < strings.length; i++) {
spreadReverseJoin(strings[i]!);
}
},
});
Deno.bench({
group: "reverseString",
name: "forLoop",
fn: () => {
for (let i = 0; i < strings.length; i++) {
forLoop(strings[i]!);
}
},
});
// `reverse` with default options (Unicode handling enabled).
Deno.bench({
group: "reverseString",
name: "esrever",
fn: () => {
for (let i = 0; i < strings.length; i++) {
reverse(strings[i]!);
}
},
});
// `reverse` with Unicode handling switched off.
Deno.bench({
group: "reverseString",
name: "esrever (no unicode)",
fn: () => {
for (let i = 0; i < strings.length; i++) {
reverse(strings[i]!, { handleUnicode: false });
}
},
});
45 changes: 45 additions & 0 deletions text/unstable_reverse_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright 2018-2025 the Deno authors. MIT license.
import { assertEquals } from "@std/assert/equals";
import { reverse } from "./unstable_reverse.ts";

// Runs the same expectation through both helpers: first `testAscii`
// (`handleUnicode: false`), then `testUnicode` (default options).
// Note the parameter order: the expected output comes first, the input second.
function testBothAsciiAndUnicode(expected: string, input: string) {
testAscii(expected, input);
testUnicode(expected, input);
}
/**
 * Asserts that `reverse()` with its default options (Unicode handling
 * enabled) turns `input` into `expected`.
 *
 * `assertEquals` takes `(actual, expected)`, so the value produced by
 * `reverse()` is always passed first; this keeps failure diffs labelled
 * correctly.
 *
 * @param expected The reversed string that `reverse(input)` must produce.
 * @param input The string to reverse.
 */
function testUnicode(expected: string, input: string) {
  assertEquals(reverse(input), expected);
  // check that reversing twice restores the original string
  assertEquals(reverse(reverse(input)), input);
  // check empty object handling
  assertEquals(reverse(input, {}), expected);
}
/**
 * Asserts that `reverse()` with `handleUnicode: false` turns `input` into
 * `expected`.
 *
 * `assertEquals` takes `(actual, expected)`, so the value produced by
 * `reverse()` is always passed first; this keeps failure diffs labelled
 * correctly.
 *
 * @param expected The reversed string that `reverse(input)` must produce.
 * @param input The string to reverse.
 */
function testAscii(expected: string, input: string) {
  const asciiOnly = { handleUnicode: false };
  assertEquals(reverse(input, asciiOnly), expected);
  // check that reversing twice restores the original string
  assertEquals(reverse(reverse(input, asciiOnly), asciiOnly), input);
  // check empty object handling (an empty options object uses the defaults)
  assertEquals(reverse(input, {}), expected);
}

// The empty string must reverse to itself on both code paths.
Deno.test("reverse() handles empty string", () => {
testBothAsciiAndUnicode("", "");
});

// Plain ASCII inputs; arguments are (expected, input).
Deno.test("reverse() reverses a string", () => {
testBothAsciiAndUnicode("olleh", "hello");
testBothAsciiAndUnicode("dlrow olleh", "hello world");
});

// CREDIT: https://github.com/mathiasbynens/esrever/blob/14b34013dad49106ca08c0e65919f1fc8fea5331/README.md
// These cases only go through `testUnicode` (default options); arguments are
// (expected, input).
Deno.test("reverse() handles unicode strings", () => {
// sentence containing an astral symbol and a letter carrying combining marks
testUnicode("Lorem ipsum 𝌆 dolor sit ameͨ͆t.", ".teͨ͆ma tis rolod 𝌆 muspi meroL");
testUnicode("mañana mañana", "anañam anañam");

// letters carrying long runs of stacked combining marks
testUnicode("H̹̙̦̮͉̩̗̗ͧ̇̏̊̾Eͨ͆͒̆ͮ̃͏̷̮̣̫̤̣ ̵̞̹̻̀̉̓ͬ͑͡ͅCͯ̂͐͏̨̛͔̦̟͈̻O̜͎͍͙͚̬̝̣̽ͮ͐͗̀ͤ̍̀͢M̴̡̲̭͍͇̼̟̯̦̉̒͠Ḛ̛̙̞̪̗ͥͤͩ̾͑̔͐ͅṮ̴̷̷̗̼͍̿̿̓̽͐H̙̙̔̄͜", "H̙̙̔̄͜Ṯ̴̷̷̗̼͍̿̿̓̽͐Ḛ̛̙̞̪̗ͥͤͩ̾͑̔͐ͅM̴̡̲̭͍͇̼̟̯̦̉̒͠O̜͎͍͙͚̬̝̣̽ͮ͐͗̀ͤ̍̀͢Cͯ̂͐͏̨̛͔̦̟͈̻ ̵̞̹̻̀̉̓ͬ͑͡ͅEͨ͆͒̆ͮ̃͏̷̮̣̫̤̣H̹̙̦̮͉̩̗̗ͧ̇̏̊̾");

// emoji and symbol characters mixed with ASCII, then Hangul text
testUnicode("🦕Deno♥", "♥oneD🦕");
testUnicode("안녕하세요", "요세하녕안");
});

0 comments on commit ed38858

Please sign in to comment.