Skip to content

Commit

Permalink
feat: [sc-25504] Proposed "maturity" next steps for NameGuard JS (#347)
Browse files Browse the repository at this point in the history
* rename endpoint to nameguardEndpoint

* document emoji functions

* move normalized graphemes to data

* format docstrings

* document countGraphemes

* rename impersonation status to impersonation estimate

* remove magic number

* use isCharacter

* remove charCount re-export

* warn about unset env vars

* fix nameguard tests

* nameguard js lazy init

* fix pnpm build

* organize nameguard sdk tests

* Add changesets

* test isCombiningChar

* explain impersonation tests

* interface for impersonation tests

* fix renamed impersonation status

---------

Co-authored-by: kwrobel.eth <[email protected]>
Co-authored-by: lightwalker.eth <[email protected]>
  • Loading branch information
3 people authored Oct 30, 2024
1 parent f6cbe5d commit 47de3ad
Show file tree
Hide file tree
Showing 29 changed files with 497 additions and 326 deletions.
7 changes: 7 additions & 0 deletions .changeset/fluffy-laws-grow.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"@namehash/nameguard": minor
---

- Rename `ImpersonationStatus` to `ImpersonationEstimate` to better manage expectations.
- Rename `endpoint` param to `nameguardEndpoint` when creating a NameGuard Client for more self-documenting code.
- Refined unit tests.
9 changes: 9 additions & 0 deletions .changeset/late-phones-rush.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
"@namehash/nameguard-js": minor
---

- Optimize startup time by lazily initializing in-memory data models.
- Refine documentation.
- Refine unit tests.
- Warn about likely timeout errors if the Ethereum provider environment variable is not set when running unit tests.
- Upgrade to the latest NameGuard SDK.
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ type ImpersonationReportProps = {
export function ImpersonationReport({ data }: ImpersonationReportProps) {
if (
!data ||
data?.impersonation_status === null ||
data?.impersonation_status === "unlikely"
data?.impersonation_estimate === null ||
data?.impersonation_estimate === "unlikely"
)
return null;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,16 @@ export async function SecurePrimaryName({ address }: Props) {
const data = await nameguard.getSecurePrimaryName(address);

const pillColor =
data.impersonation_status === "potential" ?
data.impersonation_estimate === "potential" ?
"bg-red-300" :
data.impersonation_status === "unlikely" ?
data.impersonation_estimate === "unlikely" ?
"bg-green-300" :
"bg-yellow-300";

const pillText =
data.impersonation_status === "potential" ?
data.impersonation_estimate === "potential" ?
"Potential impersonation!" :
data.impersonation_status === "unlikely" ?
data.impersonation_estimate === "unlikely" ?
"Name is secure" :
"No primary name!";

Expand Down
7 changes: 6 additions & 1 deletion packages/nameguard-js/src/confusables.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import { describe, it, expect } from "vitest";
import { describe, it, expect, beforeAll } from "vitest";
import { isGraphemeConfusable, getCanonical } from "./confusables";
import { initializeData } from "./data";

describe("confusables", () => {
beforeAll(() => {
initializeData();
});

it("should check if ASCII is confusable", () => {
expect(isGraphemeConfusable("a")).toBe(false);
expect(isGraphemeConfusable("A")).toBe(true);
Expand Down
19 changes: 11 additions & 8 deletions packages/nameguard-js/src/data/canonicals.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
// for compression the json type is:
// { [key: string]: [string, number] }
import GRAPHEME_CANONICALS_ from "./canonicals.json";

/**
* Stores information about a potentially confusable grapheme and its canonical form.
*/
Expand All @@ -22,7 +18,14 @@ export interface GraphemeCanonical {
* Map containing graphemes and their canonical forms.
* This data is taken from the NameHash ens-label-inspector Python package.
*/
export const GRAPHEME_CANONICALS: Map<string, GraphemeCanonical> =
new Map(
Object.entries(GRAPHEME_CANONICALS_ as { [key: string]: [string, number] })
.map(([k, v]) => [k, { canonicalGrapheme: v[0], numConfusables: v[1] }]));
export let GRAPHEME_CANONICALS: Map<string, GraphemeCanonical> | null = null;

/**
 * Loads `canonicals.json` and builds the `GRAPHEME_CANONICALS` map from it.
 *
 * The JSON file stores each entry in a compact form,
 * grapheme -> [canonicalGrapheme, numConfusables], which is expanded here
 * into objects with `canonicalGrapheme` and `numConfusables` fields.
 * `GRAPHEME_CANONICALS` is `null` until this function has been called.
 */
export function initializeCanonicals() {
  const rawCanonicals: { [key: string]: [string, number] } = require("./canonicals.json");

  const canonicals = new Map<string, GraphemeCanonical>();
  for (const [grapheme, packed] of Object.entries(rawCanonicals)) {
    canonicals.set(grapheme, {
      canonicalGrapheme: packed[0],
      numConfusables: packed[1],
    });
  }

  // Publish the map only once it has been fully built.
  GRAPHEME_CANONICALS = canonicals;
}
10 changes: 6 additions & 4 deletions packages/nameguard-js/src/data/combining.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
// string[]
import COMBINING_ from "./combining.json";

/**
* Set of characters classified as "Combining" according to the Unicode Standard version 15.1.0.
* Data is taken from https://unicode.org/.
*/
export const COMBINING: Set<string> = new Set(COMBINING_ as string[]);
export let COMBINING: Set<string> | null = null;

/**
 * Loads `combining.json` and stores its entries in the `COMBINING` set.
 * `COMBINING` is `null` until this function has been called.
 */
export function initializeCombining() {
  // The JSON file is read inside the function rather than at module load.
  COMBINING = new Set<string>(require("./combining.json"));
}
9 changes: 6 additions & 3 deletions packages/nameguard-js/src/data/hangul.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import HANGUL_JAMO_ from './hangul_jamo.json';

/**
* Contains all Hangul Jamo characters.
* Data is taken from https://unicode.org/ using Unicode version 15.1.0.
* This set is used in grapheme splitting to handle arbitrary Jamo sequences.
*/
export const HANGUL_JAMO: Set<string> = new Set(HANGUL_JAMO_);
export let HANGUL_JAMO: Set<string> | null = null;

/**
 * Loads `hangul_jamo.json` and stores its entries in the `HANGUL_JAMO` set.
 * `HANGUL_JAMO` is `null` until this function has been called.
 */
export function initializeHangulJamo() {
  // The JSON file is read inside the function rather than at module load.
  HANGUL_JAMO = new Set<string>(require("./hangul_jamo.json"));
}
24 changes: 24 additions & 0 deletions packages/nameguard-js/src/data/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { initializeCanonicals } from "./canonicals";
import { initializeCombining } from "./combining";
import { initializeHangulJamo } from "./hangul";
import { initializeInvisibleJoiners } from "./invisible_joiners";
import { initializeUnicode } from "./unicode";

let INITIALIZED = false;

/**
 * Initializes all data structures.
 * This function should be called before any other functions in this module.
 * Once a call has completed, further calls do nothing.
 */
export function initializeData() {
  if (!INITIALIZED) {
    // Run every initializer in a fixed order.
    const initializers = [
      initializeCanonicals,
      initializeCombining,
      initializeHangulJamo,
      initializeInvisibleJoiners,
      initializeUnicode,
    ];
    for (const initialize of initializers) {
      initialize();
    }
    // Set the flag only after every initializer has returned.
    INITIALIZED = true;
  }
}
9 changes: 6 additions & 3 deletions packages/nameguard-js/src/data/invisible_joiners.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import INVISIBLE_JOINERS_ from './invisible_joiners.json';

/**
* Contains invisible characters which are joined with preceding graphemes.
* Data is taken from the NameHash ens-label-inspector Python package.
*/
export const INVISIBLE_JOINERS: Set<string> = new Set(INVISIBLE_JOINERS_);
export let INVISIBLE_JOINERS: Set<string> | null = null;

/**
 * Loads `invisible_joiners.json` and stores its entries in the
 * `INVISIBLE_JOINERS` set.
 * `INVISIBLE_JOINERS` is `null` until this function has been called.
 */
export function initializeInvisibleJoiners() {
  // The JSON file is read inside the function rather than at module load.
  INVISIBLE_JOINERS = new Set<string>(require("./invisible_joiners.json"));
}
25 changes: 16 additions & 9 deletions packages/nameguard-js/src/data/unicode.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,18 @@
import EMOJI_SEQUENCES_ from './emoji_sequences.json';
import EMOJI_ZWJ_SEQUENCES_ from './emoji_zwj_sequences.json';
import EMOJI_BLOCK_STARTS_ from './emojis_starts.json';
import EMOJI_BLOCK_IS_EMOJI_ from './emojis_is_emoji.json';

/**
* This is a set of strings where each string represents a Unicode emoji.
* It provides a quick lookup to determine if a given string is an emoji.
* This does not include emojis with Zero Width Joiner (ZWJ) characters.
* The data is taken directly from Unicode version 15.1.0 at https://unicode.org/.
*/
export const EMOJI_SEQUENCES: Set<string> = new Set(EMOJI_SEQUENCES_);
export let EMOJI_SEQUENCES: Set<string> | null = null;

/**
* This is a set of strings where each string represents a Unicode emoji formed with Zero Width Joiner (ZWJ) characters.
* It provides a quick lookup to determine if a given string is an emoji with a ZWJ character.
* This does not include emojis without ZWJ characters.
* The data is taken directly from Unicode version 15.1.0 at https://unicode.org/.
*/
export const EMOJI_ZWJ_SEQUENCES: Set<string> = new Set(EMOJI_ZWJ_SEQUENCES_);
export let EMOJI_ZWJ_SEQUENCES: Set<string> | null = null;

/**
* The following two fields are used to determine if a given code point is an emoji using binary search.
Expand All @@ -28,5 +23,17 @@ export const EMOJI_ZWJ_SEQUENCES: Set<string> = new Set(EMOJI_ZWJ_SEQUENCES_);
* All characters in a block are either all emojis or all non-emojis.
* The blocks are generated from Unicode version 15.1.0 at https://unicode.org/.
*/
export const EMOJI_BLOCK_STARTS: number[] = EMOJI_BLOCK_STARTS_;
export const EMOJI_BLOCK_IS_EMOJI: boolean[] = EMOJI_BLOCK_IS_EMOJI_;
export let EMOJI_BLOCK_STARTS: number[] | null = null;
export let EMOJI_BLOCK_IS_EMOJI: boolean[] | null = null;

/**
 * Loads the four emoji JSON files and fills in `EMOJI_SEQUENCES`,
 * `EMOJI_ZWJ_SEQUENCES`, `EMOJI_BLOCK_STARTS` and `EMOJI_BLOCK_IS_EMOJI`.
 * Each of those exports is `null` until this function has been called.
 */
export function initializeUnicode() {
  // All four files are loaded before any export is assigned.
  const sequences: string[] = require("./emoji_sequences.json");
  const zwjSequences: string[] = require("./emoji_zwj_sequences.json");
  const blockStarts: number[] = require("./emojis_starts.json");
  const blockIsEmoji: boolean[] = require("./emojis_is_emoji.json");

  // The sequence lists become sets; the block arrays are stored as loaded.
  EMOJI_SEQUENCES = new Set(sequences);
  EMOJI_ZWJ_SEQUENCES = new Set(zwjSequences);
  EMOJI_BLOCK_STARTS = blockStarts;
  EMOJI_BLOCK_IS_EMOJI = blockIsEmoji;
}
13 changes: 11 additions & 2 deletions packages/nameguard-js/src/graphemes.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { describe, it, expect } from "vitest";
import { describe, it, expect, beforeAll } from "vitest";
import { splitGraphemes, countGraphemes } from "./graphemes";
import jsonNamehashExamples from "../utils/normalized_graphemes.json";
import jsonNamehashExamples from "./data/normalized_graphemes.json";
import { initializeData } from "./data";

const grapehemeTestInputs = [
"",
Expand Down Expand Up @@ -34,6 +35,10 @@ const graphemeTestOutputs = [
];

describe("countGraphemes", () => {
beforeAll(() => {
initializeData();
});

it("should count graphemes in a string", () => {
for (const example_idx in grapehemeTestInputs) {
expect(countGraphemes(grapehemeTestInputs[example_idx])).toBe(
Expand All @@ -44,6 +49,10 @@ describe("countGraphemes", () => {
});

describe("splitGraphemes", () => {
beforeAll(() => {
initializeData();
});

it("should split strings into graphemes", () => {
for (const example_idx in grapehemeTestInputs) {
expect(splitGraphemes(grapehemeTestInputs[example_idx])).toStrictEqual(
Expand Down
28 changes: 21 additions & 7 deletions packages/nameguard-js/src/graphemes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@ import { INVISIBLE_JOINERS } from "./data/invisible_joiners";
/**
* Splits the input string into what users perceive as "characters", called graphemes.
*
* This function extends the official Unicode grapheme splitting algorithm with additional features.
* It matches the algorithm used by NameGuard which introduces user-friendly features like Hangul and invisible character splitting.
* This function extends the official Unicode grapheme splitting algorithm
* with additional features. It matches the algorithm used by NameGuard
* which introduces user-friendly features like Hangul and invisible character splitting.
*
* Splitting is performed using the [text-segmentation](https://github.com/niklasvh/text-segmentation) library with added special Hangul treatment.
* This makes it possible to handle strings with arbitrary Hangul Jamo sequences that most operating systems render as distinct graphemes.
* Without this fix, some Hangul Jamo would be merged into one grapheme which would seem confusing to the user who sees them as separate.
* See splitGraphemes.test.ts for examples.
* This function also handles invisible characters within graphemes, ensuring they are split into separate graphemes for better clarity.
* Splitting is performed using the [text-segmentation](https://github.com/niklasvh/text-segmentation)
* library with added special Hangul treatment. This makes it possible to handle strings
* with arbitrary Hangul Jamo sequences that most operating systems render as distinct graphemes.
* Without this fix, some Hangul Jamo would be merged into one grapheme which would
* seem confusing to the user who sees them as separate. See graphemes.test.ts for examples.
* This function also handles invisible characters within graphemes,
* ensuring they are split into separate graphemes for better clarity.
*
* This implementation is safe to use in all modern web browsers,
* unlike the related browser API for splitting graphemes according to the Unicode standard,
Expand Down Expand Up @@ -77,6 +80,17 @@ export function splitGraphemes(name: string): string[] {
return graphemes;
}

/**
 * Returns how many graphemes the given string contains.
 *
 * The input is split with `splitGraphemes` and the length of the resulting
 * list is returned. Every grapheme produced by the split is counted,
 * including invisible characters and label separators.
 *
 * @param name - The input string whose graphemes are counted.
 * @returns The number of graphemes in the input string.
 */
export function countGraphemes(name: string): number {
  const graphemes = splitGraphemes(name);
  return graphemes.length;
}
50 changes: 28 additions & 22 deletions packages/nameguard-js/src/impersonation.test.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,31 @@
import { describe, it, expect } from "vitest";
import { computeImpersonationStatus } from "./impersonation";
import { describe, it, expect, beforeAll } from "vitest";
import { computeImpersonationEstimate } from "./impersonation";
import { initializeData } from "./data";

describe("Impersonation", () => {
it("should return impersonation status", () => {
expect(computeImpersonationStatus("nick.eth")).toBe("unlikely");
expect(computeImpersonationStatus("nićk.eth")).toBe("potential");
expect(computeImpersonationStatus("vitalik.eth")).toBe("unlikely");
expect(computeImpersonationStatus("vitalìk.eth")).toBe("potential");
expect(computeImpersonationStatus("٧٣٧.eth")).toBe("unlikely");
expect(computeImpersonationStatus("poet.base.eth")).toBe("unlikely");
expect(computeImpersonationStatus("exampleprimary.cb.id")).toBe("unlikely");
expect(computeImpersonationStatus("888‍‍.eth")).toBe("potential");
expect(computeImpersonationStatus("‍‍❤‍‍.eth")).toBe("potential");
expect(computeImpersonationStatus("٠٠۱.eth")).toBe("potential");
expect(computeImpersonationStatus("۸۸۷۵۴۲.eth")).toBe("potential");
expect(computeImpersonationStatus("୨୨୨୨୨.eth")).toBe("potential");
expect(computeImpersonationStatus("┣▇▇▇═─.eth")).toBe("potential");
expect(computeImpersonationStatus("сбер.eth")).toBe("potential");
expect(computeImpersonationStatus("vitȧlik.eth")).toBe("potential");
expect(computeImpersonationStatus("vıtalik.eth")).toBe("potential");
expect(computeImpersonationStatus("vincξnt.eth")).toBe("unlikely");
expect(computeImpersonationStatus("hello<world>!.eth")).toBe("potential");
describe("computeImpersonationEstimate", () => {
beforeAll(() => {
initializeData();
});

it("should return impersonation estimate", () => {
// examples taken from Python Nameguard API tests
expect(computeImpersonationEstimate("nick.eth")).toBe("unlikely");
expect(computeImpersonationEstimate("nićk.eth")).toBe("potential");
expect(computeImpersonationEstimate("vitalik.eth")).toBe("unlikely");
expect(computeImpersonationEstimate("vitalìk.eth")).toBe("potential");
expect(computeImpersonationEstimate("٧٣٧.eth")).toBe("unlikely");
expect(computeImpersonationEstimate("poet.base.eth")).toBe("unlikely");
expect(computeImpersonationEstimate("exampleprimary.cb.id")).toBe("unlikely");
expect(computeImpersonationEstimate("888‍‍.eth")).toBe("potential");
expect(computeImpersonationEstimate("‍‍❤‍‍.eth")).toBe("potential");
expect(computeImpersonationEstimate("٠٠۱.eth")).toBe("potential");
expect(computeImpersonationEstimate("۸۸۷۵۴۲.eth")).toBe("potential");
expect(computeImpersonationEstimate("୨୨୨୨୨.eth")).toBe("potential");
expect(computeImpersonationEstimate("┣▇▇▇═─.eth")).toBe("potential");
expect(computeImpersonationEstimate("сбер.eth")).toBe("potential");
expect(computeImpersonationEstimate("vitȧlik.eth")).toBe("potential");
expect(computeImpersonationEstimate("vıtalik.eth")).toBe("potential");
expect(computeImpersonationEstimate("vincξnt.eth")).toBe("unlikely");
expect(computeImpersonationEstimate("hello<world>!.eth")).toBe("potential");
});
});
4 changes: 2 additions & 2 deletions packages/nameguard-js/src/impersonation.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ImpersonationStatus } from "@namehash/nameguard";
import { ImpersonationEstimate } from "@namehash/nameguard";
import { getNormalizedCanonicalLabel } from "./canonical";

const LABELHASH_REGEX = /^\[[0-9a-f]{64}\]$/;
Expand All @@ -19,7 +19,7 @@ function isLabelhash(label: string): boolean {
* @param name - The name to analyze.
* @returns The impersonation estimate for the given name.
*/
export function computeImpersonationStatus(name: string): ImpersonationStatus {
export function computeImpersonationEstimate(name: string): ImpersonationEstimate {
// We do not need codepoint splitting here, as we only check for empty names.
// If the name is empty, it has 0 labels and .split would return an array with one empty string.
const labels = name.length === 0 ? [] : name.split(".");
Expand Down
12 changes: 4 additions & 8 deletions packages/nameguard-js/src/nameguard-js.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,11 @@ const PROVIDER_URI_MAINNET = process.env.PROVIDER_URI_MAINNET;
const PROVIDER_URI_SEPOLIA = process.env.PROVIDER_URI_SEPOLIA;

if (!PROVIDER_URI_MAINNET) {
throw new Error(
"The PROVIDER_URI_MAINNET environment variable is not defined.",
);
console.warn("PROVIDER_URI_MAINNET is not defined. Defaulting to viem's default provider, which may have rate limiting and other performance limitations.");
}

if (!PROVIDER_URI_SEPOLIA) {
throw new Error(
"The PROVIDER_URI_SEPOLIA environment variable is not defined.",
);
console.warn("PROVIDER_URI_SEPOLIA is not defined. Defaulting to viem's default provider, which may have rate limiting and other performance limitations.");
}

/**
Expand All @@ -34,7 +30,7 @@ describe("NameGuardJS", () => {

const localNameguard = createClient({
// not a real endpoint, will error if used
endpoint: INVALID_NAMEGUARD_API_ENDPOINT,
nameguardEndpoint: INVALID_NAMEGUARD_API_ENDPOINT,
publicClient,
});

Expand All @@ -53,7 +49,7 @@ describe("NameGuardJS", () => {

const localNameguard = createClient({
// not a real endpoint, will error if used
endpoint: INVALID_NAMEGUARD_API_ENDPOINT,
nameguardEndpoint: INVALID_NAMEGUARD_API_ENDPOINT,
publicClient
});

Expand Down
Loading

0 comments on commit 47de3ad

Please sign in to comment.