feat: [sc-25504] Proposed "maturity" next steps for NameGuard JS (#347)

* rename endpoint to nameguardEndpoint * document emoji functions * move normalized graphemes to data * format docstrings * document countGraphemes * rename impersonation status to impersonation estimate * remove magic number * use isCharacter * remove charCount re-export * warn about unset env vars * fix nameguard tests * nameguard js lazy init * fix pnpm build * organize nameguard sdk tests * Add changesets * test isCombiningChar * explain impersonation tests * interface for impersonation tests * fix renamed impersonation status --------- Co-authored-by: kwrobel.eth <[email protected]> Co-authored-by: lightwalker.eth <[email protected]>
namehash · Oct 30, 2024 · 47de3ad · 47de3ad
1 parent f6cbe5d
commit 47de3ad
Show file tree

Hide file tree

Showing 29 changed files with 497 additions and 326 deletions.
diff --git a/.changeset/fluffy-laws-grow.md b/.changeset/fluffy-laws-grow.md
@@ -0,0 +1,7 @@
+---
+"@namehash/nameguard": minor
+---
+
+- Rename `ImpersonationStatus` to `ImpersonationEstimate` to better manage expectations.
+- Rename `endpoint` param to `nameguardEndpoint` when creating a NameGuard Client for more self-documenting code.
+- Refined unit tests.
diff --git a/.changeset/late-phones-rush.md b/.changeset/late-phones-rush.md
@@ -0,0 +1,9 @@
+---
+"@namehash/nameguard-js": minor
+---
+
+- Optimize startup time by lazily initializing in-memory data models.
+- Refine documentation.
+- Refine unit tests.
+- Warn about likely timeout errors if the Ethereum provider environment variable is not set when running unit tests.
+- Upgrade to the latest NameGuard SDK.
diff --git a/apps/examples.nameguard.io/src/app/components/ImpersonationReport.tsx b/apps/examples.nameguard.io/src/app/components/ImpersonationReport.tsx
@@ -17,8 +17,8 @@ type ImpersonationReportProps = {
 export function ImpersonationReport({ data }: ImpersonationReportProps) {
   if (
     !data ||
-    data?.impersonation_status === null ||
-    data?.impersonation_status === "unlikely"
+    data?.impersonation_estimate === null ||
+    data?.impersonation_estimate === "unlikely"
   )
     return null;
 

diff --git a/apps/examples.nameguard.io/src/app/components/SecurePrimaryName.tsx b/apps/examples.nameguard.io/src/app/components/SecurePrimaryName.tsx
@@ -28,16 +28,16 @@ export async function SecurePrimaryName({ address }: Props) {
   const data = await nameguard.getSecurePrimaryName(address);
 
   const pillColor =
-    data.impersonation_status === "potential" ?
+    data.impersonation_estimate === "potential" ?
       "bg-red-300" :
-      data.impersonation_status === "unlikely" ?
+      data.impersonation_estimate === "unlikely" ?
         "bg-green-300" :
         "bg-yellow-300";
 
   const pillText =
-    data.impersonation_status === "potential" ?
+    data.impersonation_estimate === "potential" ?
       "Potential impersonation!" :
-      data.impersonation_status === "unlikely" ?
+      data.impersonation_estimate === "unlikely" ?
         "Name is secure" :
         "No primary name!";
 

diff --git a/packages/nameguard-js/src/confusables.test.ts b/packages/nameguard-js/src/confusables.test.ts
@@ -1,7 +1,12 @@
-import { describe, it, expect } from "vitest";
+import { describe, it, expect, beforeAll } from "vitest";
 import { isGraphemeConfusable, getCanonical } from "./confusables";
+import { initializeData } from "./data";
 
 describe("confusables", () => {
+  beforeAll(() => {
+    initializeData();
+  });
+
   it("should check if ASCII is confusable", () => {
     expect(isGraphemeConfusable("a")).toBe(false);
     expect(isGraphemeConfusable("A")).toBe(true);

diff --git a/packages/nameguard-js/src/data/canonicals.ts b/packages/nameguard-js/src/data/canonicals.ts
@@ -1,7 +1,3 @@
-// for compression the json type is:
-// { [key: string]: [string, number] }
-import GRAPHEME_CANONICALS_ from "./canonicals.json";
-
 /**
  * Stores information about a potentially confusable grapheme and its canonical form.
  */
@@ -22,7 +18,14 @@ export interface GraphemeCanonical {
  * Map containing graphemes and their canonical forms.
  * This data is taken from the NameHash ens-label-inspector Python package.
  */
-export const GRAPHEME_CANONICALS: Map<string, GraphemeCanonical> =
-  new Map(
-    Object.entries(GRAPHEME_CANONICALS_ as { [key: string]: [string, number] })
-    .map(([k, v]) => [k, { canonicalGrapheme: v[0], numConfusables: v[1] }]));
+export let GRAPHEME_CANONICALS: Map<string, GraphemeCanonical> | null = null;
+
+export function initializeCanonicals() {
+  // The json stores the data as a map of grapheme -> [canonicalGrapheme, numConfusables]
+  const GRAPHEME_CANONICALS_: { [key: string]: [string, number] } = require("./canonicals.json");
+  GRAPHEME_CANONICALS =
+    new Map(
+      Object.entries(GRAPHEME_CANONICALS_)
+        .map(([k, v]) => [k, { canonicalGrapheme: v[0], numConfusables: v[1] }])
+    );
+}
diff --git a/packages/nameguard-js/src/data/combining.ts b/packages/nameguard-js/src/data/combining.ts
@@ -1,8 +1,10 @@
-// string[]
-import COMBINING_ from "./combining.json";
-
 /**
  * Array of characters classified as "Combining" according to the Unicode Standard version 15.1.0.
  * Data is taken from https://unicode.org/.
  */
-export const COMBINING: Set<string> = new Set(COMBINING_ as string[]);
+export let COMBINING: Set<string> | null = null;
+
+export function initializeCombining() {
+  const COMBINING_: string[] = require("./combining.json");
+  COMBINING = new Set(COMBINING_);
+}
diff --git a/packages/nameguard-js/src/data/hangul.ts b/packages/nameguard-js/src/data/hangul.ts
@@ -1,8 +1,11 @@
-import HANGUL_JAMO_ from './hangul_jamo.json';
-
 /**
  * Contains all Hangul Jamo characters.
  * Data is taken from https://unicode.org/ using Unicode version 15.1.0.
  * This set is used in grapheme splitting to handle arbitrary Jamo sequences.
  */
-export const HANGUL_JAMO: Set<string> = new Set(HANGUL_JAMO_);
+export let HANGUL_JAMO: Set<string> | null = null;
+
+export function initializeHangulJamo() {
+  const HANGUL_JAMO_: string[] = require("./hangul_jamo.json");
+  HANGUL_JAMO = new Set(HANGUL_JAMO_);
+}
diff --git a/packages/nameguard-js/src/data/index.ts b/packages/nameguard-js/src/data/index.ts
@@ -0,0 +1,24 @@
+import { initializeCanonicals } from "./canonicals";
+import { initializeCombining } from "./combining";
+import { initializeHangulJamo } from "./hangul";
+import { initializeInvisibleJoiners } from "./invisible_joiners";
+import { initializeUnicode } from "./unicode";
+
+let INITIALIZED = false;
+
+/**
+ * Initializes all data structures.
+ * This function should be called before any other functions in this module.
+ * It is a no-op if it has already been called.
+ */
+export function initializeData() {
+  if (INITIALIZED) {
+    return;
+  }
+  initializeCanonicals();
+  initializeCombining();
+  initializeHangulJamo();
+  initializeInvisibleJoiners();
+  initializeUnicode();
+  INITIALIZED = true;
+}
diff --git a/packages/nameguard-js/src/data/invisible_joiners.ts b/packages/nameguard-js/src/data/invisible_joiners.ts
@@ -1,7 +1,10 @@
-import INVISIBLE_JOINERS_ from './invisible_joiners.json';
-
 /**
  * Contains invisible characters which are joined with preceding graphemes.
  * Data is taken from the NameHash ens-label-inspector Python package.
  */
-export const INVISIBLE_JOINERS: Set<string> = new Set(INVISIBLE_JOINERS_);
+export let INVISIBLE_JOINERS: Set<string> | null = null;
+
+export function initializeInvisibleJoiners() {
+  const INVISIBLE_JOINERS_: string[] = require("./invisible_joiners.json");
+  INVISIBLE_JOINERS = new Set(INVISIBLE_JOINERS_);
+}
diff --git a/...eguard-js/utils/normalized_graphemes.json → ...ard-js/src/data/normalized_graphemes.json b/...eguard-js/utils/normalized_graphemes.json → ...ard-js/src/data/normalized_graphemes.json
diff --git a/packages/nameguard-js/src/data/unicode.ts b/packages/nameguard-js/src/data/unicode.ts
@@ -1,23 +1,18 @@
-import EMOJI_SEQUENCES_ from './emoji_sequences.json';
-import EMOJI_ZWJ_SEQUENCES_ from './emoji_zwj_sequences.json';
-import EMOJI_BLOCK_STARTS_ from './emojis_starts.json';
-import EMOJI_BLOCK_IS_EMOJI_ from './emojis_is_emoji.json';
-
 /**
  * This is a set of strings where each string represents a Unicode emoji.
  * It provides a quick lookup to determine if a given string is an emoji.
  * This does not include emojis with Zero Width Joiner (ZWJ) characters.
  * The data is taken directly from Unicode version 15.1.0 at https://unicode.org/.
  */
-export const EMOJI_SEQUENCES: Set<string> = new Set(EMOJI_SEQUENCES_);
+export let EMOJI_SEQUENCES: Set<string> | null = null;
 
 /**
  * This is a set of strings where each string represents a Unicode emoji formed with Zero Width Joiner (ZWJ) characters.
  * It provides a quick lookup to determine if a given string is an emoji with a ZWJ character.
  * This does not include emojis without ZWJ characters.
  * The data is taken directly from Unicode version 15.1.0 at https://unicode.org/.
  */
-export const EMOJI_ZWJ_SEQUENCES: Set<string> = new Set(EMOJI_ZWJ_SEQUENCES_);
+export let EMOJI_ZWJ_SEQUENCES: Set<string> | null = null;
 
 /**
  * The following two fields are used to determine if a given code point is an emoji using binary search.
@@ -28,5 +23,17 @@ export const EMOJI_ZWJ_SEQUENCES: Set<string> = new Set(EMOJI_ZWJ_SEQUENCES_);
  * All characters in a block are either all emojis or all non-emojis.
  * The blocks are generated from Unicode version 15.1.0 at https://unicode.org/.
  */
-export const EMOJI_BLOCK_STARTS: number[] = EMOJI_BLOCK_STARTS_;
-export const EMOJI_BLOCK_IS_EMOJI: boolean[] = EMOJI_BLOCK_IS_EMOJI_;
+export let EMOJI_BLOCK_STARTS: number[] | null = null;
+export let EMOJI_BLOCK_IS_EMOJI: boolean[] | null = null;
+
+export function initializeUnicode() {
+  const EMOJI_SEQUENCES_: string[] = require("./emoji_sequences.json");
+  const EMOJI_ZWJ_SEQUENCES_: string[] = require("./emoji_zwj_sequences.json");
+  const EMOJI_BLOCK_STARTS_: number[] = require("./emojis_starts.json");
+  const EMOJI_BLOCK_IS_EMOJI_: boolean[] = require("./emojis_is_emoji.json");
+
+  EMOJI_SEQUENCES = new Set(EMOJI_SEQUENCES_);
+  EMOJI_ZWJ_SEQUENCES = new Set(EMOJI_ZWJ_SEQUENCES_);
+  EMOJI_BLOCK_STARTS = EMOJI_BLOCK_STARTS_;
+  EMOJI_BLOCK_IS_EMOJI = EMOJI_BLOCK_IS_EMOJI_;
+}
diff --git a/packages/nameguard-js/src/graphemes.test.ts b/packages/nameguard-js/src/graphemes.test.ts
@@ -1,6 +1,7 @@
-import { describe, it, expect } from "vitest";
+import { describe, it, expect, beforeAll } from "vitest";
 import { splitGraphemes, countGraphemes } from "./graphemes";
-import jsonNamehashExamples from "../utils/normalized_graphemes.json";
+import jsonNamehashExamples from "./data/normalized_graphemes.json";
+import { initializeData } from "./data";
 
 const grapehemeTestInputs = [
   "",
@@ -34,6 +35,10 @@ const graphemeTestOutputs = [
 ];
 
 describe("countGraphemes", () => {
+  beforeAll(() => {
+    initializeData();
+  });
+
   it("should count graphemes in a string", () => {
     for (const example_idx in grapehemeTestInputs) {
       expect(countGraphemes(grapehemeTestInputs[example_idx])).toBe(
@@ -44,6 +49,10 @@ describe("countGraphemes", () => {
 });
 
 describe("splitGraphemes", () => {
+  beforeAll(() => {
+    initializeData();
+  });
+
   it("should split strings into graphemes", () => {
     for (const example_idx in grapehemeTestInputs) {
       expect(splitGraphemes(grapehemeTestInputs[example_idx])).toStrictEqual(

diff --git a/packages/nameguard-js/src/graphemes.ts b/packages/nameguard-js/src/graphemes.ts
@@ -7,14 +7,17 @@ import { INVISIBLE_JOINERS } from "./data/invisible_joiners";
 /**
  * Splits the input string into what users perceive as "characters", called graphemes.
  *
- * This function extends the official Unicode grapheme splitting algorithm with additional features.
- * It matches the algorithm used by NameGuard which introduces user-friendly features like Hangul and invisible character splitting.
+ * This function extends the official Unicode grapheme splitting algorithm
+ * with additional features. It matches the algorithm used by NameGuard
+ * which introduces user-friendly features like Hangul and invisible character splitting.
  *
- * Splitting is performed using the [text-segmentation](https://github.com/niklasvh/text-segmentation) library with added special Hangul treatment.
- * This makes it possible to handle strings with arbitrary Hangul Jamo sequences that most operating systems render as distinct graphemes.
- * Without this fix, some Hangul Jamo would be merged into one grapheme which would seem confusing to the user who sees them as separate.
- * See splitGraphemes.test.ts for examples.
- * This function also handles invisible characters within graphemes, ensuring they are split into separate graphemes for better clarity.
+ * Splitting is performed using the [text-segmentation](https://github.com/niklasvh/text-segmentation)
+ * library with added special Hangul treatment. This makes it possible to handle strings
+ * with arbitrary Hangul Jamo sequences that most operating systems render as distinct graphemes.
+ * Without this fix, some Hangul Jamo would be merged into one grapheme which would
+ * seem confusing to the user who sees them as separate. See graphemes.test.ts for examples.
+ * This function also handles invisible characters within graphemes,
+ * ensuring they are split into separate graphemes for better clarity.
  *
  * This implementation is safe to use in all modern web browsers,
  * unlike the related browser API for splitting graphemes according to the Unicode standard,
@@ -77,6 +80,17 @@ export function splitGraphemes(name: string): string[] {
   return graphemes;
 }
 
+/**
+ * Counts the number of graphemes in a given string.
+ *
+ * This function uses the `splitGraphemes` function to split the input string
+ * into its constituent graphemes and then returns the count of these graphemes.
+ * The count will include all characters, including invisible characters
+ * and label separators.
+ *
+ * @param name - The input string to count graphemes from.
+ * @returns The number of graphemes in the input string.
+ */
 export function countGraphemes(name: string): number {
   return splitGraphemes(name).length;
 }
diff --git a/packages/nameguard-js/src/impersonation.test.ts b/packages/nameguard-js/src/impersonation.test.ts
@@ -1,25 +1,31 @@
-import { describe, it, expect } from "vitest";
-import { computeImpersonationStatus } from "./impersonation";
+import { describe, it, expect, beforeAll } from "vitest";
+import { computeImpersonationEstimate } from "./impersonation";
+import { initializeData } from "./data";
 
-describe("Impersonation", () => {
-  it("should return impersonation status", () => {
-    expect(computeImpersonationStatus("nick.eth")).toBe("unlikely");
-    expect(computeImpersonationStatus("nićk.eth")).toBe("potential");
-    expect(computeImpersonationStatus("vitalik.eth")).toBe("unlikely");
-    expect(computeImpersonationStatus("vitalìk.eth")).toBe("potential");
-    expect(computeImpersonationStatus("٧٣٧.eth")).toBe("unlikely");
-    expect(computeImpersonationStatus("poet.base.eth")).toBe("unlikely");
-    expect(computeImpersonationStatus("exampleprimary.cb.id")).toBe("unlikely");
-    expect(computeImpersonationStatus("888‍‍.eth")).toBe("potential");
-    expect(computeImpersonationStatus("‍‍❤‍‍.eth")).toBe("potential");
-    expect(computeImpersonationStatus("٠٠۱.eth")).toBe("potential");
-    expect(computeImpersonationStatus("۸۸۷۵۴۲.eth")).toBe("potential");
-    expect(computeImpersonationStatus("୨୨୨୨୨.eth")).toBe("potential");
-    expect(computeImpersonationStatus("┣▇▇▇═─.eth")).toBe("potential");
-    expect(computeImpersonationStatus("сбер.eth")).toBe("potential");
-    expect(computeImpersonationStatus("vitȧlik.eth")).toBe("potential");
-    expect(computeImpersonationStatus("vıtalik.eth")).toBe("potential");
-    expect(computeImpersonationStatus("vincξnt.eth")).toBe("unlikely");
-    expect(computeImpersonationStatus("hello<world>!.eth")).toBe("potential");
+describe("computeImpersonationEstimate", () => {
+  beforeAll(() => {
+    initializeData();
+  });
+
+  it("should return impersonation estimate", () => {
+    // examples taken from Python Nameguard API tests
+    expect(computeImpersonationEstimate("nick.eth")).toBe("unlikely");
+    expect(computeImpersonationEstimate("nićk.eth")).toBe("potential");
+    expect(computeImpersonationEstimate("vitalik.eth")).toBe("unlikely");
+    expect(computeImpersonationEstimate("vitalìk.eth")).toBe("potential");
+    expect(computeImpersonationEstimate("٧٣٧.eth")).toBe("unlikely");
+    expect(computeImpersonationEstimate("poet.base.eth")).toBe("unlikely");
+    expect(computeImpersonationEstimate("exampleprimary.cb.id")).toBe("unlikely");
+    expect(computeImpersonationEstimate("888‍‍.eth")).toBe("potential");
+    expect(computeImpersonationEstimate("‍‍❤‍‍.eth")).toBe("potential");
+    expect(computeImpersonationEstimate("٠٠۱.eth")).toBe("potential");
+    expect(computeImpersonationEstimate("۸۸۷۵۴۲.eth")).toBe("potential");
+    expect(computeImpersonationEstimate("୨୨୨୨୨.eth")).toBe("potential");
+    expect(computeImpersonationEstimate("┣▇▇▇═─.eth")).toBe("potential");
+    expect(computeImpersonationEstimate("сбер.eth")).toBe("potential");
+    expect(computeImpersonationEstimate("vitȧlik.eth")).toBe("potential");
+    expect(computeImpersonationEstimate("vıtalik.eth")).toBe("potential");
+    expect(computeImpersonationEstimate("vincξnt.eth")).toBe("unlikely");
+    expect(computeImpersonationEstimate("hello<world>!.eth")).toBe("potential");
   });
 });
diff --git a/packages/nameguard-js/src/impersonation.ts b/packages/nameguard-js/src/impersonation.ts
@@ -1,4 +1,4 @@
-import { ImpersonationStatus } from "@namehash/nameguard";
+import { ImpersonationEstimate } from "@namehash/nameguard";
 import { getNormalizedCanonicalLabel } from "./canonical";
 
 const LABELHASH_REGEX = /^\[[0-9a-f]{64}\]$/;
@@ -19,7 +19,7 @@ function isLabelhash(label: string): boolean {
  * @param name - The name to analyze.
  * @returns The impersonation estimate for the given name.
  */
-export function computeImpersonationStatus(name: string): ImpersonationStatus {
+export function computeImpersonationEstimate(name: string): ImpersonationEstimate {
   // We do not need codepoint splitting here, as we only check for empty names.
   // If the name is empty, it has 0 labels and .split would return an array with one empty string.
   const labels = name.length === 0 ? [] : name.split(".");

diff --git a/packages/nameguard-js/src/nameguard-js.test.ts b/packages/nameguard-js/src/nameguard-js.test.ts
@@ -7,15 +7,11 @@ const PROVIDER_URI_MAINNET = process.env.PROVIDER_URI_MAINNET;
 const PROVIDER_URI_SEPOLIA = process.env.PROVIDER_URI_SEPOLIA;
 
 if (!PROVIDER_URI_MAINNET) {
-  throw new Error(
-    "The PROVIDER_URI_MAINNET environment variable is not defined.",
-  );
+  console.warn("PROVIDER_URI_MAINNET is not defined. Defaulting to viem's default provider, which may have rate limiting and other performance limitations.");
 }
 
 if (!PROVIDER_URI_SEPOLIA) {
-  throw new Error(
-    "The PROVIDER_URI_SEPOLIA environment variable is not defined.",
-  );
+  console.warn("PROVIDER_URI_SEPOLIA is not defined. Defaulting to viem's default provider, which may have rate limiting and other performance limitations.");
 }
 
 /**
@@ -34,7 +30,7 @@ describe("NameGuardJS", () => {
 
     const localNameguard = createClient({
       // not a real endpoint, will error if used
-      endpoint: INVALID_NAMEGUARD_API_ENDPOINT,
+      nameguardEndpoint: INVALID_NAMEGUARD_API_ENDPOINT,
       publicClient,
     });
 
@@ -53,7 +49,7 @@ describe("NameGuardJS", () => {
 
     const localNameguard = createClient({
       // not a real endpoint, will error if used
-      endpoint: INVALID_NAMEGUARD_API_ENDPOINT,
+      nameguardEndpoint: INVALID_NAMEGUARD_API_ENDPOINT,
       publicClient
     });