diff --git a/packages/orama/src/components/tokenizer/languages.ts b/packages/orama/src/components/tokenizer/languages.ts
index 77345c201..857d834a1 100644
--- a/packages/orama/src/components/tokenizer/languages.ts
+++ b/packages/orama/src/components/tokenizer/languages.ts
@@ -2,6 +2,7 @@ export const STEMMERS: Record = {
   arabic: 'ar',
   armenian: 'am',
   bulgarian: 'bg',
+  czech: 'cz',
   danish: 'dk',
   dutch: 'nl',
   english: 'en',
@@ -59,7 +60,8 @@ export const SPLITTERS: Record = {
   slovenian: /[^a-z0-9螚ȎŠ]+/gim,
   bulgarian: /[^a-z0-9а-яА-Я]+/gim,
   tamil: /[^a-z0-9அ-ஹ]+/gim,
-  sanskrit: /[^a-z0-9A-Zāīūṛḷṃṁḥśṣṭḍṇṅñḻḹṝ]+/gim
+  sanskrit: /[^a-z0-9A-Zāīūṛḷṃṁḥśṣṭḍṇṅñḻḹṝ]+/gim,
+  czech: /[^A-Z0-9a-zěščřžýáíéúůóťďňĚŠČŘŽÝÁÍÉÓÚŮŤĎŇ-]+/gim
 }
 
 export const SUPPORTED_LANGUAGES = Object.keys(STEMMERS)