Skip to content

Commit

Permalink
feat: add czech language support (#879)
Browse files Browse the repository at this point in the history
  • Loading branch information
xhejtman authored Feb 28, 2025
1 parent 4a5b063 commit edb9d98
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion packages/orama/src/components/tokenizer/languages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ export const STEMMERS: Record<string, string> = {
arabic: 'ar',
armenian: 'am',
bulgarian: 'bg',
czech: 'cz',
danish: 'dk',
dutch: 'nl',
english: 'en',
Expand Down Expand Up @@ -59,7 +60,8 @@ export const SPLITTERS: Record<Language, RegExp> = {
slovenian: /[^a-z0-9螚ȎŠ]+/gim,
bulgarian: /[^a-z0-9а-яА-Я]+/gim,
tamil: /[^a-z0-9-]+/gim,
sanskrit: /[^a-z0-9A-Zāīūśñ]+/gim
sanskrit: /[^a-z0-9A-Zāīūśñ]+/gim,
czech: /[^A-Z0-9a-zěščřžýáíéúůóťďĚŠČŘŽÝÁÍÉÓÚŮŤĎ-]+/gim
}

/**
 * List of every language name that has an entry in STEMMERS — the object's
 * keys (e.g. 'czech'), not the short codes those keys map to.
 */
export const SUPPORTED_LANGUAGES = Object.keys(STEMMERS)
Expand Down

0 comments on commit edb9d98

Please sign in to comment.