Skip to content

Commit

Permalink
[Py] add missing unicode categories in python library (#3872)
Browse files Browse the repository at this point in the history
* add missing unicode categories in python library

* update changelogs

---------

Co-authored-by: Maxime Mangel <me@mangelmaxime.fr>
  • Loading branch information
joprice and MangelMaxime authored Jan 19, 2025
1 parent 79a2610 commit 4f8930b
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 1 deletion.
4 changes: 4 additions & 0 deletions src/Fable.Cli/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* [All] Updated Fable-FCS to latest F# 9.0 (by @ncave)
* [All] Updated metadata to latest .NET 9.0 (by @ncave)

### Fixed

* [Py] Add missing Unicode categories in the Python library (by @joprice)

## 5.0.0-alpha.5 - 2025-01-09

### Added
Expand Down
4 changes: 4 additions & 0 deletions src/Fable.Compiler/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* [All] Updated Fable-FCS to latest F# 9.0 (by @ncave)
* [All] Updated metadata to latest .NET 9.0 (by @ncave)

### Fixed

* [Py] Add missing Unicode categories in the Python library (by @joprice)

## 5.0.0-alpha.5 - 2025-01-09

### Added
Expand Down
14 changes: 13 additions & 1 deletion src/fable-library-py/fable_library/char.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,19 @@ class UnicodeCategory(IntEnum):
"Sk": UnicodeCategory.ModifierSymbol,
"Mn": UnicodeCategory.NonSpacingMark,
"Lo": UnicodeCategory.OtherLetter,
"No": UnicodeCategory.OtherLetter,
"No": UnicodeCategory.OtherNumber,
"Lt": UnicodeCategory.TitlecaseLetter,
"Cn": UnicodeCategory.OtherNotAssigned,
"Co": UnicodeCategory.PrivateUse,
"Cs": UnicodeCategory.Surrogate,
"Zp": UnicodeCategory.ParagraphSeparator,
"Lm": UnicodeCategory.ModifierLetter,
"Mc": UnicodeCategory.SpacingCombiningMark,
"Me": UnicodeCategory.EnclosingMark,
"Pe": UnicodeCategory.ClosePunctuation,
"Pf": UnicodeCategory.FinalQuotePunctuation,
"Ps": UnicodeCategory.OpenPunctuation,
"So": UnicodeCategory.OtherSymbol,
}


Expand Down
51 changes: 51 additions & 0 deletions tests/Python/TestString.fs
Original file line number Diff line number Diff line change
Expand Up @@ -991,3 +991,54 @@ let ``test calling ToString(CultureInfo.InvariantCulture) works`` () =
(1).ToString(CultureInfo.InvariantCulture) |> equal "1"
(7923209L).ToString(CultureInfo.InvariantCulture) |> equal "7923209"
(7923209UL).ToString(CultureInfo.InvariantCulture) |> equal "7923209"


#if FABLE_COMPILER
open Fable.Core

/// Binding to the `category` member of Python's `unicodedata` module.
/// Takes a single character and returns a category code string; the test
/// below compares the result against two-letter codes such as "Lu" or "Nd".
/// `nativeOnly` is only a placeholder body: the implementation comes from the import.
[<Import("category", "unicodedata")>]
let unicodeCategory: char -> string = nativeOnly

/// Checks that characters from each Unicode general category can be passed
/// to `Char.IsLetterOrDigit` without raising. The sample table is verified
/// against Python's `unicodedata.category` so that each row really covers
/// the category it claims to cover.
[<Fact>]
let ``test unicode categories`` () =
    // Sample string -> expected two-letter general category code.
    let chars = [
        "\x00", "Cc"
        " ", "Zs"
        "!", "Po"
        "$", "Sc"
        "(", "Ps"
        ")", "Pe"
        "+", "Sm"
        "-", "Pd"
        "0", "Nd"
        "A", "Lu"
        "^", "Sk"
        "_", "Pc"
        "a", "Ll"
        "¦", "So"
        "ª", "Lo"
        "«", "Pi"
        "\xad", "Cf"
        "²", "No"
        "»", "Pf"
        "Dž", "Lt"
        "ʰ", "Lm"
        // Written as an escape (COMBINING GRAVE ACCENT): a literal combining
        // mark is easily lost by editors/tools, and an empty sample string
        // would make String.iter skip this category without any failure.
        "\u0300", "Mn"
        "\u0378", "Cn"
        "\u0488", "Me"
        "\u0903", "Mc"
        "\u16ee", "Nl"
        "\u2028", "Zl"
        "\u2029", "Zp"
        //TODO: this fails with error EXCEPTION: Unable to translate Unicode character \\uD800 at index 116 to specified code page.
        //"\ud800" , "Cs"
        "\ue000", "Co"
    ]
    for (s, cat) in chars do
        s
        |> String.iter (fun c ->
            // Sanity-check the table: the sample must belong to the category
            // listed next to it (expected value goes first, actual is piped in,
            // matching the other assertions in this file).
            unicodeCategory c |> equal cat
            // The result is irrelevant here; the call only has to complete
            // without raising for a character of this category.
            Char.IsLetterOrDigit c |> ignore
        )
#endif

0 comments on commit 4f8930b

Please sign in to comment.