Skip to content

Commit

Permalink
A cute numeral decoding trick (#67).
Browse files Browse the repository at this point in the history
It turns out that you can map uppercase letters to lowercase letters, and vice versa, with a bitwise operation (e.g. x | 32). It works because they are offset by a power of two relative to each other in ASCII. This trick turns 3 branches into 2, which makes hexadecimal decoding a few % faster. Note that the uppercase to lowercase transformation is simpler to represent in code, because it's easier to set a bit than it is to clear a bit using integer literals. I also reworked the numeral decoding and encoding test suite while I was at it.
  • Loading branch information
oscbyspro committed Aug 18, 2024
1 parent 0a24a7b commit 581bfe7
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 61 deletions.
31 changes: 24 additions & 7 deletions Sources/CoreKit/Models/TextInt+Numerals.swift
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,33 @@ extension TextInt {
// MARK: Utilities
//=--------------------------------------------------------------------=

@inlinable public func decode(_ text: consuming U8) throws -> U8 {
text &-= 48; if text < self.i00x10 { return text }
text &-= 17; if text < self.i10x36 { return text &+ 10 }
text &-= 32; if text < self.i10x36 { return text &+ 10 }
/// Decodes one ASCII numeral byte into its numeric value.
///
/// Digits are matched by subtracting 48 (ASCII `0`). Letters are matched
/// without regard to case: `data | 32` sets the bit that separates an
/// uppercase ASCII letter from its lowercase counterpart, so subtracting
/// 97 (ASCII `a`) afterwards handles both cases with one comparison.
///
/// - Parameter data: The ASCII byte to decode.
/// - Returns: `data &- 48` when that is below `self.i00x10`; otherwise
///   `((data | 32) &- 97) &+ 10` when the letter offset is below `self.i10x36`.
/// - Throws: `TextInt.Error.invalid` when neither comparison accepts the byte.
@inlinable public func decode(_ data: U8) throws -> U8 {
    // Both candidates are pure functions of `data`; the digit check still
    // takes priority, exactly as if they were computed one after the other.
    let digit  = data &- 48
    let letter = (data | 32) &- 97

    if digit  < self.i00x10 { return digit }
    if letter < self.i10x36 { return letter &+ 10 }

    throw TextInt.Error.invalid
}

@inlinable public func encode(_ data: consuming U8) throws -> U8 {
((((( ))))); if data < self.i00x10 { return data &+ self.o00x10 }
data &-= 10; if data < self.i10x36 { return data &+ self.o10x36 }
/// Encodes a numeric value as one ASCII numeral byte.
///
/// - Parameter data: The numeric value to encode.
/// - Returns: `data &+ self.o00x10` when `data` is below `self.i00x10`;
///   otherwise `(data &- 10) &+ self.o10x36` when `data &- 10` is below
///   `self.i10x36`.
/// - Throws: `TextInt.Error.invalid` when neither comparison accepts `data`.
@inlinable public func encode(_ data: U8) throws -> U8 {
    // Digit range: shift the value by the digit offset.
    guard data >= self.i00x10 else {
        return data &+ self.o00x10
    }

    // Letter range: rebase so that the value 10 becomes letter index 0.
    let letter = data &- 10

    guard letter >= self.i10x36 else {
        return letter &+ self.o10x36
    }

    throw TextInt.Error.invalid
}

Expand Down
4 changes: 2 additions & 2 deletions Tests/Benchmarks/TextInt+Base10.swift
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,15 @@ final class TextIntBenchmarksOnRadix10: XCTestCase {
func testDecodingOneMillionTimesBinaryIntegerAsUX() throws {
let encoded = blackHoleIdentity(Self.formatter.encode(UX.max))

for _ in 0 as UX ..< 1_000_000 {
for _ in 0 as UX ..< 1_000_000 {
precondition((try? Self.formatter.decode(encoded) as UX) != nil)
}
}

func testDecodingOneMillionTimesBinaryIntegerAsUXL() throws {
let encoded = blackHoleIdentity(Self.formatter.encode(UX.max))

for _ in 0 as UX ..< 1_000_000 {
for _ in 0 as UX ..< 1_000_000 {
precondition((try? Self.formatter.decode(encoded) as UXL) != nil)
}
}
Expand Down
122 changes: 76 additions & 46 deletions Tests/CoreKitTests/TextInt+Numerals.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,61 +14,91 @@ import TestKit
// MARK: * Text Int x Numerals
//*============================================================================*

extension TextIntTests {

final class TextIntTestsOnNumerals: XCTestCase {

//=------------------------------------------------------------------------=
// MARK: Metadata
//=------------------------------------------------------------------------=

static let radices: Range<UX> = 0..<37

static let letters: [TextInt.Letters] = [.uppercase, .lowercase]

//=------------------------------------------------------------------------=
// MARK: Tests
//=------------------------------------------------------------------------=

func testNumerals() {
//=--------------------------------------=
// test: the maximum radix is 36
//=--------------------------------------=
Test().failure({ try T.Numerals(37, letters: .lowercase) }, E.invalid)
Test().failure({ try T.Numerals(37, letters: .uppercase) }, E.invalid)
//=--------------------------------------=
// test: for each radix in 0 through 36
//=--------------------------------------=
for radix: UX in 0 ... 36 {
guard let lowercase = Test().success({ try T.Numerals(radix, letters: .lowercase) }) else { break }
guard let uppercase = Test().success({ try T.Numerals(radix, letters: .uppercase) }) else { break }
//=----------------------------------=
Test().same(lowercase.radix, U8(load: radix))
Test().same(lowercase.letters, TextInt.Letters.lowercase)
Test().same(uppercase.radix, U8(load: radix))
Test().same(uppercase.letters, TextInt.Letters.uppercase)
//=----------------------------------=
// test: decoding
//=----------------------------------=
for coder in [lowercase, uppercase] {
for byte in Self.numerals.invalid {
Test().failure({ try coder.decode(U8(byte)) }, E.invalid)
}
/// Verifies `TextInt.Numerals` initialization across radices and letter cases.
///
/// Each radix in `Self.radices` must produce an instance that reports the
/// requested radix and letters. Each radix from `Self.radices.upperBound`
/// through 255 must fail with `TextInt.Error.invalid`.
func testInitialization() throws {
    // Accepted radices: the instance echoes its configuration.
    for accepted in Self.radices {
        for casing in Self.letters {
            let instance = try TextInt.Numerals(accepted, letters: casing)
            Test().same(instance.radix, U8(accepted))
            Test().same(instance.letters, casing)
        }
    }

    // Rejected radices: everything past the accepted range, up to 255.
    let rejected = Self.radices.upperBound...255

    for radix in rejected {
        for casing in Self.letters {
            Test().failure({ try TextInt.Numerals(radix, letters: casing) }, TextInt.Error.invalid)
        }
    }
}

func testDecodingEachByte() throws {
var expectation: [U8: U8] = [:]

for source in [Self.numerals.lowercase, Self.numerals.uppercase] {
for index in source.indices {
let text = U8(source[ index])
let data = U8(load: IX(index))
if data < radix {
Test().success({ try coder.decode(text) }, data)
} else {
Test().failure({ try coder.decode(U8(source[index])) }, E.invalid)
}
Test().same(UInt8(ascii: "0"), 048)
Test().same(UInt8(ascii: "9"), 057)

for key: U8 in 048...057 {
expectation[key] = key - 48
}

Test().same(UInt8(ascii: "A"), 065)
Test().same(UInt8(ascii: "Z"), 090)

for key: U8 in 065...090 {
expectation[key] = key - 55
}

Test().same(UInt8(ascii: "a"), 097)
Test().same(UInt8(ascii: "z"), 122)

for key: U8 in 097...122 {
expectation[key] = key - 87
}

for radix in Self.radices {
for letters in Self.letters {
let numerals = try TextInt.Numerals(radix, letters: letters)

for key in U8.min...U8.max {
if let value = expectation[key], value < radix {
Test().success({ try numerals.decode(key) }, value)
} else {
Test().failure({ try numerals.decode(key) }, TextInt.Error.invalid)
}
}
}
//=----------------------------------=
// test: encoding
//=----------------------------------=
for value in U8.min ..< U8(load: radix) {
Test().success({ try lowercase.encode(value) }, U8(Self.numerals.lowercase[Int(IX(load: value))]))
Test().success({ try uppercase.encode(value) }, U8(Self.numerals.uppercase[Int(IX(load: value))]))
}

for value in U8(load: radix) ... U8.max {
Test().failure({ try lowercase.encode(value) }, E.invalid)
Test().failure({ try uppercase.encode(value) }, E.invalid)
}
}

/// Verifies `encode(_:)` for every `U8` value, for each radix in
/// `Self.radices` and for both letter cases.
func testEncodingEachByte() throws {
    /// Checks one letter case against `expectation`, whose element at
    /// index `i` is the byte that the value `i` must encode to.
    func whereIs(letters: TextInt.Letters, expectation: [U8]) throws {
        for radix in Self.radices {
            let numerals = try TextInt.Numerals(radix, letters: letters)

            // Values below the radix encode to the expected numeral.
            for data in U8.min..<U8(radix) {
                Test().success({ try numerals.encode(data) }, expectation[Int(IX(data))])
            }

            // Every value from the radix through U8.max must be rejected.
            // The range is closed so that U8.max itself is covered too;
            // a half-open range would silently skip it.
            for data in U8(radix)...U8.max {
                Test().failure({ try numerals.encode(data) }, TextInt.Error.invalid)
            }
        }
    }

    // Digits 0-9 are ASCII 48...57; letters are ASCII 65...90 or 97...122.
    try whereIs(letters: .uppercase, expectation: [48...57, 65...090].flatMap({ $0 }))
    try whereIs(letters: .lowercase, expectation: [48...57, 97...122].flatMap({ $0 }))
}
}
6 changes: 0 additions & 6 deletions Tests/CoreKitTests/TextInt.swift
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,6 @@ final class TextIntTests: XCTestCase {

static let masks: [(data: Bit?, text: String)] = [(nil, ""), (Bit .zero, "#"), (Bit .one, "&")]

static let numerals: (lowercase: [UInt8], uppercase: [UInt8], invalid: [UInt8]) = (
[48...57, 97...122].flatMap({ $0 }),
[48...57, 65...090].flatMap({ $0 }),
[00...47, 58...64, 91...96, 123...255].flatMap({ $0 })
)

//=------------------------------------------------------------------------=
// MARK: Tests
//=------------------------------------------------------------------------=
Expand Down

0 comments on commit 581bfe7

Please sign in to comment.