Skip to content

Commit

Permalink
utf8
Browse files Browse the repository at this point in the history
  • Loading branch information
serprex committed Mar 27, 2024
0 parents commit de3aa55
Show file tree
Hide file tree
Showing 8 changed files with 499 additions and 0 deletions.
15 changes: 15 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: go test

on: [push]

jobs:
build:
runs-on: ubicloud-standard-2-ubuntu-2204-arm
steps:
- uses: actions/checkout@v4
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: "1.22"
- run: go get .
- run: go test
14 changes: 14 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
BSD Zero Clause License

Copyright (c) 2024 PeerDB

Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
7 changes: 7 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
module github.com/PeerDB-io/gluautf8

go 1.20

require (
github.com/yuin/gopher-lua v1.1.1
)
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
17 changes: 17 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# utf8 for gopher-lua

Implements the Lua 5.3 [utf8](https://www.lua.org/manual/5.3/manual.html#6.5) library for [gopher-lua](https://github.com/yuin/gopher-lua). Usage:
```go
import (
"github.com/PeerDB-io/gluautf8"
)

// add so that `local utf8 = require("utf8")` works
L.PreloadModule("utf8", gluautf8.Loader)

// or add to global env
L.Push(L.NewFunction(gluautf8.Loader))
L.Call(0, 1)
L.Env.RawSetString("utf8", L.Get(-1))
L.Pop(1)
```
210 changes: 210 additions & 0 deletions utf8.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
package gluautf8

import (
"unicode/utf8"
"unsafe"

"github.com/yuin/gopher-lua"
)

// Utf8charpattern is the value exposed to Lua as utf8.charpattern: the byte
// string "[\0-\x7F\xC2-\xF4][\x80-\xBF]*". It is written with hex escapes
// because it contains a NUL byte and bytes that are not valid UTF-8.
var Utf8charpattern = lua.LString("[\x00-\x7f\xc2-\xf4][\x80-\xbf]*")

// Loader builds the utf8 module table and pushes it onto the Lua stack.
//
// The table holds the five library functions (char, codes, codepoint, len,
// offset) plus the charpattern string, hence the size hint of 6.
// It returns 1, the number of values pushed.
func Loader(ls *lua.LState) int {
	module := ls.CreateTable(0, 6)
	functions := map[string]lua.LGFunction{
		"char":      Utf8char,
		"codes":     Utf8codes,
		"codepoint": Utf8codepoint,
		"len":       Utf8len,
		"offset":    Utf8offset,
	}
	ls.SetFuncs(module, functions)
	module.RawSetString("charpattern", Utf8charpattern)

	ls.Push(module)
	return 1
}

// Utf8char implements utf8.char: it takes zero or more integer code points
// and pushes a single string holding the UTF-8 encoding of each, in order.
//
// It raises "value out of range" when an argument is negative or greater
// than U+10FFFF. The check is made on the full-width int before narrowing to
// rune, so a large value cannot wrap around into the accepted range.
//
// NOTE(review): encoding is delegated to utf8.AppendRune, which is documented
// to emit U+FFFD for runes it considers invalid; surrogate code points
// presumably fall in that category.
func Utf8char(ls *lua.LState) int {
	args := ls.GetTop()
	// Every code point needs at least one byte, so args is a lower bound.
	b := make([]byte, 0, args)
	for i := 1; i <= args; i++ {
		code := ls.CheckInt(i)
		if code < 0 || code > utf8.MaxRune {
			ls.RaiseError("value out of range")
		}
		b = utf8.AppendRune(b, rune(code))
	}
	// b is not used after this point, so the string may share its storage.
	ls.Push(lua.LString(unsafe.String(unsafe.SliceData(b), len(b))))
	return 1
}

// utf8iter is the stepping function handed out by Utf8codes.
//
// Argument 1 is the string being traversed and argument 2 is the 1-based byte
// position returned by the previous step (0 before the first step). It pushes
// the 1-based byte position of the next character and its code point, or
// pushes nothing once the end of the string is reached.
//
// It raises "invalid UTF-8 code" when the bytes at the next position are not
// a valid encoding, or when the decoded character is directly followed by a
// stray continuation byte.
func utf8iter(ls *lua.LState) int {
	s := ls.CheckString(1)
	n := ls.CheckInt(2) - 1
	if n < 0 {
		// First iteration: start at the first byte.
		n = 0
	} else if n < len(s) {
		// Skip the current lead byte and any continuation bytes after it.
		n++
		for n < len(s) && !utf8.RuneStart(s[n]) {
			n++
		}
	}
	if n >= len(s) {
		return 0
	}

	r, size := utf8.DecodeRuneInString(s[n:])
	// A decoding failure is reported as (RuneError, 1). A string that really
	// contains U+FFFD decodes to RuneError with size 3 and must be accepted.
	if r == utf8.RuneError && size <= 1 {
		ls.RaiseError("invalid UTF-8 code")
	}
	// A continuation byte right after a complete character belongs to no
	// character; reject it instead of letting the next step skip over it.
	if next := n + size; next < len(s) && !utf8.RuneStart(s[next]) {
		ls.RaiseError("invalid UTF-8 code")
	}
	ls.Push(lua.LNumber(n + 1))
	ls.Push(lua.LNumber(r))
	return 2
}

// Utf8codes implements utf8.codes. It pushes the three values expected by a
// generic for loop: the iterator function, the string to traverse, and the
// initial control value 0. It returns 3, the number of values pushed.
func Utf8codes(ls *lua.LState) int {
	subject := ls.CheckString(1)
	step := ls.NewFunction(utf8iter)

	ls.Push(step)
	ls.Push(lua.LString(subject))
	ls.Push(lua.LNumber(0))
	return 3
}

// Utf8codepoint implements utf8.codepoint(s [, i [, j]]): it pushes the code
// point of every character that starts at a byte position in [i, j].
//
// i defaults to 1 and j defaults to i. Negative positions count from the end
// of the string. Internally i is converted to a 0-based start offset while j
// is kept as a 1-based inclusive end, which equals a 0-based exclusive end.
//
// It pushes nothing for an empty interval, raises "position out of range" for
// positions outside the string, and raises "invalid UTF-8 code" when an
// invalid byte sequence is met. It returns the number of code points pushed.
func Utf8codepoint(ls *lua.LState) int {
	s := ls.CheckString(1)
	i := ls.OptInt(2, 1)
	j := ls.OptInt(3, i)
	i -= 1
	if i < 0 {
		i += len(s) + 1
	}
	if j < 0 {
		j += len(s) + 1
	}
	if j <= i || i == len(s)+1 {
		return 0
	}
	if i < 0 || i > len(s) || j < 1 || j > len(s) {
		ls.RaiseError("position out of range")
	}

	// Here 0 <= i < j <= len(s), so s[i:] is never empty inside the loop.
	n := 0
	for i < j {
		r, size := utf8.DecodeRuneInString(s[i:])
		// A decoding failure is reported as (RuneError, 1). A validly encoded
		// U+FFFD also yields RuneError, but with size 3, and is a legitimate
		// code point to return.
		if r == utf8.RuneError && size <= 1 {
			ls.RaiseError("invalid UTF-8 code")
		}
		ls.Push(lua.LNumber(r))
		n++
		i += size
	}
	return n
}

// Utf8len implements utf8.len(s [, i [, j]]): it counts the characters that
// start at a byte position in [i, j].
//
// i defaults to 1 and j defaults to -1. Negative positions count from the end
// of the string. Internally i becomes a 0-based start offset and j a 0-based
// exclusive end.
//
// On success it pushes the count. When an invalid byte sequence is met it
// pushes false followed by the 1-based position of the first invalid byte.
// It raises "initial position out of string" or "final position out of
// string" when a position lies outside the string; without these checks an
// out-of-range start would slice past the string and panic.
func Utf8len(ls *lua.LState) int {
	s := ls.CheckString(1)
	i := int(ls.OptNumber(2, 1))
	j := int(ls.OptNumber(3, -1))
	if i < 0 {
		i += len(s)
	} else {
		i -= 1
	}
	if j < 0 {
		j += len(s) + 1
	}
	if i < 0 || i > len(s) {
		ls.RaiseError("initial position out of string")
	}
	if j > len(s) {
		ls.RaiseError("final position out of string")
	}

	// Here i < j implies i < len(s), so s[i:] is never empty inside the loop.
	count := 0
	for i < j {
		r, size := utf8.DecodeRuneInString(s[i:])
		// A decoding failure is reported as (RuneError, 1). A validly encoded
		// U+FFFD also yields RuneError, but with size 3, and counts as a
		// normal character.
		if r == utf8.RuneError && size <= 1 {
			ls.Push(lua.LFalse)
			ls.Push(lua.LNumber(i + 1))
			return 2
		}
		i += size
		count++
	}
	ls.Push(lua.LNumber(count))
	return 1
}

// Utf8offset implements utf8.offset(s, n [, i]): it pushes the 1-based byte
// position where the n-th character, counted from byte position i, starts.
//
// i defaults to 1 when n is non-negative and to len(s)+1 otherwise; negative
// i counts from the end of the string. With n == 0 the result is the start of
// the character that contains byte i. With n > 0 the scan moves forward and
// with n < 0 it moves backward.
//
// It pushes nil when the requested character does not exist. When the scan
// meets an invalid byte sequence it pushes false followed by a 1-based
// position. It raises "position out of range" when i lies outside the string
// and "initial position is a continuation byte" when n != 0 and i does not
// point at the start of a character.
func Utf8offset(ls *lua.LState) int {
	s := ls.CheckString(1)
	n := ls.CheckInt(2)
	var i int
	if n < 0 {
		i = ls.OptInt(3, len(s)+1)
	} else {
		i = ls.OptInt(3, 1)
	}
	// Convert i to a 0-based offset; len(s) itself (one past the end) is valid.
	if i < 0 {
		i += len(s)
	} else {
		i -= 1
	}
	if i < 0 || i > len(s) {
		ls.RaiseError("position out of range")
	}

	if n == 0 {
		// Back up to the lead byte of the character containing byte i.
		for i > 0 && i < len(s) && !utf8.RuneStart(s[i]) {
			i--
		}
		ls.Push(lua.LNumber(i + 1))
		return 1
	}
	if i < len(s) && !utf8.RuneStart(s[i]) {
		ls.RaiseError("initial position is a continuation byte")
	}

	// In both scans a decoding failure is reported as (RuneError, 1). A
	// validly encoded U+FFFD also yields RuneError, but with size 3, and is
	// stepped over like any other character.
	if n > 0 {
		// The first character is the one at i, so it costs no movement.
		n--
		for n > 0 && i < len(s) {
			r, size := utf8.DecodeRuneInString(s[i:])
			if r == utf8.RuneError && size <= 1 {
				ls.Push(lua.LFalse)
				ls.Push(lua.LNumber(i + 1))
				return 2
			}
			i += size
			n--
		}
	} else {
		for n < 0 && i > 0 {
			r, size := utf8.DecodeLastRuneInString(s[:i])
			if r == utf8.RuneError && size <= 1 {
				ls.Push(lua.LFalse)
				ls.Push(lua.LNumber(i + 1))
				return 2
			}
			i -= size
			n++
		}
	}

	if n == 0 {
		// Every requested step was taken; i is the start of the character.
		ls.Push(lua.LNumber(i + 1))
		return 1
	}
	// Ran off either end of the string before reaching the n-th character.
	ls.Push(lua.LNil)
	return 1
}
Loading

0 comments on commit de3aa55

Please sign in to comment.