Skip to content

Commit

Permalink
Implement multi-language DLL register + bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
mufeedvh committed Dec 13, 2023
1 parent 3174277 commit f468d1f
Show file tree
Hide file tree
Showing 97 changed files with 5,954 additions and 79 deletions.
34 changes: 18 additions & 16 deletions govarnam-rust/example/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,24 @@ static VARNAM: Lazy<Varnam> = Lazy::new(|| {
});

fn main() {
let mut matches: Vec<(String, String)> = Vec::with_capacity(20);

let results = VARNAM.transliterate("namaskkaaram");
// for _ in 0..50 {
let mut matches: Vec<(String, String)> = Vec::with_capacity(20);

let results = VARNAM.transliterate("namaskkaaram");

// for item in results {
// println!(
// "Word: {}, Weight: {}, Learned on: {}",
// item.to_string(),
// item.weight,
// item.learned_on,
// );
// }

for result in results {
matches.push(("input".into(), result.to_string()))
}

// for item in results {
// println!(
// "Word: {}, Weight: {}, Learned on: {}",
// item.to_string(),
// item.weight,
// item.learned_on,
// );
eprintln!("{:?}", matches);
// }

for result in results {
matches.push(("input".into(), result.to_string()))
}

eprintln!("{:?}", matches);
}
16 changes: 12 additions & 4 deletions govarnam-rust/src/rvarnam.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,26 +47,34 @@ impl Varnam {
let vst_file = vst_file.as_ref().to_string_lossy().to_string();
let learning_file = learning_file.as_ref().to_string_lossy().to_string();
unsafe {
let _init_id = varnam_init(
let init_id = varnam_init(
vst_file.as_ptr() as *const i8,
learning_file.as_ptr() as *const i8,
&id,
);

while init_id != std::ptr::null() {
return Self::init(vst_file, learning_file);
}
};
// TODO: check error use init_id

Ok(Varnam { handle_id: id })
}

pub fn transliterate<T: AsRef<str>>(&self, word: T) -> Vec<Suggestion_t> {
let id: c_int = 1;
let word = CString::new(word.as_ref()).unwrap();
let c_word = CString::new(word.as_ref()).unwrap();
let mut varray_ptr = varray_t::init();
unsafe { varnam_transliterate(self.handle_id, id, word.as_ptr(), &mut varray_ptr) };
let trans_id = unsafe { varnam_transliterate(self.handle_id, id, c_word.as_ptr(), &mut varray_ptr) };
while trans_id != std::ptr::null() {
return self.transliterate(word);
}
let varray_pointer = unsafe { *varray_ptr as varray_t };
varray_pointer.into()
}
}


impl Drop for Varnam {
fn drop(&mut self) {
unsafe { varnam_close(self.handle_id) }
Expand Down
1 change: 1 addition & 0 deletions govarnam/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ install.sh
govarnam.pc

*.vst
a
20 changes: 2 additions & 18 deletions govarnam/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,11 @@ CURDIR := $(shell pwd)
ifeq ($(UNAME), Darwin)
SED := sed -i ""
LIB_NAME = libgovarnam.dylib
else ifeq ($(UNAME), Windows_NT)
INSTALL_DIR = "C:\\lib"
LIB_NAME = libgovarnam.dll
else
EXT_LDFLAGS = -extldflags "-Wl,-soname,$(LIB_NAME).$(SO_NAME),--version-script,$(CURDIR)/govarnam.syms"
endif

VERSION_STAMP_LDFLAGS := -X 'github.com/varnamproject/govarnam/govarnam.BuildString=${BUILDSTR}' -X 'github.com/varnamproject/govarnam/govarnam.VersionString=${VERSION}' $(EXT_LDFLAGS)

pc:
cp govarnam.pc.in govarnam.pc
${SED} "s#@INSTALL_PREFIX@#${INSTALL_PREFIX}#g" govarnam.pc
Expand Down Expand Up @@ -71,20 +67,8 @@ library-nosqlite:
CGO_ENABLED=1 go build -tags "fts5,libsqlite3" -buildmode=c-shared -ldflags "-s -w ${VERSION_STAMP_LDFLAGS}" -o ${LIB_NAME} .

library:
ifeq ($(UNAME), Windows_NT)
CGO_CFLAGS=-Dvarnam_EXPORTS CGO_ENABLED=1 go build -tags "fts5" -buildmode=c-shared -ldflags "-s -w ${VERSION_STAMP_LDFLAGS}" -o ${LIB_NAME} .
if not exist "${INSTALL_DIR}" (
mkdir "${INSTALL_DIR}"
)
copy ${LIB_NAME} "${INSTALL_DIR}"
echo %PATH% | findstr /C:"${INSTALL_DIR}" > nul
if errorlevel 1 (
setx PATH "%PATH%;${INSTALL_DIR}"
)
else
CGO_ENABLED=1 go build -tags "fts5" -buildmode=c-shared -ldflags "-s -w ${VERSION_STAMP_LDFLAGS}" -o ${LIB_NAME} .
ln -sf "$(realpath ./)/libgovarnam.so" "$(realpath ./)/libgovarnam.so.${SO_NAME}"
endif
CGO_ENABLED=1 go build -tags "fts5" -buildmode=c-shared -ldflags "-s -w ${VERSION_STAMP_LDFLAGS}" -o ${LIB_NAME} .
ln -sf "$(realpath ./)/libgovarnam.so" "$(realpath ./)/libgovarnam.so.${SO_NAME}"

library-mac-universal:
GOOS=darwin GOARCH=arm64 $(MAKE) library
Expand Down
8 changes: 1 addition & 7 deletions govarnam/c-shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,7 @@ func makeCTransliterationResult(ctx context.Context, goResult govarnam.Translite

// varnam_get_version returns govarnam.VersionString converted with C.CString.
// C.CString allocates the result in C memory, so the Go garbage collector
// does not release it.
//
//export varnam_get_version
func varnam_get_version() *C.char {
	return C.CString(govarnam.VersionString)
}

//export varnam_get_build
Expand Down Expand Up @@ -273,35 +272,30 @@ func varnam_debug(varnamHandleID C.int, val C.int) {
}

// varnam_set_indic_digits passes val to varnam_config under the
// VARNAM_CONFIG_USE_INDIC_DIGITS key for the handle varnamHandleID.
//
// Deprecated: use varnam_config instead.
//
//export varnam_set_indic_digits
func varnam_set_indic_digits(varnamHandleID C.int, val C.int) {
varnam_config(varnamHandleID, C.VARNAM_CONFIG_USE_INDIC_DIGITS, val)
}

// varnam_set_dictionary_suggestions_limit stores val, converted to int, in
// the DictionarySuggestionsLimit field of the varnam value held by the handle
// that getVarnamHandle returns for varnamHandleID.
//
// Deprecated: use varnam_config instead.
//
//export varnam_set_dictionary_suggestions_limit
func varnam_set_dictionary_suggestions_limit(varnamHandleID C.int, val C.int) {
getVarnamHandle(varnamHandleID).varnam.DictionarySuggestionsLimit = int(val)
}

// varnam_set_pattern_dictionary_suggestions_limit stores val, converted to
// int, in the PatternDictionarySuggestionsLimit field of the varnam value held
// by the handle that getVarnamHandle returns for varnamHandleID.
//
// Deprecated: use varnam_config instead.
//
//export varnam_set_pattern_dictionary_suggestions_limit
func varnam_set_pattern_dictionary_suggestions_limit(varnamHandleID C.int, val C.int) {
getVarnamHandle(varnamHandleID).varnam.PatternDictionarySuggestionsLimit = int(val)
}

// varnam_set_tokenizer_suggestions_limit stores val, converted to int, in the
// TokenizerSuggestionsLimit field of the varnam value held by the handle that
// getVarnamHandle returns for varnamHandleID.
//
// Deprecated: use varnam_config instead.
//
//export varnam_set_tokenizer_suggestions_limit
func varnam_set_tokenizer_suggestions_limit(varnamHandleID C.int, val C.int) {
getVarnamHandle(varnamHandleID).varnam.TokenizerSuggestionsLimit = int(val)
}

// Deprecated. Use varnam_config()
//
//export varnam_set_dictionary_match_exact
func varnam_set_dictionary_match_exact(varnamHandleID C.int, val C.int) {
if val == 0 {
Expand Down
2 changes: 1 addition & 1 deletion govarnam/govarnam/channel.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ func (varnam *Varnam) channelGetFromDictionary(ctx context.Context, word string,

if len(dictResult.partialMatches) > 0 {
// Tokenize the word after the longest match found in dictionary
restOfWord := word[dictResult.longestMatchPosition+1:]
restOfWord := string([]rune(word)[dictResult.longestMatchPosition+1:])

start := time.Now()

Expand Down
2 changes: 1 addition & 1 deletion govarnam/govarnam/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func getVSTLookupDirs() []string {
}
}

//FindVSTDir Get the VST storing directory
// FindVSTDir Get the VST storing directory
func FindVSTDir() (string, error) {
for _, loc := range getVSTLookupDirs() {
if dirExists(loc) {
Expand Down
2 changes: 1 addition & 1 deletion govarnam/govarnam/dictionary.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ type searchDictionaryResult struct {
func (varnam *Varnam) InitDict(dictPath string) error {
var err error

if dictPath == "" || !fileExists(dictPath) {
if !fileExists(dictPath) {
log.Printf("Making Varnam Learnings Dir for %s\n", dictPath)
err := os.MkdirAll(path.Dir(dictPath), 0750)
if err != nil {
Expand Down
4 changes: 3 additions & 1 deletion govarnam/govarnam/govarnam.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"sort"
"strings"
"time"
"unicode"
"unicode/utf8"

// sqlite3
Expand All @@ -25,6 +26,7 @@ type LangRules struct {
Virama string
IndicDigits bool
PatternLongestLength int // Longest length of pattern in VST
UnicodeBlock unicode.RangeTable
}

// SchemeDetails of VST
Expand Down Expand Up @@ -247,8 +249,8 @@ func (varnam *Varnam) setDefaultConfig() {
varnam.DictionaryMatchExact = false

varnam.LangRules.IndicDigits = false

varnam.LangRules.Virama, _ = varnam.getVirama()
varnam.LangRules.UnicodeBlock = varnam.getUnicodeBlock()

if varnam.SchemeDetails.LangCode == "ml" {
varnam.RegisterPatternWordPartializer(varnam.mlPatternWordPartializer)
Expand Down
14 changes: 14 additions & 0 deletions govarnam/govarnam/govarnam_ml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,7 @@ func TestMLRecentlyLearnedWords(t *testing.T) {
}

result, err = varnam.GetRecentlyLearntWords(context.Background(), 4, len(words))
checkError(err)
assertEqual(t, result[0].Word, "ആലപ്പുഴ")
}

Expand All @@ -495,3 +496,16 @@ func TestMLGetSuggestions(t *testing.T) {

assertEqual(t, result[0].Word, "ആലപ്പുഴ")
}

// TestMLNativePartialWordsInInput learns two Malayalam words and then checks
// the first dictionary suggestion for inputs that mix Malayalam characters
// with Latin-script patterns.
func TestMLNativePartialWordsInInput(t *testing.T) {
	varnam := getVarnamInstance("ml")

	for _, learnt := range [...]string{"ആലപ്പുഴ", "പുസ്തകം"} {
		varnam.Learn(learnt, 0)
	}

	// Each case pairs a mixed-script input with the word expected as the
	// top dictionary suggestion.
	cases := []struct {
		input    string
		expected string
	}{
		{"ആലppu", "ആലപ്പുഴ"},
		{"puസ്ത", "പുസ്തകം"},
		{"ആലippazham", "ആലിപ്പഴം"},
	}

	for _, tc := range cases {
		top := varnam.TransliterateAdvanced(tc.input).DictionarySuggestions[0]
		assertEqual(t, top.Word, tc.expected)
	}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package govarnam

import "unicode"

/**
* govarnam - An Indian language transliteration library
* Copyright Subin Siby <mail at subinsb (.) com>, 2021
Expand All @@ -23,3 +25,15 @@ func (varnam *Varnam) mlPatternWordPartializer(sug *Suggestion) {
sug.Word = sug.Word[0:len(sug.Word)-size] + "മ"
}
}

// getUnicodeBlock returns a range table for the script block of the scheme's
// language code: U+0C80..U+0CFF for "kn" and U+0D00..U+0D7F for "ml". Any
// other language code yields an empty table.
func (varnam *Varnam) getUnicodeBlock() unicode.RangeTable {
	// TODO add for all languages
	var first, last uint16

	switch varnam.SchemeDetails.LangCode {
	case "kn":
		first, last = 0x0C80, 0x0CFF
	case "ml":
		first, last = 0x0D00, 0x0D7F
	default:
		return unicode.RangeTable{}
	}

	block := unicode.Range16{Lo: first, Hi: last, Stride: 1}
	return unicode.RangeTable{R16: []unicode.Range16{block}}
}
20 changes: 15 additions & 5 deletions govarnam/govarnam/symbol.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"fmt"
"log"
"strings"
"unicode"

"github.com/mattn/go-sqlite3"
)
Expand Down Expand Up @@ -40,16 +41,18 @@ type Token struct {
character string // Non language character
}

var sqlite3WithLimitDriverRegistered bool
var sqlite3Conn *sqlite3.SQLiteConn

func openDB(path string) (*sql.DB, error) {
if sqlite3Conn == nil {
if !sqlite3WithLimitDriverRegistered {
sql.Register("sqlite3_with_limit", &sqlite3.SQLiteDriver{
ConnectHook: func(conn *sqlite3.SQLiteConn) error {
sqlite3Conn = conn
return nil
},
})
sqlite3WithLimitDriverRegistered = true
}

conn, err := sql.Open("sqlite3_with_limit", path)
Expand Down Expand Up @@ -274,10 +277,17 @@ func (varnam *Varnam) tokenizeWord(ctx context.Context, word string, matchType i
matches := varnam.findLongestPatternMatchSymbols(ctx, sequence, matchType, acceptCondition)

if len(matches) == 0 {
// No matches, add a character token
// Note that we just add 1 character, and move on
token := Token{VARNAM_TOKEN_CHAR, matches, i, string(sequence[:1])}
results = append(results, token)
if unicode.In(sequence[0], &varnam.LangRules.UnicodeBlock) {
// This helps to get suggestions in inputs like "ആലppu"
character := string(sequence[0])
token := Token{VARNAM_TOKEN_SYMBOL, []Symbol{{Value1: character}}, i, character}
results = append(results, token)
} else {
// No matches, add a character token
// Note that we just add 1 character, and move on
token := Token{VARNAM_TOKEN_CHAR, matches, i, string(sequence[:1])}
results = append(results, token)
}

i++
} else {
Expand Down
2 changes: 1 addition & 1 deletion govarnam/govarnamgo/govarnamgo_ml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func TestRecentlyLearnedWords(t *testing.T) {
func TestSearchSymbolTable(t *testing.T) {
varnam := getVarnamInstance("ml")

symbol := varnam.NewSearchSymbol()
symbol := NewSearchSymbol()
symbol.Pattern = "la"
result := varnam.SearchSymbolTable(context.Background(), symbol)

Expand Down
4 changes: 2 additions & 2 deletions govarnam/govarnamgo/govarnamgo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func assertEqual(t *testing.T, a interface{}, b interface{}) {
}

func setUp(schemeID string) {
varnam, err := VarnamInitFromID(schemeID)
varnam, err := InitFromID(schemeID)
checkError(err)

mutex.Lock()
Expand Down Expand Up @@ -67,7 +67,7 @@ func tearDown() {

func TestMain(m *testing.M) {
var err error
testTempDir, err = os.TempDir("", "govarnam_test")
testTempDir, err = os.MkdirTemp("", "govarnamgo_test")
checkError(err)

setUp("ml")
Expand Down
Binary file added govarnam/schemes/.DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions govarnam/schemes/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.vlf filter=lfs diff=lfs merge=lfs -text
*.txt filter=lfs diff=lfs merge=lfs -text
43 changes: 43 additions & 0 deletions govarnam/schemes/.github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# CI workflow: installs GoVarnam from source, runs ./build_all_schemes.sh and
# then the Ruby test runner, on pushes and pull requests targeting master.
name: Build

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  publish:
    name: Build
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          lfs: true
          submodules: recursive

      - name: Set up Go
        uses: actions/setup-go@v2
        with:
          # Quoted so YAML keeps the version a string instead of a float.
          go-version: '1.16'

      - name: Make GoVarnam
        run: |
          git clone https://github.com/varnamproject/govarnam.git govarnam
          cd govarnam
          make
          sudo make install

      - name: Dependencies
        run: |
          # -y keeps apt from waiting on a confirmation prompt in CI.
          sudo apt-get update
          sudo apt-get install -y ruby-ffi

      - name: Make Schemes
        run: |
          ./build_all_schemes.sh

      - name: Run Tests
        run: |
          ruby test/run.rb
Loading

0 comments on commit f468d1f

Please sign in to comment.