run ./gradlew spotlessApply

WorksApplications · Jun 12, 2024 · 558da29
1 parent a5665e1
commit 558da29
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 14 deletions.
diff --git a/src/main/java/com/worksap/nlp/lucene/sudachi/ja/SudachiTokenizer.kt b/src/main/java/com/worksap/nlp/lucene/sudachi/ja/SudachiTokenizer.kt
@@ -19,14 +19,14 @@ package com.worksap.nlp.lucene.sudachi.ja
 import com.worksap.nlp.lucene.sudachi.ja.attributes.MorphemeAttribute
 import com.worksap.nlp.lucene.sudachi.ja.attributes.SudachiAttribute
 import com.worksap.nlp.lucene.sudachi.ja.attributes.SudachiAttributeFactory
+import java.io.StringReader
+import java.nio.CharBuffer
 import org.apache.lucene.analysis.Tokenizer
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute
 import org.apache.lucene.util.AttributeFactory
-import java.io.StringReader
-import java.nio.CharBuffer
 
 class SudachiTokenizer(
     private val tokenizer: CachingTokenizer,
@@ -57,7 +57,7 @@ class SudachiTokenizer(
     if (m == null) {
       // Create 1MB chunk
       // TODO: Should split with meaningful delimitations.
-      val buffer = CharBuffer.allocate(1*1024*1024)
+      val buffer = CharBuffer.allocate(1 * 1024 * 1024)
       val nread = input.read(buffer)
       if (nread < 0) {
         return false

diff --git a/src/test/java/com/worksap/nlp/lucene/sudachi/ja/TestSudachiTokenizer.kt b/src/test/java/com/worksap/nlp/lucene/sudachi/ja/TestSudachiTokenizer.kt
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2023 Works Applications Co., Ltd.
+ * Copyright (c) 2017-2024 Works Applications Co., Ltd.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -25,6 +25,7 @@ import com.worksap.nlp.sudachi.Config
 import com.worksap.nlp.sudachi.PathAnchor
 import com.worksap.nlp.sudachi.Tokenizer.SplitMode
 import com.worksap.nlp.test.TestDictionary
+import java.io.StringReader
 import org.apache.lucene.analysis.charfilter.MappingCharFilter
 import org.apache.lucene.analysis.charfilter.NormalizeCharMap
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute
@@ -33,8 +34,6 @@ import org.junit.Assert
 import org.junit.Before
 import org.junit.Rule
 import org.junit.Test
-import java.io.StringReader
-
 
 // Test of character segmentation using incrementToken(tokenizer)
 open class TestSudachiTokenizer : BaseTokenStreamTestCase() {
@@ -289,23 +288,22 @@ open class TestSudachiTokenizer : BaseTokenStreamTestCase() {
     assertNotEquals(tokenizerA.hashCode().toLong(), tokenizerB.hashCode().toLong())
   }
 
-
   @Test
   fun hugeCharactersByDefaultMode() {
     val tokenizer = makeTokenizer(SplitMode.C)
-    //tokenizer.setReader(StringReader("東京都に行った。"))
 
-    val charLength = 10*1024*1024
+    val charLength = 10 * 1024 * 1024
     tokenizer.setReader(StringReader("あ".repeat(charLength)))
 
-    val charTermAttribute = tokenizer.addAttribute(
-        CharTermAttribute::class.java,
-    )
+    val charTermAttribute =
+        tokenizer.addAttribute(
+            CharTermAttribute::class.java,
+        )
     tokenizer.reset()
 
     var totalLength = 0
-    while(tokenizer.incrementToken()) {
-      //println(charTermAttribute.toString())
+    while (tokenizer.incrementToken()) {
+      // println(charTermAttribute.toString())
       totalLength += charTermAttribute.length
     }