diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 8489b3c4c1a..c1620c6178f 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -80,6 +80,10 @@ API Changes * GITHUB#12875: Ensure token position is always increased in PathHierarchyTokenizer and ReversePathHierarchyTokenizer and resulting tokens do not overlap. (Michael Froh, Lukáš Vlček) +* GITHUB#12624, GITHUB#12831: Allow FSTCompiler to stream to any DataOutput while building, and + make compile() only return the FSTMetadata. For on-heap (default) use case, please use + FST.fromFSTReader(fstMetadata, fstCompiler.getFSTReader()) to create the FST. (Anh Dung Bui) + New Features --------------------- diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java index 01baea12b01..1ffa071835e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java @@ -111,7 +111,7 @@ public NormalizeCharMap build() { for (Map.Entry ent : pendingPairs.entrySet()) { fstCompiler.add(Util.toUTF16(ent.getKey(), scratch), new CharsRef(ent.getValue())); } - map = fstCompiler.compile(); + map = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); pendingPairs.clear(); } catch (IOException ioe) { // Bogus FST IOExceptions!! 
(will never happen) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ConvTable.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ConvTable.java index f22bee1db0d..acbf1976d58 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ConvTable.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ConvTable.java @@ -51,7 +51,7 @@ class ConvTable { fstCompiler.add(scratchInts.get(), new CharsRef(entry.getValue())); } - fst = fstCompiler.compile(); + fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); } catch (IOException bogus) { throw new RuntimeException(bogus); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java index 22e9c5245f6..300cf5305d6 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java @@ -657,7 +657,7 @@ private FST affixFST(TreeMap> affixes) throws IOE } fstCompiler.add(scratch.get(), output); } - return fstCompiler.compile(); + return FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); } /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java index b46f8f8ff02..83724af63d5 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java @@ -222,7 +222,8 @@ public StemmerOverrideMap build() throws IOException { intsSpare.copyUTF8Bytes(bytesRef); fstCompiler.add(intsSpare.get(), new BytesRef(outputValues.get(id))); } - return new 
StemmerOverrideMap(fstCompiler.compile(), ignoreCase); + return new StemmerOverrideMap( + FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()), ignoreCase); } } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java index 22ba92ff555..7d08424c27a 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java @@ -291,7 +291,7 @@ public SynonymMap build() throws IOException { fstCompiler.add(Util.toUTF32(input, scratchIntsRef), scratch.toBytesRef()); } - FST fst = fstCompiler.compile(); + FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); return new SynonymMap(fst, words, maxHorizontalContext); } } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java index 9547b6f4cfb..28579dc7db6 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java @@ -126,7 +126,7 @@ private TokenInfoDictionaryWriter buildDictionary(List csvFiles) throws IO dictionary.addMapping((int) ord, offset); offset = next; } - dictionary.setFST(fstCompiler.compile()); + dictionary.setFST(FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader())); return dictionary; } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java index beb439cde86..a62ffe5d8ac 100644 --- 
a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java @@ -147,7 +147,9 @@ public int compare(String[] left, String[] right) { segmentations.add(wordIdAndLength); ord++; } - this.fst = new TokenInfoFST(fstCompiler.compile(), false); + this.fst = + new TokenInfoFST( + FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()), false); this.morphAtts = new UserMorphData(data.toArray(new String[0])); this.segmentations = segmentations.toArray(new int[segmentations.size()][]); } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java index 79c56c24804..25f2d42dd82 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java @@ -122,7 +122,7 @@ private TokenInfoDictionaryWriter buildDictionary(List csvFiles) throws IO dictionary.addMapping((int) ord, offset); offset = next; } - dictionary.setFST(fstCompiler.compile()); + dictionary.setFST(FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader())); return dictionary; } } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java index 6219be1e813..e5cf043ba55 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java @@ -130,7 +130,8 @@ private UserDictionary(List entries) throws IOException { lastToken = token; ord++; } - this.fst = new TokenInfoFST(fstCompiler.compile()); + this.fst = + new 
TokenInfoFST(FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader())); int[][] segmentations = _segmentations.toArray(new int[_segmentations.size()][]); short[] rightIds = new short[_rightIds.size()]; for (int i = 0; i < _rightIds.size(); i++) { diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java index 4f889d0914c..2ededb9391f 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java @@ -498,7 +498,7 @@ public void compileIndex( } } - index = fstCompiler.compile(); + index = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); assert subIndices == null; diff --git a/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java index 280e82cc2c0..e4c4ccb1a95 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java @@ -216,7 +216,7 @@ private void updateFST(SortedMap weights) throws IOException { fstCompiler.add( Util.toIntsRef(scratchBytes.get(), scratchInts), entry.getValue().longValue()); } - fst = fstCompiler.compile(); + fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); } @Override diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java index 04e3e80c71b..70f1f9918dd 100644 --- 
a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java @@ -283,7 +283,7 @@ public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IO @Override public void finish(long termsFilePointer) throws IOException { - fst = fstCompiler.compile(); + fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); if (fst != null) { fst.save(out, out); } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java index 986c6e47d4c..a7ef5ef9932 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java @@ -425,7 +425,7 @@ public void compileIndex( assert sumTotalTermCount == totFloorTermCount; - index = fstCompiler.compile(); + index = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); assert subIndices == null; /* diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java index fa46f6451da..1e8c442ccc6 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java @@ -277,7 +277,8 @@ public void finishTerm(BytesRef text, BlockTermState state) throws IOException { public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException { // save FST dict if (numTerms > 0) { - final FST fst = fstCompiler.compile(); + final FST fst = + FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); fields.add( new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, 
docCount, fst)); } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java index e8fbadfe7c5..63ce777d7fd 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java @@ -738,7 +738,7 @@ private void loadTerms() throws IOException { } } docCount = visitedDocs.cardinality(); - fst = fstCompiler.compile(); + fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); /* PrintStream ps = new PrintStream("out.dot"); fst.toDot(ps); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/FSTDictionary.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/FSTDictionary.java index 0a6c9010143..a73fef410dd 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/FSTDictionary.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/FSTDictionary.java @@ -185,7 +185,8 @@ public void add(BytesRef blockKey, long blockFilePointer) throws IOException { @Override public FSTDictionary build() throws IOException { - return new FSTDictionary(fstCompiler.compile()); + return new FSTDictionary( + FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader())); } } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java index 26aea1dda59..1cf045f9c14 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java @@ -587,7 +587,7 @@ public void compileIndex( } } - index = fstCompiler.compile(); + index = 
FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); assert subIndices == null; diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java index 260c159f207..6bb5718d5c7 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java @@ -437,6 +437,21 @@ public FST(FSTMetadata metadata, DataInput in, FSTStore fstStore) throws IOEx this.fstReader = fstReader; } + /** + * Create a FST from a {@link FSTReader}. Return null if the metadata is null. + * + * @param fstMetadata the metadata + * @param fstReader the FSTReader + * @return the FST + */ + public static FST fromFSTReader(FSTMetadata fstMetadata, FSTReader fstReader) { + // FSTMetadata could be null if there is no node accepted by the FST + if (fstMetadata == null) { + return null; + } + return new FST<>(fstMetadata, Objects.requireNonNull(fstReader, "FSTReader cannot be null")); + } + /** * Read the FST metadata from DataInput * @@ -516,9 +531,7 @@ public FSTMetadata getMetadata() { } /** - * Save the FST to DataOutput. If you use an {@link org.apache.lucene.store.IndexOutput} to build - * the FST, then you should not and do not need to call this method, as the FST is already saved. - * Doing so will throw an {@link UnsupportedOperationException}. + * Save the FST to DataOutput. 
* * @param metaOut the DataOutput to write the metadata to * @param out the DataOutput to write the FST bytes to diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java b/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java index c3238bdaf4b..e837b7775e0 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java @@ -99,6 +99,7 @@ public class FSTCompiler { private static final FSTReader NULL_FST_READER = new NullFSTReader(); private final NodeHash dedupHash; + // a temporary FST used during building for NodeHash cache final FST fst; private final T NO_OUTPUT; @@ -173,9 +174,7 @@ private FSTCompiler( paddingBytePending = true; this.dataOutput = dataOutput; fst = - new FST<>( - new FST.FSTMetadata<>(inputType, outputs, null, -1, version, 0), - toFSTReader(dataOutput)); + new FST<>(new FST.FSTMetadata<>(inputType, outputs, null, -1, version, 0), NULL_FST_READER); if (suffixRAMLimitMB < 0) { throw new IllegalArgumentException("ramLimitMB must be >= 0; got: " + suffixRAMLimitMB); } else if (suffixRAMLimitMB > 0) { @@ -193,16 +192,6 @@ private FSTCompiler( } } - // Get the respective FSTReader of the DataOutput. If the DataOutput is also a FSTReader then we - // will use it, otherwise we will return a NullFSTReader. Attempting to read from a FST with - // NullFSTReader will throw UnsupportedOperationException - private FSTReader toFSTReader(DataOutput dataOutput) { - if (dataOutput instanceof FSTReader) { - return (FSTReader) dataOutput; - } - return NULL_FST_READER; - } - /** * This class is used for FST backed by non-FSTReader DataOutput. It does not allow getting the * reverse BytesReader nor writing to a DataOutput. @@ -227,6 +216,22 @@ public void writeTo(DataOutput out) { } } + /** + * Get the respective {@link FSTReader} of the {@link DataOutput}. 
To call this method, you need + * to use the default DataOutput or {@link #getOnHeapReaderWriter(int)}, otherwise we will throw + * an exception. + * + * @return the DataOutput as FSTReader + * @throws IllegalStateException if the DataOutput does not implement FSTReader + */ + public FSTReader getFSTReader() { + if (dataOutput instanceof FSTReader) { + return (FSTReader) dataOutput; + } + throw new IllegalStateException( + "The DataOutput must implement FSTReader, but got " + dataOutput); + } + /** * Fluent-style constructor for FST {@link FSTCompiler}. * @@ -967,10 +972,31 @@ private boolean validOutput(T output) { return output == NO_OUTPUT || !output.equals(NO_OUTPUT); } - /** Returns final FST. NOTE: this will return null if nothing is accepted by the FST. */ - // TODO: make this method to only return the FSTMetadata and user needs to construct the FST - // themselves - public FST compile() throws IOException { + /** + * Returns the metadata of the final FST. NOTE: this will return null if nothing is accepted by + * the FST. + * + *

To create the FST, you need to: + * + *

- If a FSTReader DataOutput was used, such as the one returned by {@link + * #getOnHeapReaderWriter(int)} + * + *

+   *     fstMetadata = fstCompiler.compile();
+   *     fst = FST.fromFSTReader(fstMetadata, fstCompiler.getFSTReader());
+   * 
+ * + *

- If a non-FSTReader DataOutput was used, such as {@link + * org.apache.lucene.store.IndexOutput}, you need to first create the corresponding {@link + * org.apache.lucene.store.DataInput}, such as {@link org.apache.lucene.store.IndexInput}, then + * pass it to the FST constructor + * + *

+   *     fstMetadata = fstCompiler.compile();
+   *     fst = new FST<>(fstMetadata, dataInput, new OffHeapFSTStore());
+   * 
+ */ + public FST.FSTMetadata compile() throws IOException { final UnCompiledNode root = frontier[0]; @@ -990,7 +1016,7 @@ public FST compile() throws IOException { // root.output=" + root.output); finish(compileNode(root).node); - return fst; + return fst.metadata; } /** Expert: holds a pending (seen but not yet serialized) arc. */ diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java b/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java index aa7bd91f8ec..1f80869f699 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java @@ -49,7 +49,7 @@ * scratchBytes.copyChars(inputValues[i]); * fstCompiler.add(Util.toIntsRef(scratchBytes.toBytesRef(), scratchInts), outputValues[i]); * } - * FST<Long> fst = fstCompiler.compile(); + * FST<Long> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); * * * Retrieval by key: diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java b/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java index b2758ca526e..1cf72208a93 100644 --- a/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java +++ b/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java @@ -90,7 +90,7 @@ public void test() throws Exception { nextInput(r, ints2); } - FST fst = fstCompiler.compile(); + FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); for (int verify = 0; verify < 2; verify++) { System.out.println( @@ -183,7 +183,7 @@ public void test() throws Exception { nextInput(r, ints); } - FST fst = fstCompiler.compile(); + FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); for (int verify = 0; verify < 2; verify++) { System.out.println( @@ -273,7 +273,7 @@ public void test() throws Exception { nextInput(r, ints); } - FST fst = fstCompiler.compile(); + FST fst = FST.fromFSTReader(fstCompiler.compile(), 
fstCompiler.getFSTReader()); for (int verify = 0; verify < 2; verify++) { diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFSTOffHeap.java b/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFSTOffHeap.java index 090c99716f6..1773c47f432 100644 --- a/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFSTOffHeap.java +++ b/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFSTOffHeap.java @@ -92,10 +92,10 @@ public void test() throws Exception { nextInput(r, ints2); } - FST fst = fstCompiler.compile(); + FST.FSTMetadata fstMetadata = fstCompiler.compile(); indexOutput.close(); try (IndexInput indexInput = dir.openInput("fst", IOContext.DEFAULT)) { - fst = new FST<>(fst.getMetadata(), indexInput, new OffHeapFSTStore()); + FST fst = new FST<>(fstMetadata, indexInput, new OffHeapFSTStore()); for (int verify = 0; verify < 2; verify++) { System.out.println( @@ -180,10 +180,10 @@ public void test() throws Exception { nextInput(r, ints); } - FST fst = fstCompiler.compile(); + FST.FSTMetadata fstMetadata = fstCompiler.compile(); indexOutput.close(); try (IndexInput indexInput = dir.openInput("fst", IOContext.DEFAULT)) { - fst = new FST<>(fst.getMetadata(), indexInput, new OffHeapFSTStore()); + FST fst = new FST<>(fstMetadata, indexInput, new OffHeapFSTStore()); for (int verify = 0; verify < 2; verify++) { System.out.println( @@ -265,10 +265,10 @@ public void test() throws Exception { nextInput(r, ints); } - FST fst = fstCompiler.compile(); + FST.FSTMetadata fstMetadata = fstCompiler.compile(); indexOutput.close(); try (IndexInput indexInput = dir.openInput("fst", IOContext.DEFAULT)) { - fst = new FST<>(fst.getMetadata(), indexInput, new OffHeapFSTStore()); + FST fst = new FST<>(fstMetadata, indexInput, new OffHeapFSTStore()); for (int verify = 0; verify < 2; verify++) { diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTDirectAddressing.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTDirectAddressing.java index 
0f064d1ea44..81792f8ed75 100644 --- a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTDirectAddressing.java +++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTDirectAddressing.java @@ -196,7 +196,7 @@ private static FST buildFST(List entries, FSTCompiler } last = entry; } - return fstCompiler.compile(); + return FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); } public static void main(String... args) throws Exception { @@ -333,7 +333,7 @@ private static FST recompile(FST fst, float oversizingFactor while ((inputOutput = fstEnum.next()) != null) { fstCompiler.add(inputOutput.input, CharsRef.deepCopyOf(inputOutput.output)); } - return fstCompiler.compile(); + return FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); } private static int walk(FST read) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java index 3f56aec1994..9d09f075b72 100644 --- a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java +++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java @@ -407,7 +407,7 @@ public void testRealTerms() throws Exception { System.out.println(ord + " terms..."); } } - FST fst = fstCompiler.compile(); + FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); if (VERBOSE) { System.out.println( "FST: " @@ -569,7 +569,7 @@ public void run(int limit, boolean verify) throws IOException { System.out.println( ((tMid - tStart) / (double) TimeUnit.SECONDS.toNanos(1)) + " sec to add all terms"); - FST fst = fstCompiler.compile(); + FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); long tEnd = System.nanoTime(); System.out.println( ((tEnd - tMid) / (double) TimeUnit.SECONDS.toNanos(1)) + " sec to finish/pack"); @@ -774,7 +774,8 @@ public void testSingleString() throws Exception { new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build(); 
fstCompiler.add( Util.toIntsRef(newBytesRef("foobar"), new IntsRefBuilder()), outputs.getNoOutput()); - final BytesRefFSTEnum fstEnum = new BytesRefFSTEnum<>(fstCompiler.compile()); + final BytesRefFSTEnum fstEnum = + new BytesRefFSTEnum<>(FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader())); assertNull(fstEnum.seekFloor(newBytesRef("foo"))); assertNull(fstEnum.seekCeil(newBytesRef("foobaz"))); } @@ -788,7 +789,7 @@ public void testDuplicateFSAString() throws Exception { for (int i = 0; i < 10; i++) { fstCompiler.add(Util.toIntsRef(newBytesRef(str), ints), outputs.getNoOutput()); } - FST fst = fstCompiler.compile(); + FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); // count the input paths int count = 0; @@ -863,7 +864,7 @@ public void testSimple() throws Exception { fstCompiler.add(Util.toIntsRef(b, new IntsRefBuilder()), 42L); fstCompiler.add(Util.toIntsRef(c, new IntsRefBuilder()), 13824324872317238L); - final FST fst = fstCompiler.compile(); + final FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); assertEquals(13824324872317238L, (long) Util.get(fst, c)); assertEquals(42, (long) Util.get(fst, b)); @@ -1107,7 +1108,7 @@ FST compile(String[] lines) throws IOException { fstCompiler.add(Util.toIntsRef(term.get(), scratchIntsRef), nothing); } - return fstCompiler.compile(); + return FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); } void generate(ArrayList out, StringBuilder b, char from, char to, int depth) { @@ -1173,7 +1174,7 @@ public void testFinalOutputOnEndState() throws Exception { new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, outputs).build(); fstCompiler.add(Util.toUTF32("slat", new IntsRefBuilder()), 10L); fstCompiler.add(Util.toUTF32("st", new IntsRefBuilder()), 17L); - final FST fst = fstCompiler.compile(); + final FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); // Writer w = new OutputStreamWriter(new 
FileOutputStream("/x/tmp3/out.dot")); StringWriter w = new StringWriter(); Util.toDot(fst, w, false, false); @@ -1190,7 +1191,7 @@ public void testInternalFinalState() throws Exception { Util.toIntsRef(newBytesRef("stat"), new IntsRefBuilder()), outputs.getNoOutput()); fstCompiler.add( Util.toIntsRef(newBytesRef("station"), new IntsRefBuilder()), outputs.getNoOutput()); - final FST fst = fstCompiler.compile(); + final FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); StringWriter w = new StringWriter(); // Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot")); Util.toDot(fst, w, false, false); @@ -1216,7 +1217,7 @@ public void testSaveDifferentMetaOut() throws Exception { fstCompiler.add(Util.toIntsRef(newBytesRef("aac"), scratch), 7L); fstCompiler.add(Util.toIntsRef(newBytesRef("ax"), scratch), 17L); - FST fst = fstCompiler.compile(); + FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); // save the FST to DataOutput, here it would not matter whether we are saving to different // DataOutput for meta or not @@ -1252,7 +1253,6 @@ public void testNonFinalStopNode() throws Exception { final Long nothing = outputs.getNoOutput(); final FSTCompiler fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build(); - final FST fst = fstCompiler.fst; final FSTCompiler.UnCompiledNode rootNode = new FSTCompiler.UnCompiledNode<>(fstCompiler, 0); @@ -1285,6 +1285,8 @@ public void testNonFinalStopNode() throws Exception { fstCompiler.finish(fstCompiler.addNode(rootNode)); + final FST fst = new FST<>(fstCompiler.fst.metadata, fstCompiler.getFSTReader()); + StringWriter w = new StringWriter(); // Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp3/out.dot")); Util.toDot(fst, w, false, false); @@ -1333,7 +1335,7 @@ public void testShortestPaths() throws Exception { fstCompiler.add(Util.toIntsRef(newBytesRef("aab"), scratch), 22L); 
fstCompiler.add(Util.toIntsRef(newBytesRef("aac"), scratch), 7L); fstCompiler.add(Util.toIntsRef(newBytesRef("ax"), scratch), 17L); - final FST fst = fstCompiler.compile(); + final FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); // Writer w = new OutputStreamWriter(new FileOutputStream("out.dot")); // Util.toDot(fst, w, false, false); // w.close(); @@ -1370,7 +1372,7 @@ public void testRejectNoLimits() throws IOException { fstCompiler.add(Util.toIntsRef(newBytesRef("adcde"), scratch), 17L); fstCompiler.add(Util.toIntsRef(newBytesRef("ax"), scratch), 17L); - final FST fst = fstCompiler.compile(); + final FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); final AtomicInteger rejectCount = new AtomicInteger(); Util.TopNSearcher searcher = new Util.TopNSearcher<>(fst, 2, 6, minLongComparator) { @@ -1433,7 +1435,8 @@ public void testShortestPathsWFST() throws Exception { fstCompiler.add(Util.toIntsRef(newBytesRef("aab"), scratch), outputs.newPair(22L, 57L)); fstCompiler.add(Util.toIntsRef(newBytesRef("aac"), scratch), outputs.newPair(7L, 36L)); fstCompiler.add(Util.toIntsRef(newBytesRef("ax"), scratch), outputs.newPair(17L, 85L)); - final FST> fst = fstCompiler.compile(); + final FST> fst = + FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); // Writer w = new OutputStreamWriter(new FileOutputStream("out.dot")); // Util.toDot(fst, w, false, false); // w.close(); @@ -1492,7 +1495,7 @@ public void testShortestPathsRandom() throws Exception { fstCompiler.add(Util.toIntsRef(newBytesRef(e.getKey()), scratch), e.getValue()); } - final FST fst = fstCompiler.compile(); + final FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); // System.out.println("SAVE out.dot"); // Writer w = new OutputStreamWriter(new FileOutputStream("out.dot")); // Util.toDot(fst, w, false, false); @@ -1619,7 +1622,8 @@ public void testShortestPathsWFSTRandom() throws Exception { 
Util.toIntsRef(newBytesRef(e.getKey()), scratch), outputs.newPair(weight, output)); } - final FST> fst = fstCompiler.compile(); + final FST> fst = + FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); // System.out.println("SAVE out.dot"); // Writer w = new OutputStreamWriter(new FileOutputStream("out.dot")); // Util.toDot(fst, w, false, false); @@ -1695,7 +1699,7 @@ public void testLargeOutputsOnArrayArcs() throws Exception { fstCompiler.add(input.get(), newBytesRef(BytesRef.deepCopyOf(output))); } - final FST fst = fstCompiler.compile(); + final FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); for (int arc = 0; arc < 6; arc++) { input.setIntAt(0, arc); final BytesRef result = Util.get(fst, input.get()); @@ -1737,7 +1741,7 @@ public void testIllegallyModifyRootArc() throws Exception { fstCompiler.add(input.get(), term); } - FST fst = fstCompiler.compile(); + FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); Arc arc = new FST.Arc<>(); fst.getFirstArc(arc); @@ -1772,7 +1776,7 @@ public void testSimpleDepth() throws Exception { fstCompiler.add(Util.toIntsRef(ac, new IntsRefBuilder()), 5L); fstCompiler.add(Util.toIntsRef(bd, new IntsRefBuilder()), 7L); - FST fst = fstCompiler.compile(); + FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); assertEquals(3, (long) Util.get(fst, ab)); assertEquals(5, (long) Util.get(fst, ac)); diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestUtil.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestUtil.java index 1e6c34e7d8e..cbb37cc221c 100644 --- a/lucene/core/src/test/org/apache/lucene/util/fst/TestUtil.java +++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestUtil.java @@ -116,6 +116,6 @@ private FST buildFST( fstCompiler.add( Util.toIntsRef(new BytesRef(word), new IntsRefBuilder()), outputs.getNoOutput()); } - return fstCompiler.compile(); + return FST.fromFSTReader(fstCompiler.compile(), 
fstCompiler.getFSTReader()); } } diff --git a/lucene/demo/src/java/org/apache/lucene/demo/knn/KnnVectorDict.java b/lucene/demo/src/java/org/apache/lucene/demo/knn/KnnVectorDict.java index bb65e8b63cf..30c813e5c5a 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/knn/KnnVectorDict.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/knn/KnnVectorDict.java @@ -155,7 +155,7 @@ void build(Path gloveInput, Directory directory, String dictName) throws IOExcep while (addOneLine(in, binOut)) { // continue; } - fstCompiler.compile().save(fstOut, fstOut); + FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()).save(fstOut, fstOut); binOut.writeInt(numFields - 1); } } diff --git a/lucene/misc/src/test/org/apache/lucene/misc/util/fst/TestFSTsMisc.java b/lucene/misc/src/test/org/apache/lucene/misc/util/fst/TestFSTsMisc.java index 2e991117ba1..44c342b29ab 100644 --- a/lucene/misc/src/test/org/apache/lucene/misc/util/fst/TestFSTsMisc.java +++ b/lucene/misc/src/test/org/apache/lucene/misc/util/fst/TestFSTsMisc.java @@ -174,7 +174,7 @@ public void testListOfOutputs() throws Exception { fstCompiler.add(Util.toIntsRef(new BytesRef("a"), scratch), 3L); fstCompiler.add(Util.toIntsRef(new BytesRef("a"), scratch), 0L); fstCompiler.add(Util.toIntsRef(new BytesRef("b"), scratch), 17L); - final FST fst = fstCompiler.compile(); + final FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); Object output = Util.get(fst, new BytesRef("a")); assertNotNull(output); @@ -208,7 +208,7 @@ public void testListOfOutputsEmptyString() throws Exception { fstCompiler.add(Util.toIntsRef(new BytesRef("a"), scratch), 0L); fstCompiler.add(Util.toIntsRef(new BytesRef("b"), scratch), 0L); - final FST fst = fstCompiler.compile(); + final FST fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); Object output = Util.get(fst, new BytesRef("")); assertNotNull(output); diff --git 
a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/VersionBlockTreeTermsWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/VersionBlockTreeTermsWriter.java index 1443434d2ca..0e9c2332558 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/VersionBlockTreeTermsWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/VersionBlockTreeTermsWriter.java @@ -407,7 +407,7 @@ public void compileIndex( } } - index = fstCompiler.compile(); + index = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); assert subIndices == null; diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java index b31330035bd..a995b6725e9 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java @@ -586,7 +586,7 @@ public void build(InputIterator iterator) throws IOException { fstCompiler.add(scratchInts.get(), outputs.newPair(cost, br)); } } - fst = fstCompiler.compile(); + fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); count = newCount; // Util.dotToFile(fst, "/tmp/suggest.dot"); diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java index 3d45cd1f71b..87346e9482e 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java @@ -323,7 +323,7 @@ public void build(InputIterator iterator, double ramBufferSizeMB) throws IOExcep fstCompiler.add(Util.toIntsRef(term, scratchInts), 
encodeWeight(termsEnum.totalTermFreq())); } - final FST newFst = fstCompiler.compile(); + final FST newFst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); if (newFst == null) { throw new IllegalArgumentException("need at least one suggestion"); } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggesterBuilder.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggesterBuilder.java index f4daf2a36be..02d9f84d3d4 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggesterBuilder.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggesterBuilder.java @@ -104,7 +104,8 @@ public void finishTerm() throws IOException { * CompletionPostingsFormat.FSTLoadMode)})} */ public boolean store(DataOutput output) throws IOException { - final FST> fst = fstCompiler.compile(); + final FST> fst = + FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); if (fst == null) { return false; } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java index 29949f845a0..4f71b6e6c7b 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java @@ -220,6 +220,6 @@ private FST buildAutomaton(BytesRefSorter sorter) throws IOException { } } - return count == 0 ? null : fstCompiler.compile(); + return count == 0 ? 
null : FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java index 808b4d7edd6..a3c0fdd137f 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java @@ -126,7 +126,7 @@ public void build(InputIterator iterator) throws IOException { previous.copyBytes(scratch); newCount++; } - fst = fstCompiler.compile(); + fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); count = newCount; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/util/fst/FSTTester.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/fst/FSTTester.java index 1564b2a7e88..521e8cbe645 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/fst/FSTTester.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/fst/FSTTester.java @@ -282,28 +282,33 @@ public FST doTest() throws IOException { fstCompiler.add(pair.input, pair.output); } } - FST fst = fstCompiler.compile(); + + FST fst = null; + FST.FSTMetadata fstMetadata = fstCompiler.compile(); if (useOffHeap) { indexOutput.close(); - if (fst == null) { + if (fstMetadata == null) { dir.deleteFile("fstOffHeap.bin"); } else { try (IndexInput in = dir.openInput("fstOffHeap.bin", IOContext.DEFAULT)) { - fst = new FST<>(fst.getMetadata(), in); + fst = new FST<>(fstMetadata, in); } finally { dir.deleteFile("fstOffHeap.bin"); } } - } else if (random.nextBoolean() && fst != null) { - IOContext context = LuceneTestCase.newIOContext(random); - try (IndexOutput out = dir.createOutput("fst.bin", context)) { - fst.save(out, out); - } - try (IndexInput in = dir.openInput("fst.bin", context)) { - fst = new FST<>(FST.readMetadata(in, outputs), in); 
- } finally { - dir.deleteFile("fst.bin"); + } else if (fstMetadata != null) { + fst = FST.fromFSTReader(fstMetadata, fstCompiler.getFSTReader()); + if (random.nextBoolean()) { + IOContext context = LuceneTestCase.newIOContext(random); + try (IndexOutput out = dir.createOutput("fst.bin", context)) { + fst.save(out, out); + } + try (IndexInput in = dir.openInput("fst.bin", context)) { + fst = new FST<>(FST.readMetadata(in, outputs), in); + } finally { + dir.deleteFile("fst.bin"); + } } }