apache · zacharymorn · Nov 28, 2023 · Aug 11, 2021 · Aug 12, 2021 · Aug 13, 2021
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java
@@ -29,8 +29,9 @@
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.TopFieldCollector;
+import org.apache.lucene.search.TopFieldCollectorManager;
 import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.search.TopScoreDocCollectorManager;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 
@@ -108,17 +109,19 @@ public int doLogic() throws Exception {
             // the IndexSearcher search methods that take
             // Weight public again, we can go back to
             // pulling the Weight ourselves:
-            TopFieldCollector collector =
-                TopFieldCollector.create(sort, numHits, withTotalHits() ? Integer.MAX_VALUE : 1);
-            searcher.search(q, collector);
-            hits = collector.topDocs();
+            TopFieldCollectorManager collectorManager =
+                TopFieldCollectorManager.create(
+                    sort, numHits, withTotalHits() ? Integer.MAX_VALUE : 1);
+            hits = searcher.search(q, collectorManager);
           } else {
             hits = searcher.search(q, numHits);
           }
         } else {
-          Collector collector = createCollector();
-          searcher.search(q, collector);
-          // hits = collector.topDocs();
+          TopScoreDocCollectorManager collectorManager =
+              TopScoreDocCollectorManager.create(
+                  numHits(), withTotalHits() ? Integer.MAX_VALUE : 1);
+          searcher.search(q, collectorManager);
+          // hits = searcher.search(q, collectorManager);
         }
 
         if (hits != null) {
@@ -180,6 +183,7 @@ protected int withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws
     return res;
   }
 
+  @Deprecated
   protected Collector createCollector() throws Exception {
     return TopScoreDocCollector.create(numHits(), withTotalHits() ? Integer.MAX_VALUE : 1);
   }

diff --git a/...classification/src/java/org/apache/lucene/classification/CachingNaiveBayesClassifier.java b/...classification/src/java/org/apache/lucene/classification/CachingNaiveBayesClassifier.java
@@ -32,7 +32,7 @@
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TotalHitCountCollector;
+import org.apache.lucene.search.TotalHitCountCollectorManager;
 import org.apache.lucene.util.BytesRef;
 
 /**
@@ -179,10 +179,8 @@ private Map<BytesRef, Integer> getWordFreqForClassess(String word) throws IOExce
         if (query != null) {
           booleanQuery.add(query, BooleanClause.Occur.MUST);
         }
-        TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
-        indexSearcher.search(booleanQuery.build(), totalHitCountCollector);
 
-        int ret = totalHitCountCollector.getTotalHits();
+        int ret = indexSearcher.search(booleanQuery.build(), new TotalHitCountCollectorManager());
         if (ret != 0) {
           searched.put(cclass, ret);
         }

diff --git a/.../classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java b/.../classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java
@@ -35,7 +35,7 @@
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TotalHitCountCollector;
+import org.apache.lucene.search.TotalHitCountCollectorManager;
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.BytesRef;
 
@@ -169,7 +169,6 @@ protected int countDocsWithClass() throws IOException {
     Terms terms = MultiTerms.getTerms(this.indexReader, this.classFieldName);
     int docCount;
     if (terms == null || terms.getDocCount() == -1) { // in case codec doesn't support getDocCount
-      TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector();
       BooleanQuery.Builder q = new BooleanQuery.Builder();
       q.add(
           new BooleanClause(
@@ -179,8 +178,7 @@ protected int countDocsWithClass() throws IOException {
       if (query != null) {
         q.add(query, BooleanClause.Occur.MUST);
       }
-      indexSearcher.search(q.build(), classQueryCountCollector);
-      docCount = classQueryCountCollector.getTotalHits();
+      docCount = indexSearcher.search(q.build(), new TotalHitCountCollectorManager());
     } else {
       docCount = terms.getDocCount();
     }
@@ -276,9 +274,7 @@ private int getWordFreqForClass(String word, Term term) throws IOException {
     if (query != null) {
       booleanQuery.add(query, BooleanClause.Occur.MUST);
     }
-    TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
-    indexSearcher.search(booleanQuery.build(), totalHitCountCollector);
-    return totalHitCountCollector.getTotalHits();
+    return indexSearcher.search(booleanQuery.build(), new TotalHitCountCollectorManager());
   }
 
   private double calculateLogPrior(Term term, int docsWithClassSize) throws IOException {

diff --git a/...rc/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java b/...rc/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java
@@ -40,7 +40,7 @@
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TotalHitCountCollector;
+import org.apache.lucene.search.TotalHitCountCollectorManager;
 import org.apache.lucene.util.BytesRef;
 
 /**
@@ -263,9 +263,7 @@ private int getWordFreqForClass(String word, String fieldName, Term term) throws
     if (query != null) {
       booleanQuery.add(query, BooleanClause.Occur.MUST);
     }
-    TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
-    indexSearcher.search(booleanQuery.build(), totalHitCountCollector);
-    return totalHitCountCollector.getTotalHits();
+    return indexSearcher.search(booleanQuery.build(), new TotalHitCountCollectorManager());
   }
 
   private double calculateLogPrior(Term term, int docsWithClassSize) throws IOException {

diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -66,9 +66,9 @@
  * match lots of documents, counting the number of hits may take much longer than computing the top
  * hits so this trade-off allows to get some minimal information about the hit count without slowing
  * down search too much. The {@link TopDocs#scoreDocs} array is always accurate however. If this
- * behavior doesn't suit your needs, you should create collectors manually with either {@link
- * TopScoreDocCollector#create} or {@link TopFieldCollector#create} and call {@link #search(Query,
- * Collector)}.
+ * behavior doesn't suit your needs, you should create collector managers manually with either
+ * {@link TopScoreDocCollectorManager#create} or {@link TopFieldCollectorManager#create} and call
+ * {@link #search(Query, CollectorManager)}.
  *
  * <p><a id="thread-safety"></a>
  *
@@ -478,34 +478,8 @@ public TopDocs searchAfter(ScoreDoc after, Query query, int numHits) throws IOEx
     }
 
     final int cappedNumHits = Math.min(numHits, limit);
-
-    final CollectorManager<TopScoreDocCollector, TopDocs> manager =
-        new CollectorManager<TopScoreDocCollector, TopDocs>() {
-
-          private final HitsThresholdChecker hitsThresholdChecker =
-              (executor == null || leafSlices.length <= 1)
-                  ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits))
-                  : HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits));
-
-          private final MaxScoreAccumulator minScoreAcc =
-              (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();
-
-          @Override
-          public TopScoreDocCollector newCollector() throws IOException {
-            return TopScoreDocCollector.create(
-                cappedNumHits, after, hitsThresholdChecker, minScoreAcc);
-          }
-
-          @Override
-          public TopDocs reduce(Collection<TopScoreDocCollector> collectors) throws IOException {
-            final TopDocs[] topDocs = new TopDocs[collectors.size()];
-            int i = 0;
-            for (TopScoreDocCollector collector : collectors) {
-              topDocs[i++] = collector.topDocs();
-            }
-            return TopDocs.merge(0, cappedNumHits, topDocs);
-          }
-        };
+    CollectorManager<TopScoreDocCollector, TopDocs> manager =
+        new TopScoreDocCollectorManager(cappedNumHits, after, TOTAL_HITS_THRESHOLD);
 
     return search(query, manager);
   }
@@ -527,7 +501,10 @@ public TopDocs search(Query query, int n) throws IOException {
    *
    * @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()}
    *     clauses.
+   * @deprecated Use {@link IndexSearcher#search(Query, CollectorManager)} instead, which supports
+   *     concurrency in IndexSearcher.
    */
+  @Deprecated
   public void search(Query query, Collector results) throws IOException {
     query = rewrite(query);
     search(leafContexts, createWeight(query, results.scoreMode(), 1), results);
@@ -614,33 +591,7 @@ private TopFieldDocs searchAfter(
     final Sort rewrittenSort = sort.rewrite(this);
 
     final CollectorManager<TopFieldCollector, TopFieldDocs> manager =
-        new CollectorManager<>() {
-
-          private final HitsThresholdChecker hitsThresholdChecker =
-              (executor == null || leafSlices.length <= 1)
-                  ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits))
-                  : HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits));
-
-          private final MaxScoreAccumulator minScoreAcc =
-              (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();
-
-          @Override
-          public TopFieldCollector newCollector() throws IOException {
-            // TODO: don't pay the price for accurate hit counts by default
-            return TopFieldCollector.create(
-                rewrittenSort, cappedNumHits, after, hitsThresholdChecker, minScoreAcc);
-          }
-
-          @Override
-          public TopFieldDocs reduce(Collection<TopFieldCollector> collectors) throws IOException {
-            final TopFieldDocs[] topDocs = new TopFieldDocs[collectors.size()];
-            int i = 0;
-            for (TopFieldCollector collector : collectors) {
-              topDocs[i++] = collector.topDocs();
-            }
-            return TopDocs.merge(rewrittenSort, 0, cappedNumHits, topDocs);
-          }
-        };
+        new TopFieldCollectorManager(rewrittenSort, cappedNumHits, after, TOTAL_HITS_THRESHOLD);
 
     TopFieldDocs topDocs = search(query, manager);
     if (doDocScores) {
@@ -659,9 +610,12 @@ public TopFieldDocs reduce(Collection<TopFieldCollector> collectors) throws IOEx
    */
   public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager)
       throws IOException {
-    if (executor == null || leafSlices.length <= 1) {
+    if (executor == null || leafSlices.length == 0) {
       final C collector = collectorManager.newCollector();
-      search(query, collector);
+      final Query rewrittenQuery = rewrite(query);
+
+      search(leafContexts, createWeight(rewrittenQuery, collector.scoreMode(), 1), collector);
+
       return collectorManager.reduce(Collections.singletonList(collector));
     } else {
       final List<C> collectors = new ArrayList<>(leafSlices.length);

diff --git a/lucene/core/src/java/org/apache/lucene/search/SortRescorer.java b/lucene/core/src/java/org/apache/lucene/search/SortRescorer.java
@@ -45,7 +45,8 @@ public TopDocs rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int top
 
     List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
 
-    TopFieldCollector collector = TopFieldCollector.create(sort, topN, Integer.MAX_VALUE);
+    TopFieldCollector collector =
+        TopFieldCollectorManager.create(sort, topN, Integer.MAX_VALUE).newCollector();
 
     // Now merge sort docIDs from hits, with reader's leaves:
     int hitUpto = 0;

diff --git a/lucene/core/src/java/org/apache/lucene/search/TopDocs.java b/lucene/core/src/java/org/apache/lucene/search/TopDocs.java
@@ -233,7 +233,7 @@ public static TopDocs merge(
    * Returns a new TopFieldDocs, containing topN results across the provided TopFieldDocs, sorting
    * by the specified {@link Sort}. Each of the TopDocs must have been sorted by the same Sort, and
    * sort field values must have been filled (ie, <code>fillFields=true</code> must be passed to
-   * {@link TopFieldCollector#create}).
+   * {@link TopFieldCollectorManager#create}).
    *
    * @see #merge(Sort, int, int, TopFieldDocs[])
    * @lucene.experimental

diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@@ -18,7 +18,6 @@
 
 import java.io.IOException;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Objects;
@@ -174,7 +173,7 @@ private static boolean canEarlyTerminateOnPrefix(Sort searchSort, Sort indexSort
    * Implements a TopFieldCollector over one SortField criteria, with tracking
    * document scores and maxScore.
    */
-  private static class SimpleFieldCollector extends TopFieldCollector {
+  static class SimpleFieldCollector extends TopFieldCollector {
     final Sort sort;
     final FieldValueHitQueue<Entry> queue;
 
@@ -214,7 +213,7 @@ public void collect(int doc) throws IOException {
   /*
    * Implements a TopFieldCollector when after != null.
    */
-  private static final class PagingFieldCollector extends TopFieldCollector {
+  static final class PagingFieldCollector extends TopFieldCollector {
 
     final Sort sort;
     int collectedHits;
@@ -383,9 +382,13 @@ protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
    *     count of the result will be accurate. {@link Integer#MAX_VALUE} may be used to make the hit
    *     count accurate, but this will also make query processing slower.
    * @return a {@link TopFieldCollector} instance which will sort the results by the sort criteria.
+   * @deprecated Use {@link TopFieldCollectorManager#create(Sort, int, int)} and {@link
+   *     IndexSearcher#search(Query, CollectorManager)} instead, which support concurrency in
+   *     IndexSearcher.
    */
+  @Deprecated
   public static TopFieldCollector create(Sort sort, int numHits, int totalHitsThreshold) {
-    return create(sort, numHits, null, totalHitsThreshold);
+    return TopFieldCollectorManager.create(sort, numHits, totalHitsThreshold).newCollector();
   }
 
   /**
@@ -407,97 +410,13 @@ public static TopFieldCollector create(Sort sort, int numHits, int totalHitsThre
    *     field is indexed both with doc values and points. In this case, there is an assumption that
    *     the same data is stored in these points and doc values.
    * @return a {@link TopFieldCollector} instance which will sort the results by the sort criteria.
+   * @deprecated Use the {@link TopFieldCollectorManager} constructor instead, which supports
+   *     concurrency in IndexSearcher.
    */
+  @Deprecated
   public static TopFieldCollector create(
       Sort sort, int numHits, FieldDoc after, int totalHitsThreshold) {
-    if (totalHitsThreshold < 0) {
-      throw new IllegalArgumentException(
-          "totalHitsThreshold must be >= 0, got " + totalHitsThreshold);
-    }
-
-    return create(
-        sort,
-        numHits,
-        after,
-        HitsThresholdChecker.create(Math.max(totalHitsThreshold, numHits)),
-        null /* bottomValueChecker */);
-  }
-
-  /**
-   * Same as above with additional parameters to allow passing in the threshold checker and the max
-   * score accumulator.
-   */
-  static TopFieldCollector create(
-      Sort sort,
-      int numHits,
-      FieldDoc after,
-      HitsThresholdChecker hitsThresholdChecker,
-      MaxScoreAccumulator minScoreAcc) {
-
-    if (sort.fields.length == 0) {
-      throw new IllegalArgumentException("Sort must contain at least one field");
-    }
-
-    if (numHits <= 0) {
-      throw new IllegalArgumentException(
-          "numHits must be > 0; please use TotalHitCountCollector if you just need the total hit count");
-    }
-
-    if (hitsThresholdChecker == null) {
-      throw new IllegalArgumentException("hitsThresholdChecker should not be null");
-    }
-
-    FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
-
-    if (after == null) {
-      return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker, minScoreAcc);
-    } else {
-      if (after.fields == null) {
-        throw new IllegalArgumentException(
-            "after.fields wasn't set; you must pass fillFields=true for the previous search");
-      }
-
-      if (after.fields.length != sort.getSort().length) {
-        throw new IllegalArgumentException(
-            "after.fields has "
-                + after.fields.length
-                + " values but sort has "
-                + sort.getSort().length);
-      }
-
-      return new PagingFieldCollector(
-          sort, queue, after, numHits, hitsThresholdChecker, minScoreAcc);
-    }
-  }
-
-  /**
-   * Create a CollectorManager which uses a shared hit counter to maintain number of hits and a
-   * shared {@link MaxScoreAccumulator} to propagate the minimum score accross segments if the
-   * primary sort is by relevancy.
-   */
-  public static CollectorManager<TopFieldCollector, TopFieldDocs> createSharedManager(
-      Sort sort, int numHits, FieldDoc after, int totalHitsThreshold) {
-    return new CollectorManager<>() {
-
-      private final HitsThresholdChecker hitsThresholdChecker =
-          HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits));
-      private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
-
-      @Override
-      public TopFieldCollector newCollector() throws IOException {
-        return create(sort, numHits, after, hitsThresholdChecker, minScoreAcc);
-      }
-
-      @Override
-      public TopFieldDocs reduce(Collection<TopFieldCollector> collectors) throws IOException {
-        final TopFieldDocs[] topDocs = new TopFieldDocs[collectors.size()];
-        int i = 0;
-        for (TopFieldCollector collector : collectors) {
-          topDocs[i++] = collector.topDocs();
-        }
-        return TopDocs.merge(sort, 0, numHits, topDocs);
-      }
-    };
+    return new TopFieldCollectorManager(sort, numHits, after, totalHitsThreshold).newCollector();
   }
 
   /**