-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
LUCENE-10002: Deprecate IndexSearch#search(Query, Collector) in favor of IndexSearcher#search(Query, CollectorManager) - TopFieldCollectorManager & TopScoreDocCollectorManager #240
Changes from 16 commits
e23d11b
776759d
0c2c12b
4af7740
86bac52
3a3a86a
d21e5f9
be6977d
2df81c5
81163de
aa114d7
0f7b327
06b0f5a
84641b4
a9a6828
0b4f9cd
d588a82
8f2797f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,6 @@ | |
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.Comparator; | ||
import java.util.List; | ||
|
@@ -62,9 +61,9 @@ | |
* match lots of documents, counting the number of hits may take much longer than computing the top | ||
* hits so this trade-off allows to get some minimal information about the hit count without slowing | ||
* down search too much. The {@link TopDocs#scoreDocs} array is always accurate however. If this | ||
* behavior doesn't suit your needs, you should create collectors manually with either {@link | ||
* TopScoreDocCollector#create} or {@link TopFieldCollector#create} and call {@link #search(Query, | ||
* Collector)}. | ||
* behavior doesn't suit your needs, you should create collectorManagers manually with either {@link | ||
* TopScoreDocCollectorManager} or {@link TopFieldCollectorManager} and call {@link #search(Query, | ||
* CollectorManager)}. | ||
* | ||
* <p><a id="thread-safety"></a> | ||
* | ||
|
@@ -455,35 +454,10 @@ public TopDocs searchAfter(ScoreDoc after, Query query, int numHits) throws IOEx | |
} | ||
|
||
final int cappedNumHits = Math.min(numHits, limit); | ||
|
||
final LeafSlice[] leafSlices = getSlices(); | ||
final CollectorManager<TopScoreDocCollector, TopDocs> manager = | ||
new CollectorManager<TopScoreDocCollector, TopDocs>() { | ||
|
||
private final HitsThresholdChecker hitsThresholdChecker = | ||
leafSlices.length <= 1 | ||
? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) | ||
: HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits)); | ||
|
||
private final MaxScoreAccumulator minScoreAcc = | ||
leafSlices.length <= 1 ? null : new MaxScoreAccumulator(); | ||
|
||
@Override | ||
public TopScoreDocCollector newCollector() throws IOException { | ||
return TopScoreDocCollector.create( | ||
cappedNumHits, after, hitsThresholdChecker, minScoreAcc); | ||
} | ||
|
||
@Override | ||
public TopDocs reduce(Collection<TopScoreDocCollector> collectors) throws IOException { | ||
final TopDocs[] topDocs = new TopDocs[collectors.size()]; | ||
int i = 0; | ||
for (TopScoreDocCollector collector : collectors) { | ||
topDocs[i++] = collector.topDocs(); | ||
} | ||
return TopDocs.merge(0, cappedNumHits, topDocs); | ||
} | ||
}; | ||
final boolean supportsConcurrency = getSlices().length > 1; | ||
CollectorManager<TopScoreDocCollector, TopDocs> manager = | ||
new TopScoreDocCollectorManager( | ||
cappedNumHits, after, TOTAL_HITS_THRESHOLD, supportsConcurrency); | ||
|
||
return search(query, manager); | ||
} | ||
|
@@ -510,7 +484,10 @@ public TopDocs search(Query query, int n) throws IOException { | |
* | ||
* @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()} | ||
* clauses. | ||
* @deprecated This method is being deprecated in favor of {@link IndexSearcher#search(Query, | ||
* CollectorManager)} due to its support for concurrency in IndexSearcher | ||
*/ | ||
@Deprecated | ||
public void search(Query query, Collector results) throws IOException { | ||
query = rewrite(query, results.scoreMode().needsScores()); | ||
search(leafContexts, createWeight(query, results.scoreMode(), 1), results); | ||
|
@@ -602,34 +579,10 @@ private TopFieldDocs searchAfter( | |
final Sort rewrittenSort = sort.rewrite(this); | ||
final LeafSlice[] leafSlices = getSlices(); | ||
|
||
final boolean supportsConcurrency = leafSlices.length > 1; | ||
final CollectorManager<TopFieldCollector, TopFieldDocs> manager = | ||
new CollectorManager<>() { | ||
|
||
private final HitsThresholdChecker hitsThresholdChecker = | ||
leafSlices.length <= 1 | ||
? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) | ||
: HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits)); | ||
|
||
private final MaxScoreAccumulator minScoreAcc = | ||
leafSlices.length <= 1 ? null : new MaxScoreAccumulator(); | ||
|
||
@Override | ||
public TopFieldCollector newCollector() throws IOException { | ||
// TODO: don't pay the price for accurate hit counts by default | ||
return TopFieldCollector.create( | ||
rewrittenSort, cappedNumHits, after, hitsThresholdChecker, minScoreAcc); | ||
} | ||
|
||
@Override | ||
public TopFieldDocs reduce(Collection<TopFieldCollector> collectors) throws IOException { | ||
final TopFieldDocs[] topDocs = new TopFieldDocs[collectors.size()]; | ||
int i = 0; | ||
for (TopFieldCollector collector : collectors) { | ||
topDocs[i++] = collector.topDocs(); | ||
} | ||
return TopDocs.merge(rewrittenSort, 0, cappedNumHits, topDocs); | ||
} | ||
}; | ||
new TopFieldCollectorManager( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wonderful to see all this redundant code go away! |
||
rewrittenSort, cappedNumHits, after, TOTAL_HITS_THRESHOLD, supportsConcurrency); | ||
|
||
TopFieldDocs topDocs = search(query, manager); | ||
if (doDocScores) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,6 @@ | |
|
||
import java.io.IOException; | ||
import java.util.Arrays; | ||
import java.util.Collection; | ||
import java.util.Comparator; | ||
import java.util.List; | ||
import java.util.Objects; | ||
|
@@ -174,7 +173,7 @@ private static boolean canEarlyTerminateOnPrefix(Sort searchSort, Sort indexSort | |
* Implements a TopFieldCollector over one SortField criteria, with tracking | ||
zacharymorn marked this conversation as resolved.
Show resolved
Hide resolved
|
||
* document scores and maxScore. | ||
*/ | ||
private static class SimpleFieldCollector extends TopFieldCollector { | ||
static class SimpleFieldCollector extends TopFieldCollector { | ||
final Sort sort; | ||
final FieldValueHitQueue<Entry> queue; | ||
|
||
|
@@ -225,7 +224,7 @@ public void collect(int doc) throws IOException { | |
/* | ||
* Implements a TopFieldCollector when after != null. | ||
*/ | ||
private static final class PagingFieldCollector extends TopFieldCollector { | ||
static final class PagingFieldCollector extends TopFieldCollector { | ||
|
||
final Sort sort; | ||
int collectedHits; | ||
|
@@ -405,9 +404,13 @@ protected void updateMinCompetitiveScore(Scorable scorer) throws IOException { | |
* count of the result will be accurate. {@link Integer#MAX_VALUE} may be used to make the hit | ||
* count accurate, but this will also make query processing slower. | ||
* @return a {@link TopFieldCollector} instance which will sort the results by the sort criteria. | ||
* @deprecated This method is deprecated in favor of the constructor of {@link | ||
* TopFieldCollectorManager} due to its support for concurrency in IndexSearcher | ||
*/ | ||
@Deprecated | ||
gsmiller marked this conversation as resolved.
Show resolved
Hide resolved
|
||
public static TopFieldCollector create(Sort sort, int numHits, int totalHitsThreshold) { | ||
return create(sort, numHits, null, totalHitsThreshold); | ||
return new TopFieldCollectorManager(sort, numHits, null, totalHitsThreshold, false) | ||
.newCollector(); | ||
} | ||
|
||
/** | ||
|
@@ -429,106 +432,29 @@ public static TopFieldCollector create(Sort sort, int numHits, int totalHitsThre | |
* field is indexed both with doc values and points. In this case, there is an assumption that | ||
* the same data is stored in these points and doc values. | ||
* @return a {@link TopFieldCollector} instance which will sort the results by the sort criteria. | ||
* @deprecated This method is deprecated in favor of the constructor of {@link | ||
* TopFieldCollectorManager} due to its support for concurrency in IndexSearcher | ||
*/ | ||
@Deprecated | ||
gsmiller marked this conversation as resolved.
Show resolved
Hide resolved
|
||
public static TopFieldCollector create( | ||
Sort sort, int numHits, FieldDoc after, int totalHitsThreshold) { | ||
if (totalHitsThreshold < 0) { | ||
throw new IllegalArgumentException( | ||
"totalHitsThreshold must be >= 0, got " + totalHitsThreshold); | ||
} | ||
|
||
return create( | ||
sort, | ||
numHits, | ||
after, | ||
HitsThresholdChecker.create(Math.max(totalHitsThreshold, numHits)), | ||
null /* bottomValueChecker */); | ||
} | ||
|
||
/** | ||
* Same as above with additional parameters to allow passing in the threshold checker and the max | ||
* score accumulator. | ||
*/ | ||
static TopFieldCollector create( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since you're fully killing two methods here, does that mean that you're not intending to maintain back compat on 9.0? I assume these would get added back in and marked deprecated if backporting to 8x? I think it's reasonable, just checking what your intention is. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry just realized I missed a few comments earlier (they were collapsed on github UI). I think this one and the one from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Indeed we don't support access to pkg-private code. |
||
Sort sort, | ||
int numHits, | ||
FieldDoc after, | ||
HitsThresholdChecker hitsThresholdChecker, | ||
MaxScoreAccumulator minScoreAcc) { | ||
|
||
if (sort.getSort().length == 0) { | ||
throw new IllegalArgumentException("Sort must contain at least one field"); | ||
} | ||
|
||
if (numHits <= 0) { | ||
throw new IllegalArgumentException( | ||
"numHits must be > 0; please use TotalHitCountCollector if you just need the total hit count"); | ||
} | ||
|
||
if (hitsThresholdChecker == null) { | ||
throw new IllegalArgumentException("hitsThresholdChecker should not be null"); | ||
} | ||
|
||
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.getSort(), numHits); | ||
|
||
if (after == null) { | ||
// inform a comparator that sort is based on this single field | ||
// to enable some optimizations for skipping over non-competitive documents | ||
// We can't set single sort when the `after` parameter is non-null as it's | ||
// an implicit sort over the document id. | ||
if (queue.comparators.length == 1) { | ||
queue.comparators[0].setSingleSort(); | ||
} | ||
return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker, minScoreAcc); | ||
} else { | ||
if (after.fields == null) { | ||
throw new IllegalArgumentException( | ||
"after.fields wasn't set; you must pass fillFields=true for the previous search"); | ||
} | ||
|
||
if (after.fields.length != sort.getSort().length) { | ||
throw new IllegalArgumentException( | ||
"after.fields has " | ||
+ after.fields.length | ||
+ " values but sort has " | ||
+ sort.getSort().length); | ||
} | ||
|
||
return new PagingFieldCollector( | ||
sort, queue, after, numHits, hitsThresholdChecker, minScoreAcc); | ||
} | ||
return new TopFieldCollectorManager(sort, numHits, after, totalHitsThreshold, false) | ||
.newCollector(); | ||
} | ||
|
||
/** | ||
* Create a CollectorManager which uses a shared hit counter to maintain number of hits and a | ||
* shared {@link MaxScoreAccumulator} to propagate the minimum score accross segments if the | ||
* primary sort is by relevancy. | ||
* | ||
* @deprecated This method is deprecated in favor of the constructor of {@link | ||
* TopFieldCollectorManager} due to its support for concurrency in IndexSearcher | ||
*/ | ||
@Deprecated | ||
public static CollectorManager<TopFieldCollector, TopFieldDocs> createSharedManager( | ||
Sort sort, int numHits, FieldDoc after, int totalHitsThreshold) { | ||
|
||
int totalHitsMax = Math.max(totalHitsThreshold, numHits); | ||
return new CollectorManager<>() { | ||
private final HitsThresholdChecker hitsThresholdChecker = | ||
HitsThresholdChecker.createShared(totalHitsMax); | ||
private final MaxScoreAccumulator minScoreAcc = | ||
totalHitsMax == Integer.MAX_VALUE ? null : new MaxScoreAccumulator(); | ||
|
||
@Override | ||
public TopFieldCollector newCollector() throws IOException { | ||
return create(sort, numHits, after, hitsThresholdChecker, minScoreAcc); | ||
} | ||
|
||
@Override | ||
public TopFieldDocs reduce(Collection<TopFieldCollector> collectors) throws IOException { | ||
final TopFieldDocs[] topDocs = new TopFieldDocs[collectors.size()]; | ||
int i = 0; | ||
for (TopFieldCollector collector : collectors) { | ||
topDocs[i++] = collector.topDocs(); | ||
} | ||
return TopDocs.merge(sort, 0, numHits, topDocs); | ||
} | ||
}; | ||
return new TopFieldCollectorManager(sort, numHits, after, totalHitsThreshold, true); | ||
} | ||
|
||
/** | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we like the
@lucene.deprecated
tag?Is your reasoning behind keeping this in there for now that all of our uses of this (internal to Lucene) haven't yet migrated as part of this change? Would the plan me to migrate all usages off of this internally and then actually remove it on
main
/9.0, or are you thinking of keeping it around until 10.0? I think our backwards compatibility policy is such that we could just directly remove this onmain
/9.0, but then leave it like you have it (marked deprecated) if you choose to backport this to 8x. Since this method is so fundamental though, I could easily see an argument to keep it around for an extra major release to give users more time to migrate. Then again, the migration path seems pretty straight-forward. What do you think?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean
@lucene.deprecated
should be created and used instead of using@deprecated
? I can't seems to find that tag is being used in lucene actually...I think@lucene.deprecated
most likely won't be recognized by IDE or build tool to signal / warn the use of deprecated methods though.As explained in https://issues.apache.org/jira/browse/LUCENE-10002?focusedCommentId=17397827&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-17397827, I use deprecation instead of direct removal in this PR exactly for the reasons you mentioned:
I would be interested in seeing how folks feel about the timing of removal, but after #1 is completed, complete removal of IndexSearcher#search(Query, Collector) in lucene codebase should require only straightforward deletion changes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should try to keep
IndexSearcher#search(Query, Collector)
and theTopXXXCollector#create
factory methods during the 9.x series, as I expect it to be quite commonly used. However in my opinion it would be fine to cut over expert classes like those in the benchmark module without providing such long bw compatibility.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 makes sense to keep it in 9.0. As for my comment about
@lucene.deprecated
, I was getting this mixed up with something else. Apologies for any confusion!There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Though, I think there are both
@deprecated
(javadoc tag) and@Deprecated
(Java class annotation) -- we do both of them typically (looks like you did here @zacharymorn, great!).