Skip to content

Commit

Permalink
Merge pull request #33139 from vespa-engine/geirst/filter-threshold-i…
Browse files Browse the repository at this point in the history
…n-rank-profile

Add syntax for filter-threshold for a rank profile.
  • Loading branch information
geirst authored Jan 20, 2025
2 parents c1f22cb + 30db26a commit aef1164
Show file tree
Hide file tree
Showing 8 changed files with 76 additions and 4 deletions.
9 changes: 9 additions & 0 deletions config-model/src/main/java/com/yahoo/schema/RankProfile.java
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ public class RankProfile implements Cloneable {
private Double targetHitsMaxAdjustmentFactor = null;
private Double weakandStopwordLimit = null;
private Double weakandAdjustTarget = null;
private Double filterThreshold = null;

/** The drop limit used to drop hits with rank score less than or equal to this value */
private double rankScoreDropLimit = -Double.MAX_VALUE;
Expand Down Expand Up @@ -784,6 +785,7 @@ public int getNumSearchPartitions() {
/** Stores the given factor as the target-hits max adjustment factor set directly on this profile. */
public void setTargetHitsMaxAdjustmentFactor(double factor) {
    targetHitsMaxAdjustmentFactor = Double.valueOf(factor);
}
/** Stores the given limit as the weakand stopword limit set directly on this profile. */
public void setWeakandStopwordLimit(double limit) {
    weakandStopwordLimit = Double.valueOf(limit);
}
/** Stores the given target as the weakand adjust target set directly on this profile. */
public void setWeakandAdjustTarget(double target) {
    weakandAdjustTarget = Double.valueOf(target);
}
/**
 * Stores the given threshold as the filter threshold set directly on this profile.
 * A value stored here is what {@code getFilterThreshold()} returns before it
 * falls back to {@code uniquelyInherited}.
 */
public void setFilterThreshold(double threshold) {
    filterThreshold = Double.valueOf(threshold);
}

public OptionalDouble getTermwiseLimit() {
if (termwiseLimit != null) return OptionalDouble.of(termwiseLimit);
Expand Down Expand Up @@ -826,6 +828,13 @@ public OptionalDouble getWeakandAdjustTarget() {
return uniquelyInherited(RankProfile::getWeakandAdjustTarget, OptionalDouble::isPresent, "weakand-adjust-target").orElse(OptionalDouble.empty());
}

/**
 * Returns the filter threshold that applies to this profile.
 *
 * A value set directly on this profile is returned as-is. When none is set, the lookup is
 * delegated to {@code uniquelyInherited} (keyed by the "filter-threshold" setting name), and
 * an empty result is returned if that lookup yields nothing.
 *
 * @return the filter threshold, or empty if neither source provides one
 */
public OptionalDouble getFilterThreshold() {
    if (filterThreshold == null) {
        return uniquelyInherited(RankProfile::getFilterThreshold, OptionalDouble::isPresent, "filter-threshold")
                .orElse(OptionalDouble.empty());
    }
    return OptionalDouble.of(filterThreshold);
}

/** Whether we should ignore the default rank features. Set to null to use inherited */
public void setIgnoreDefaultRankFeatures(Boolean ignoreDefaultRankFeatures) {
this.ignoreDefaultRankFeatures = ignoreDefaultRankFeatures;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ private static class Deriver {
private final OptionalDouble targetHitsMaxAdjustmentFactor;
private final OptionalDouble weakandStopwordLimit;
private final OptionalDouble weakandAdjustTarget;
private final OptionalDouble filterThreshold;
private final double rankScoreDropLimit;
private final double secondPhaseRankScoreDropLimit;
private final boolean sortBlueprintsByCost;
Expand Down Expand Up @@ -224,6 +225,7 @@ private static class Deriver {
targetHitsMaxAdjustmentFactor = compiled.getTargetHitsMaxAdjustmentFactor();
weakandStopwordLimit = compiled.getWeakandStopwordLimit();
weakandAdjustTarget = compiled.getWeakandAdjustTarget();
filterThreshold = compiled.getFilterThreshold();
keepRankCount = compiled.getKeepRankCount();
rankScoreDropLimit = compiled.getRankScoreDropLimit();
secondPhaseRankScoreDropLimit = compiled.getSecondPhaseRankScoreDropLimit();
Expand Down Expand Up @@ -493,6 +495,9 @@ else if (RankingExpression.propertyName(RankProfile.GLOBAL_PHASE).equals(propert
if (weakandAdjustTarget.isPresent()) {
properties.add(new Pair<>("vespa.matching.weakand.stop_word_adjust_limit", String.valueOf(weakandAdjustTarget.getAsDouble())));
}
if (filterThreshold.isPresent()) {
properties.add(new Pair<>("vespa.matching.filter_threshold", String.valueOf(filterThreshold.getAsDouble())));
}
if (matchPhaseSettings != null) {
properties.add(new Pair<>("vespa.matchphase.degradation.attribute", matchPhaseSettings.getAttribute()));
properties.add(new Pair<>("vespa.matchphase.degradation.ascendingorder", matchPhaseSettings.getAscending() + ""));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ public class ParsedRankProfile extends ParsedBlock {
private Boolean useSignificanceModel = null;
private Double weakandStopwordLimit = null;
private Double weakandAdjustTarget = null;
private Double filterThreshold = null;
private final List<MutateOperation> mutateOperations = new ArrayList<>();
private final List<String> inherited = new ArrayList<>();
private final Map<String, Boolean> fieldsRankFilter = new LinkedHashMap<>();
Expand Down Expand Up @@ -108,6 +109,7 @@ public ParsedRankProfile(String name) {

/** Returns the parsed weakand stopword limit, or empty if none has been set. */
Optional<Double> getWeakandStopwordLimit() {
    return Optional.ofNullable(weakandStopwordLimit);
}
/** Returns the parsed weakand adjust target, or empty if none has been set. */
Optional<Double> getWeakandAdjustTarget() {
    return Optional.ofNullable(weakandAdjustTarget);
}
/** Returns the parsed filter threshold, or empty if none has been set. */
Optional<Double> getFilterThreshold() {
    return Optional.ofNullable(filterThreshold);
}

/** Appends the given feature list to the summary features collected for this profile. */
public void addSummaryFeatures(FeatureList features) {
    summaryFeatures.add(features);
}
/** Appends the given feature list to the match features collected for this profile. */
public void addMatchFeatures(FeatureList features) {
    matchFeatures.add(features);
}
Expand Down Expand Up @@ -255,6 +257,11 @@ public void setWeakandAdjustTarget(double target) {
this.weakandAdjustTarget = target;
}

/**
 * Records the filter threshold parsed for this rank profile.
 *
 * Before storing, {@code verifyThat} is invoked with the condition that no threshold has been
 * recorded yet and the message "already has filter-threshold".
 *
 * @param threshold the parsed filter-threshold value
 */
public void setFilterThreshold(double threshold) {
    boolean notYetSet = (filterThreshold == null);
    verifyThat(notYetSet, "already has filter-threshold");
    filterThreshold = Double.valueOf(threshold);
}

public void setTermwiseLimit(double limit) {
verifyThat(termwiseLimit == null, "already has termwise-limit");
this.termwiseLimit = limit;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ private void populateFrom(ParsedRankProfile parsed, RankProfile profile) {
parsed.getTargetHitsMaxAdjustmentFactor().ifPresent(profile::setTargetHitsMaxAdjustmentFactor);
parsed.getWeakandStopwordLimit().ifPresent(profile::setWeakandStopwordLimit);
parsed.getWeakandAdjustTarget().ifPresent(profile::setWeakandAdjustTarget);
parsed.getFilterThreshold().ifPresent(profile::setFilterThreshold);
parsed.getKeepRankCount().ifPresent(profile::setKeepRankCount);
parsed.getMinHitsPerThread().ifPresent(profile::setMinHitsPerThread);
parsed.getNumSearchPartitions().ifPresent(profile::setNumSearchPartitions);
Expand Down
13 changes: 12 additions & 1 deletion config-model/src/main/javacc/SchemaParser.jj
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ TOKEN :
| < WEAKAND: "weakand">
| < STOPWORD_LIMIT: "stopword-limit">
| < ADJUST_TARGET: "adjust-target">
| < FILTER_THRESHOLD: "filter-threshold">
| < INTRAOP_THREADS: "intraop-threads">
| < INTEROP_THREADS: "interop-threads">
| < GPU_DEVICE: "gpu-device">
Expand Down Expand Up @@ -1780,7 +1781,8 @@ void rankProfileItem(ParsedSchema schema, ParsedRankProfile profile) : { }
| onnxModelInProfile(profile)
| strict(profile)
| significance(profile)
| weakand(profile))
| weakand(profile)
| filterThreshold(profile))
}

/**
Expand Down Expand Up @@ -2194,6 +2196,14 @@ void weakandAdjustTarget(ParsedRankProfile profile) :
(<ADJUST_TARGET> <COLON> target = floatValue()) { profile.setWeakandAdjustTarget(target); }
}

/**
 * Consumes a filter-threshold statement of a rank profile:
 * the filter-threshold keyword, a colon and a float value.
 * The parsed value is passed to setFilterThreshold on the given profile.
 *
 * @param profile the rank profile to modify
 */
void filterThreshold(ParsedRankProfile profile) :
{
double threshold;
}
{
(<FILTER_THRESHOLD> <COLON> threshold = floatValue()) { profile.setFilterThreshold(threshold); }
}

/**
* Consumes a match-features block of a rank profile.
*
Expand Down Expand Up @@ -2745,6 +2755,7 @@ String identifierWithDash() :
| <FAST_ACCESS>
| <FAST_RANK>
| <FAST_SEARCH>
| <FILTER_THRESHOLD>
| <FIRST_PHASE>
| <FROM_DISK>
| <GLOBAL_PHASE>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,18 @@ private void verifyWeakandAdjustTarget(Double adjustTarget) throws ParseExceptio
adjustTarget, "vespa.matching.weakand.stop_word_adjust_limit");
}

/** Runs the filter-threshold verification once without a value and once with an explicit one. */
@Test
void filter_threshold_is_configurable() throws ParseException {
    Double notConfigured = null;
    Double configured = 0.05;

    verifyFilterThreshold(notConfigured);
    verifyFilterThreshold(configured);
}

/**
 * Builds a schema whose rank profile only carries the given filter threshold (all other
 * optional settings passed as null), derives the rank profiles from it, and checks the
 * setting against the "vespa.matching.filter_threshold" property name.
 *
 * @param threshold the filter threshold to put in the schema, or null to leave it out
 */
private void verifyFilterThreshold(Double threshold) throws ParseException {
    String schemaContent = createSDWithRankProfile(null, null, null, null, null, threshold);
    var profiles = createRankProfile(schemaContent);
    verifyRankProfileSetting(profiles.getFirst(),
                             profiles.getSecond(),
                             RankProfile::getFilterThreshold,
                             threshold,
                             "vespa.matching.filter_threshold");
}

private void verifyRankProfileSetting(RankProfile rankProfile, RawRankProfile rawRankProfile, Function<RankProfile, OptionalDouble> func,
Double expValue, String expPropertyName) {
if (expValue != null) {
Expand All @@ -584,12 +596,12 @@ private void verifyRankProfileSetting(RankProfile rankProfile, RawRankProfile ra
private Pair<RankProfile, RawRankProfile> createRankProfile(Double postFilterThreshold,
Double approximateThreshold,
Double targetHitsMaxAdjustmentFactor) throws ParseException {
return createRankProfile(createSDWithRankProfile(postFilterThreshold, approximateThreshold, targetHitsMaxAdjustmentFactor, null, null));
return createRankProfile(createSDWithRankProfile(postFilterThreshold, approximateThreshold, targetHitsMaxAdjustmentFactor, null, null, null));
}

private Pair<RankProfile, RawRankProfile> createWeakandRankProfile(Double weakAndStopwordLimit,
Double weakAndAdjustTarget) throws ParseException {
return createRankProfile(createSDWithRankProfile(null, null, null, weakAndStopwordLimit, weakAndAdjustTarget));
return createRankProfile(createSDWithRankProfile(null, null, null, weakAndStopwordLimit, weakAndAdjustTarget, null));
}

private Pair<RankProfile, RawRankProfile> createRankProfile(String schemaContent) throws ParseException {
Expand All @@ -611,7 +623,8 @@ private String createSDWithRankProfile(Double postFilterThreshold,
Double approximateThreshold,
Double targetHitsMaxAdjustmentFactor,
Double weakandStopwordLimit,
Double weakandAdjustTarget) {
Double weakandAdjustTarget,
Double filterThreshold) {
return joinLines(
"search test {",
" document test {}",
Expand All @@ -621,6 +634,7 @@ private String createSDWithRankProfile(Double postFilterThreshold,
(targetHitsMaxAdjustmentFactor != null ? (" target-hits-max-adjustment-factor: " + targetHitsMaxAdjustmentFactor) : ""),
(weakandStopwordLimit != null ? (" weakand { stopword-limit: " + weakandStopwordLimit + "}") : ""),
(weakandAdjustTarget != null ? (" weakand { adjust-target: " + weakandAdjustTarget + "}") : ""),
(filterThreshold != null ? (" filter-threshold: " + filterThreshold) : ""),
" }",
"}");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,20 @@ void weakand_adjust_target_can_be_parsed() throws Exception {
assertEquals(0.01, target.get());
}


/** Parses a minimal schema with a filter-threshold statement and checks the parsed value. */
@Test
void filter_threshold_can_be_parsed() throws Exception {
    String input = joinLines("schema foo {",
                             "rank-profile rp {",
                             "filter-threshold: 0.05",
                             "}",
                             "}");
    var rankProfile = parseString(input).getRankProfiles().get(0);
    var threshold = rankProfile.getFilterThreshold();
    assertTrue(threshold.isPresent());
    assertEquals(0.05, threshold.get());
}

@Test
void maxOccurrencesCanBeParsed() throws Exception {
String input = joinLines
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ TOKEN :
| < WEAKAND: "weakand">
| < STOPWORD_LIMIT: "stopword-limit">
| < ADJUST_TARGET: "adjust-target">
| < FILTER_THRESHOLD: "filter-threshold">
| < INTRAOP_THREADS: "intraop-threads">
| < INTEROP_THREADS: "interop-threads">
| < GPU_DEVICE: "gpu-device">
Expand Down Expand Up @@ -2076,6 +2077,7 @@ void rankProfileItem(ParsedSchema schema, ParsedRankProfile profile) : { }
| strictElm(profile)
| significanceElm(profile)
| weakandElm(profile)
| filterThreshold(profile)
)
;

Expand Down Expand Up @@ -2512,6 +2514,14 @@ void weakandAdjustTarget(ParsedRankProfile profile) :
(<ADJUST_TARGET> <COLON> target = floatValue()) { profile.setWeakandAdjustTarget(target); }
;

/**
 * Consumes a filter-threshold statement of a rank profile:
 * the filter-threshold keyword, a colon and a float value.
 * The parsed value is passed to setFilterThreshold on the given profile.
 *
 * @param profile the rank profile to modify
 */
void filterThreshold(ParsedRankProfile profile) :
{
double threshold;
}

(<FILTER_THRESHOLD> <COLON> threshold = floatValue()) { profile.setFilterThreshold(threshold); }
;

/**
* Consumes a match-features block of a rank profile.
*
Expand Down Expand Up @@ -3079,6 +3089,7 @@ String identifierWithDashStr :
| <FAST_ACCESS>
| <FAST_RANK>
| <FAST_SEARCH>
| <FILTER_THRESHOLD>
| <FIRST_PHASE>
| <FROM_DISK>
| <GLOBAL_PHASE>
Expand Down

0 comments on commit aef1164

Please sign in to comment.