Skip to content

Commit

Permalink
Merge pull request #135 from AnyRoad/rescore
Browse files Browse the repository at this point in the history
adds query rescoring
  • Loading branch information
jillesvangurp authored May 8, 2024
2 parents 15955eb + 70634d3 commit 2aa4976
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,44 @@ class SearchTest : SearchTestBase() {
response.responses shouldHaveSize 2
}

/**
 * Indexes three documents with distinct tags and checks that two chained rescorers
 * reorder the hits and yield the expected final scores.
 */
@Test
fun shouldApplyRescore() = coRun {
    val indexName = testDocumentIndex()
    client.bulk(target = indexName, refresh = Refresh.WaitFor) {
        // One document per tag, so each rescore query can match at most one document.
        listOf("doc 1" to "rescore", "doc 2" to "nope", "doc 3" to "another").forEach { (docName, tag) ->
            index(TestDocument(docName, tags = listOf(tag)).json())
        }
    }

    val response = client.search(indexName, explain = true) {
        query = matchAll()

        val matchesRescoreTag = constantScore {
            filter = match(TestDocument::tags, "rescore")
        }
        val matchesAnotherTag = constantScore {
            filter = match(TestDocument::tags, "another")
        }

        rescore(
            // First pass: adds a weighted bonus to the document tagged "rescore".
            rescorer(3) {
                scoreMode = RescoreScoreMode.total
                rescoreQueryWeight = 20.0
                queryWeight = 2.0
                rescoreQuery = matchesRescoreTag
            },
            // Second pass: multiplies the score of the document tagged "another".
            rescorer(3) {
                scoreMode = RescoreScoreMode.multiply
                rescoreQueryWeight = 5.0
                queryWeight = 1.0
                rescoreQuery = matchesAnotherTag
            },
        )
    }

    val rawHits = response.hits!!.hits
    rawHits shouldHaveSize 3
    response.parseHits<TestDocument>().map { it.name } shouldBe listOf("doc 1", "doc 3", "doc 2")
    // Presumably match_all and constant_score each contribute 1.0, which gives
    // doc 1: 2*1 + 20*1 = 22, doc 3: (2*1) * (5*1) = 10, doc 2: 2*1 = 2 — confirm against the server.
    rawHits.map { it.score } shouldBe listOf(22.0, 10.0, 2.0)
}

@Test
fun shouldExposeSeqNo() = coRun {
val indexName = testDocumentIndex()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,80 @@ fun SearchDSL.dotted(vararg elements: Any) = elements.joinToString(".") { pathCo
else -> pathComponent.toString()
}
}

/**
 * Adds the given [rescores] to the list stored under the "rescore" key of this search DSL;
 * the list is obtained through `getOrCreateMutableList`.
 *
 * @param rescores rescorers to append, in the order given
 * @return the result of the underlying `addAll` call
 */
fun SearchDSL.rescore(vararg rescores: Rescorer) =
    getOrCreateMutableList("rescore").let { rescoreEntries -> rescoreEntries.addAll(rescores) }

/**
* One entry of the "rescore" list of a search request: a window size plus the query used for rescoring.
*
* Rescoring can help to improve precision by reordering just the top (e.g. 100 - 500) documents
* returned by the query and post_filter phases, using a secondary (usually more costly) algorithm,
* instead of applying the costly algorithm to all documents in the index.
*/
class Rescorer : JsonDsl() {
/**
* The number of docs which will be examined on each shard.
*/
var windowSize by property<Int>()

/**
* Second query executed only on the top-K results returned by the query and post_filter phases.
*/
var query by property<RescoreQuery>()
}

/**
* Settings of a rescore query: the query to apply, the weights of the original and
* rescore scores, and the mode used to combine them.
*/
class RescoreQuery : JsonDsl() {
/**
* The query to apply to the documents being rescored.
*/
var rescoreQuery by esQueryProperty()

/**
* The relative importance of the original query.
*/
var queryWeight by property<Double>()

/**
* The relative importance of the rescore query.
*/
var rescoreQueryWeight by property<Double>()

/**
* The way the original score and the rescore score are combined.
*/
var scoreMode by property<RescoreScoreMode>()
}

/**
* Controls the way the original score and the rescore score are combined.
*
* NOTE(review): the constants are lowercase, presumably because the constant name is what
* ends up in the request — confirm before renaming them.
*/
enum class RescoreScoreMode {
/**
* Average the original score and the rescore query score.
*/
avg,

/**
* Take the min of the original score and the rescore query score.
*/
min,

/**
* Take the max of the original score and the rescore query score.
*/
max,

/**
* Add the original score and the rescore query score. The default.
*/
total,

/**
* Multiply the original score by the rescore query score.
*/
multiply
}

/**
 * Builds a [Rescorer] with the given window size and a [RescoreQuery] configured by [queryBlock].
 *
 * @param windowSize value assigned to [Rescorer.windowSize]
 * @param queryBlock configuration applied to a fresh [RescoreQuery]
 * @return the configured [Rescorer]; it is not added to the search request by this function
 */
fun SearchDSL.rescorer(windowSize: Int, queryBlock: RescoreQuery.() -> Unit): Rescorer {
    val result = Rescorer()
    result.windowSize = windowSize

    val rescoreSettings = RescoreQuery()
    rescoreSettings.queryBlock()
    result.query = rescoreSettings

    return result
}

0 comments on commit 2aa4976

Please sign in to comment.