Skip to content

Commit

Permalink
Search / Similar document improvement.
Browse files Browse the repository at this point in the history
  • Loading branch information
fxprunayre committed Sep 11, 2024
1 parent c2be3a2 commit b3818bd
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -289,4 +289,58 @@ in `records.json` (eg. adding Danish):
"any.langdan"
]
},
```
```
## Similar documents
Similar documents are displayed at the bottom of the record view.
The more like this query can be configured for the user interface.
* `moreLikeThisSameType` is a boolean to enable or disable the more like this query for the same type of document (e.g. suggest series only for a record which is a series).
* `moreLikeThisConfig` is the configuration of the more like this query. The configuration is the same as the [Elasticsearch more like this query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-mlt-query.html).
To configure a more like this query computing similarity between the current record title terms (added to the `like` element) and the title, abstract and tag fields of other records:
```json
"moreLikeThisConfig": {
"more_like_this": {
"fields": [
"resourceTitleObject.default",
"resourceAbstractObject.default",
"tag.raw"
],
"like": null,
"min_term_freq": 1,
"min_word_length": 3,
"max_query_terms": 35,
"minimum_should_match": "70%"
}
},
```
The `like` element can also be used to provide a document to compare with. In this case, the query will compute similarity for the complete document and also search for the current record title terms.
```json
"moreLikeThisConfig": {
"more_like_this": {
"like": [
{
"_id": null
}
],
"analyzer": "english",
"minimum_should_match": "40%"
}
}
```
If all your records are in the same language, adding the analyzer to the `more_like_this` query can improve the similarity computation.
```json
"analyzer": "english",
```
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,27 @@
};

function buildQuery() {
/**
 * Build the "more like this" clause for the current record.
 *
 * Works on a copy of `moreLikeThisQuery` so the configured query is
 * not modified between calls.
 *
 * - When `more_like_this.like` is an array, every object entry exposing an
 *   `_id` property gets `scope.md.uuid` as its `_id`, and the record title
 *   (`scope.md.resourceTitle`) is appended to the array.
 * - Otherwise `more_like_this.like` is replaced by the record title.
 *
 * @param {*} [fields] Unused; kept so existing call sites stay valid.
 * @returns {Object} The copied query with its `like` element populated.
 */
function buildMoreLikeThisFields(fields) {
  var config = {};
  angular.copy(moreLikeThisQuery, config);

  var mlt = config.more_like_this;
  if (Array.isArray(mlt.like)) {
    mlt.like.forEach(function (field) {
      // typeof null === "object": skip null entries explicitly, otherwise
      // reading field._id would throw a TypeError.
      if (field !== null && typeof field === "object" && field._id !== undefined) {
        field._id = scope.md.uuid;
      }
    });
    mlt.like.push(scope.md.resourceTitle);
  } else {
    mlt.like = scope.md.resourceTitle;
  }
  return config;
}

var query = gnESFacet.buildDefaultQuery(
{
bool: {
must: [
moreLikeThisQuery,
buildMoreLikeThisFields(),
{ terms: { isTemplate: ["n"] } },
// TODO: We may want to use it for subtemplate
{ terms: { draft: ["n", "e"] } }
Expand All @@ -161,23 +177,6 @@
scope.size
);

/**
 * Populate the `like` element of the first `must` clause of `query`
 * in place.
 *
 * - When `like` is an array, every object entry exposing an `_id` property
 *   gets `scope.md.uuid` as its `_id`, and the record title
 *   (`scope.md.resourceTitle`) is appended to the array.
 * - Otherwise `like` is replaced by the record title.
 *
 * NOTE(review): this mutates the object stored in
 * `query.query.bool.must[0]`; if that object is shared with the configured
 * query, repeated calls keep appending titles. Confirm against the caller.
 *
 * @param {*} [fields] Unused; kept so existing call sites stay valid.
 */
function setMoreLikeThisFields(fields) {
  var mlt = query.query.bool.must[0].more_like_this;
  var like = mlt.like;

  if (!Array.isArray(like)) {
    mlt.like = scope.md.resourceTitle;
    return;
  }

  like.forEach(function (field) {
    // typeof null === "object": skip null entries explicitly, otherwise
    // reading field._id would throw a TypeError.
    if (field !== null && typeof field === "object" && field._id !== undefined) {
      field._id = scope.md.uuid;
    }
  });
  like.push(scope.md.resourceTitle);
}

setMoreLikeThisFields();

var resourceType = scope.md.resourceType
? scope.md.resourceType[0]
: undefined;
Expand Down
8 changes: 5 additions & 3 deletions web-ui/src/main/resources/catalog/js/CatController.js
Original file line number Diff line number Diff line change
Expand Up @@ -314,9 +314,11 @@
moreLikeThisSameType: true,
moreLikeThisConfig: {
more_like_this: {
like: [{
"_id": null
}],
like: [
{
_id: null
}
],
analyzer: "english",
minimum_should_match: "40%"
}
Expand Down

0 comments on commit b3818bd

Please sign in to comment.