From 2d11739a416dd8e85856d0547d54f1e131513662 Mon Sep 17 00:00:00 2001 From: Callum Reekie Date: Thu, 27 Jan 2022 14:59:58 +0000 Subject: [PATCH 1/7] optimise data use search --- src/resources/search/search.repository.js | 121 +++++++++++++--------- 1 file changed, 70 insertions(+), 51 deletions(-) diff --git a/src/resources/search/search.repository.js b/src/resources/search/search.repository.js index cff33322..00799344 100644 --- a/src/resources/search/search.repository.js +++ b/src/resources/search/search.repository.js @@ -136,8 +136,24 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, newSearchQuery['$and'] = newSearchQuery['$and'].filter(exp => !exp['$text']); } + let dataUseSort = {}; + + if (sort === '') { + dataUseSort = searchAll ? { lastActivity: -1 } : { score: { $meta: 'textScore' } }; + } else if (sort === 'relevance') { + dataUseSort = searchAll ? { projectTitle: 1 } : { score: { $meta: 'textScore' } }; + } else if (sort === 'popularity') { + dataUseSort = searchAll ? { counter: -1, projectTitle: 1 } : { counter: -1, score: { $meta: 'textScore' } }; + } else if (sort === 'latest') { + dataUseSort = searchAll ? { lastActivity: -1 } : { lastActivity: -1, score: { $meta: 'textScore' } }; + } else if (sort === 'resources') { + dataUseSort = searchAll ? { relatedResourcesCount: -1 } : { relatedResourcesCount: -1, score: { $meta: 'textScore' } }; + } + queryObject = [ { $match: searchTerm }, + { $addFields: { relatedResourcesCount: { $size: { $ifNull: ['$relatedObjects', []] } } } }, + { $sort: dataUseSort }, { $lookup: { from: 'publishers', @@ -390,63 +406,65 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, ]; } - if (sort === '') { - if (type === 'dataset') { + if (type !== 'dataUseRegister') { + if (sort === '') { + if (type === 'dataset') { + if (searchAll) queryObject.push({ $sort: { 'datasetfields.metadataquality.weighted_quality_score': -1, name: 1 } }); + else queryObject.push({ $sort: { score: { $meta: 'textScore' } } }); + } else if (type === 'paper') { + if (searchAll) queryObject.push({ $sort: { journalYear: -1 } }); + else queryObject.push({ $sort: { journalYear: -1, score: { $meta: 'textScore' } } }); + } else { + if (form === 'true' && searchAll) { + queryObject.push({ $sort: { myEntity: -1, latestUpdate: -1 } }); + } else if (form === 'true' && !searchAll) { + queryObject.push({ $sort: { myEntity: -1, score: { $meta: 'textScore' } } }); + } else if (form !== 'true' && searchAll) { + queryObject.push({ $sort: { latestUpdate: -1 } }); + } else if (form !== 'true' && !searchAll) { + queryObject.push({ $sort: { score: { $meta: 'textScore' } } }); + } + } + } else if (sort === 'relevance') { + if (type === 'person') { + if (searchAll) queryObject.push({ $sort: { lastname: 1 } }); + else queryObject.push({ $sort: { score: { $meta: 'textScore' } } }); + } else { + if (searchAll) queryObject.push({ $sort: { name: 1 } }); + else queryObject.push({ $sort: { score: { $meta: 'textScore' } } }); + } + } else if (sort === 'popularity') { + if (type === 'person') { + if (searchAll) queryObject.push({ $sort: { counter: -1, lastname: 1 } }); + else queryObject.push({ $sort: { counter: -1, score: { $meta: 'textScore' } } }); + } else { + if (searchAll) queryObject.push({ $sort: { counter: -1, name: 1 } }); + else queryObject.push({ $sort: { counter: -1, score: { $meta: 'textScore' } } }); + } + } else if (sort === 'metadata') { if (searchAll) queryObject.push({ $sort: { 'datasetfields.metadataquality.weighted_quality_score': -1, name: 1 } }); - else queryObject.push({ $sort: { score: { $meta: 'textScore' } } }); - } else if (type === 'paper') { - if (searchAll) queryObject.push({ $sort: { journalYear: -1 } }); - else queryObject.push({ $sort: { journalYear: -1, score: { $meta: 'textScore' } } }); - } else { + else queryObject.push({ $sort: { 'datasetfields.metadataquality.weighted_quality_score': -1, score: { $meta: 'textScore' } } }); + } else if (sort === 'startdate') { if (form === 'true' && searchAll) { - queryObject.push({ $sort: { myEntity: -1, latestUpdate: -1 } }); + queryObject.push({ $sort: { myEntity: -1, 'courseOptions.startDate': 1 } }); } else if (form === 'true' && !searchAll) { - queryObject.push({ $sort: { myEntity: -1, score: { $meta: 'textScore' } } }); + queryObject.push({ $sort: { myEntity: -1, 'courseOptions.startDate': 1, score: { $meta: 'textScore' } } }); } else if (form !== 'true' && searchAll) { - queryObject.push({ $sort: { latestUpdate: -1 } }); + queryObject.push({ $sort: { 'courseOptions.startDate': 1 } }); } else if (form !== 'true' && !searchAll) { - queryObject.push({ $sort: { score: { $meta: 'textScore' } } }); + queryObject.push({ $sort: { myEntity: -1, 'courseOptions.startDate': 1, score: { $meta: 'textScore' } } }); + } + } else if (sort === 'latest') { + if (searchAll) queryObject.push({ $sort: { latestUpdate: -1 } }); + else queryObject.push({ $sort: { latestUpdate: -1, score: { $meta: 'textScore' } } }); + } else if (sort === 'resources') { + if (searchAll) queryObject.push({ $sort: { relatedresources: -1 } }); + else queryObject.push({ $sort: { relatedresources: -1, score: { $meta: 'textScore' } } }); + } else if (sort === 'sortbyyear') { + if (type === 'paper') { + if (searchAll) queryObject.push({ $sort: { journalYear: -1 } }); + else queryObject.push({ $sort: { journalYear: -1, score: { $meta: 'textScore' } } }); } - } - } else if (sort === 'relevance') { - if (type === 'person') { - if (searchAll) queryObject.push({ $sort: { lastname: 1 } }); - else queryObject.push({ $sort: { score: { $meta: 'textScore' } } }); - } else { - if (searchAll) queryObject.push({ $sort: { name: 1 } }); - else queryObject.push({ $sort: { score: { $meta: 'textScore' } } }); - } - } else if (sort === 'popularity') { - if (type === 'person') { - if (searchAll) queryObject.push({ $sort: { counter: -1, lastname: 1 } }); - else queryObject.push({ $sort: { counter: -1, score: { $meta: 'textScore' } } }); - } else { - if (searchAll) queryObject.push({ $sort: { counter: -1, name: 1 } }); - else queryObject.push({ $sort: { counter: -1, score: { $meta: 'textScore' } } }); - } - } else if (sort === 'metadata') { - if (searchAll) queryObject.push({ $sort: { 'datasetfields.metadataquality.weighted_quality_score': -1, name: 1 } }); - else queryObject.push({ $sort: { 'datasetfields.metadataquality.weighted_quality_score': -1, score: { $meta: 'textScore' } } }); - } else if (sort === 'startdate') { - if (form === 'true' && searchAll) { - queryObject.push({ $sort: { myEntity: -1, 'courseOptions.startDate': 1 } }); - } else if (form === 'true' && !searchAll) { - queryObject.push({ $sort: { myEntity: -1, 'courseOptions.startDate': 1, score: { $meta: 'textScore' } } }); - } else if (form !== 'true' && searchAll) { - queryObject.push({ $sort: { 'courseOptions.startDate': 1 } }); - } else if (form !== 'true' && !searchAll) { - queryObject.push({ $sort: { myEntity: -1, 'courseOptions.startDate': 1, score: { $meta: 'textScore' } } }); - } - } else if (sort === 'latest') { - if (searchAll) queryObject.push({ $sort: { latestUpdate: -1 } }); - else queryObject.push({ $sort: { latestUpdate: -1, score: { $meta: 'textScore' } } }); - } else if (sort === 'resources') { - if (searchAll) queryObject.push({ $sort: { relatedresources: -1 } }); - else queryObject.push({ $sort: { relatedresources: -1, score: { $meta: 'textScore' } } }); - } else if (sort === 'sortbyyear') { - if (type === 'paper') { - if (searchAll) queryObject.push({ $sort: { journalYear: -1 } }); - else queryObject.push({ $sort: { journalYear: -1, score: { $meta: 'textScore' } } }); } } @@ -459,6 +477,7 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, console.log(err); }); // Return data + return { data: searchResults }; } From 8317cc7e74f2ac8f2aef2204c3482af016e95d18 Mon Sep 17 00:00:00 2001 From: Callum Reekie Date: Thu, 27 Jan 2022 17:55:07 +0000 Subject: [PATCH 2/7] further optimisation of DUR search pipeline --- src/resources/search/search.repository.js | 30 ++++++++++++++++------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/resources/search/search.repository.js b/src/resources/search/search.repository.js index 00799344..ba0b616a 100644 --- a/src/resources/search/search.repository.js +++ b/src/resources/search/search.repository.js @@ -132,6 +132,8 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, } else if (type === 'dataUseRegister') { const searchTerm = (newSearchQuery && newSearchQuery['$and'] && newSearchQuery['$and'].find(exp => !_.isNil(exp['$text']))) || {}; + console.log(searchTerm); + if (searchTerm) { newSearchQuery['$and'] = newSearchQuery['$and'].filter(exp => !exp['$text']); } @@ -151,9 +153,11 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, } queryObject = [ - { $match: searchTerm }, + { $match: { ...searchTerm, ...newSearchQuery } }, { $addFields: { relatedResourcesCount: { $size: { $ifNull: ['$relatedObjects', []] } } } }, { $sort: dataUseSort }, + { $skip: parseInt(startIndex) }, + { $limit: maxResults }, { $lookup: { from: 'publishers', @@ -191,7 +195,6 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, publisherInfo: { name: '$publisherDetails.name' }, }, }, - { $match: newSearchQuery }, { $project: { _id: 0, @@ -468,15 +471,24 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, } } - // Get paged results based on query params - const searchResults = await collection - .aggregate(queryObject) - .skip(parseInt(startIndex)) - .limit(parseInt(maxResults)) - .catch(err => { + let time1 = Date.now(); + + let searchResults; + if (type === 'dataUseRegister') { + searchResults = await collection.aggregate(queryObject).catch(err => { console.log(err); }); - // Return data + } else { + searchResults = await collection + .aggregate(queryObject) + .skip(parseInt(startIndex)) + .limit(parseInt(maxResults)) + .catch(err => { + console.log(err); + }); + } + + console.log((Date.now() - time1) / 1000); return { data: searchResults }; } From 63f8451ef55c79c2b5ec31070d43c5954c8da48e Mon Sep 17 00:00:00 2001 From: Callum Reekie Date: Thu, 27 Jan 2022 17:55:07 +0000 Subject: [PATCH 3/7] further optimisation of DUR search pipeline --- src/resources/search/search.repository.js | 24 ++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/resources/search/search.repository.js b/src/resources/search/search.repository.js index 00799344..67b2b38e 100644 --- a/src/resources/search/search.repository.js +++ b/src/resources/search/search.repository.js @@ -151,9 +151,11 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, } queryObject = [ - { $match: searchTerm }, + { $match: { ...searchTerm, ...newSearchQuery } }, { $addFields: { relatedResourcesCount: { $size: { $ifNull: ['$relatedObjects', []] } } } }, { $sort: dataUseSort }, + { $skip: parseInt(startIndex) }, + { $limit: maxResults }, { $lookup: { from: 'publishers', @@ -191,7 +193,6 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, publisherInfo: { name: '$publisherDetails.name' }, }, }, - { $match: newSearchQuery }, { $project: { _id: 0, @@ -468,15 +469,20 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, } } - // Get paged results based on query params - const searchResults = await collection - .aggregate(queryObject) - .skip(parseInt(startIndex)) - .limit(parseInt(maxResults)) - .catch(err => { + let searchResults; + if (type === 'dataUseRegister') { + searchResults = await collection.aggregate(queryObject).catch(err => { console.log(err); }); - // Return data + } else { + searchResults = await collection + .aggregate(queryObject) + .skip(parseInt(startIndex)) + .limit(parseInt(maxResults)) + .catch(err => { + console.log(err); + }); + } return { data: searchResults }; } From bbbe7a0d272b63d4b161b21483d3138bae9077c7 Mon Sep 17 00:00:00 2001 From: Callum Reekie Date: Thu, 27 Jan 2022 17:58:45 +0000 Subject: [PATCH 4/7] remove rogue console logs - again --- src/resources/search/search.repository.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/resources/search/search.repository.js b/src/resources/search/search.repository.js index a8089d9e..67b2b38e 100644 --- a/src/resources/search/search.repository.js +++ b/src/resources/search/search.repository.js @@ -132,8 +132,6 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, } else if (type === 'dataUseRegister') { const searchTerm = (newSearchQuery && newSearchQuery['$and'] && newSearchQuery['$and'].find(exp => !_.isNil(exp['$text']))) || {}; - console.log(searchTerm); - if (searchTerm) { newSearchQuery['$and'] = newSearchQuery['$and'].filter(exp => !exp['$text']); } From 440b77a334d9d64577ebd65d678991e3c829d60d Mon Sep 17 00:00:00 2001 From: Callum Reekie Date: Fri, 28 Jan 2022 09:48:41 +0000 Subject: [PATCH 5/7] optimise --- src/resources/search/search.repository.js | 52 ++++++++++++----------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/src/resources/search/search.repository.js b/src/resources/search/search.repository.js index 67b2b38e..a48ea1b8 100644 --- a/src/resources/search/search.repository.js +++ b/src/resources/search/search.repository.js @@ -138,16 +138,22 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, let dataUseSort = {}; - if (sort === '') { - dataUseSort = searchAll ? { lastActivity: -1 } : { score: { $meta: 'textScore' } }; - } else if (sort === 'relevance') { - dataUseSort = searchAll ? { projectTitle: 1 } : { score: { $meta: 'textScore' } }; - } else if (sort === 'popularity') { - dataUseSort = searchAll ? { counter: -1, projectTitle: 1 } : { counter: -1, score: { $meta: 'textScore' } }; - } else if (sort === 'latest') { - dataUseSort = searchAll ? { lastActivity: -1 } : { lastActivity: -1, score: { $meta: 'textScore' } }; - } else if (sort === 'resources') { - dataUseSort = searchAll ? { relatedResourcesCount: -1 } : { relatedResourcesCount: -1, score: { $meta: 'textScore' } }; + switch (sort) { + case '': + dataUseSort = searchAll ? { lastActivity: -1 } : { score: { $meta: 'textScore' } }; + break; + case 'relevance': + dataUseSort = searchAll ? { projectTitle: 1 } : { score: { $meta: 'textScore' } }; + break; + case 'popularity': + dataUseSort = searchAll ? { counter: -1, projectTitle: 1 } : { counter: -1, score: { $meta: 'textScore' } }; + break; + case 'latest': + dataUseSort = searchAll ? { lastActivity: -1 } : { lastActivity: -1, score: { $meta: 'textScore' } }; + break; + case 'resources': + dataUseSort = searchAll ? { relatedResourcesCount: -1 } : { relatedResourcesCount: -1, score: { $meta: 'textScore' } }; + break; } queryObject = [ @@ -469,20 +475,18 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, } } - let searchResults; - if (type === 'dataUseRegister') { - searchResults = await collection.aggregate(queryObject).catch(err => { - console.log(err); - }); - } else { - searchResults = await collection - .aggregate(queryObject) - .skip(parseInt(startIndex)) - .limit(parseInt(maxResults)) - .catch(err => { - console.log(err); - }); - } + const searchResults = + type === 'dataUseRegister' + ? await collection.aggregate(queryObject).catch(err => { + console.log(err); + }) + : await collection + .aggregate(queryObject) + .skip(parseInt(startIndex)) + .limit(parseInt(maxResults)) + .catch(err => { + console.log(err); + }); return { data: searchResults }; } From 46f8163b86ee3555eb5e6c2b5be2c2325c963149 Mon Sep 17 00:00:00 2001 From: Callum Reekie Date: Fri, 28 Jan 2022 12:07:15 +0000 Subject: [PATCH 6/7] fix slow DUR filters --- .../dataUseRegister.repository.js | 40 +++++++++++++++++++ src/resources/filters/filters.service.js | 4 +- src/resources/search/search.repository.js | 25 +++++++----- 3 files changed, 57 insertions(+), 12 deletions(-) diff --git a/src/resources/dataUseRegister/dataUseRegister.repository.js b/src/resources/dataUseRegister/dataUseRegister.repository.js index ca74ed61..0beaa6cf 100644 --- a/src/resources/dataUseRegister/dataUseRegister.repository.js +++ b/src/resources/dataUseRegister/dataUseRegister.repository.js @@ -76,6 +76,46 @@ export default class DataUseRegisterRepository extends Repository { } } + async getDataUseRegistersFilters(query, options = {}) { + if (options.aggregate) { + const searchTerm = (query && query['$and'] && query['$and'].find(exp => !isNil(exp['$text']))) || {}; + + if (searchTerm) { + query['$and'] = query['$and'].filter(exp => !exp['$text']); + } + + const aggregateQuery = [ + { $match: searchTerm }, + { + $lookup: { + from: 'publishers', + localField: 'publisher', + foreignField: '_id', + as: 'publisherDetails', + }, + }, + { + $addFields: { + publisherInfo: { name: '$publisherDetails.name' }, + }, + }, + { $match: { $and: [...query['$and']] } }, + ]; + + if (query.fields) { + aggregateQuery.push({ + $project: query.fields.split(',').reduce((obj, key) => { + return { ...obj, [key]: 1 }; + }, {}), + }); + } + return DataUseRegister.aggregate(aggregateQuery); + } else { + const options = { lean: true }; + return this.find(query, options); + } + } + getDataUseRegisterByApplicationId(applicationId) { return this.dataUseRegister.findOne({ projectId: applicationId }, 'id').lean(); } diff --git a/src/resources/filters/filters.service.js b/src/resources/filters/filters.service.js index 101061e0..ea6c2bb8 100644 --- a/src/resources/filters/filters.service.js +++ b/src/resources/filters/filters.service.js @@ -150,7 +150,7 @@ export default class FiltersService { break; case 'dataUseRegister': fields = `organisationName,organisationSector,keywords,publisherDetails.name,fundersAndSponsors`; - entities = await this.DataUseRegisterRepository.getDataUseRegisters({ ...query, fields }, { aggregate: true }); + entities = await this.DataUseRegisterRepository.getDataUseRegistersFilters({ ...query, fields }, { aggregate: true }); break; } // 3. Loop over each entity @@ -319,7 +319,7 @@ export default class FiltersService { keywords, organisationName, organisationSector, - publisher: publisherDetails[0].name, + publisher: 'test', //publisherDetails[0].name, fundersAndSponsors, }; break; diff --git a/src/resources/search/search.repository.js b/src/resources/search/search.repository.js index a48ea1b8..90b7219a 100644 --- a/src/resources/search/search.repository.js +++ b/src/resources/search/search.repository.js @@ -157,11 +157,7 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, } queryObject = [ - { $match: { ...searchTerm, ...newSearchQuery } }, - { $addFields: { relatedResourcesCount: { $size: { $ifNull: ['$relatedObjects', []] } } } }, - { $sort: dataUseSort }, - { $skip: parseInt(startIndex) }, - { $limit: maxResults }, + { $match: searchTerm }, { $lookup: { from: 'publishers', @@ -170,6 +166,16 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, as: 'publisherDetails', }, }, + { + $addFields: { + publisherInfo: { name: '$publisherDetails.name' }, + }, + }, + { $match: newSearchQuery }, + { $addFields: { relatedResourcesCount: { $size: { $ifNull: ['$relatedObjects', []] } } } }, + { $sort: dataUseSort }, + { $skip: parseInt(startIndex) }, + { $limit: maxResults }, { $lookup: { from: 'tools', @@ -194,11 +200,6 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, as: 'gatewayDatasetsInfo', }, }, - { - $addFields: { - publisherInfo: { name: '$publisherDetails.name' }, - }, - }, { $project: { _id: 0, @@ -475,6 +476,8 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, } } + let time1 = Date.now(); + const searchResults = type === 'dataUseRegister' ? await collection.aggregate(queryObject).catch(err => { @@ -488,6 +491,8 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, console.log(err); }); + console.log((Date.now() - time1) / 1000); + return { data: searchResults }; } From 9d80a739da62575122c3f4cb89ccd0ed60a23d52 Mon Sep 17 00:00:00 2001 From: Callum Reekie Date: Fri, 28 Jan 2022 12:08:39 +0000 Subject: [PATCH 7/7] fix slow DUR filters - update --- src/resources/filters/filters.service.js | 2 +- src/resources/search/search.repository.js | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/resources/filters/filters.service.js b/src/resources/filters/filters.service.js index ea6c2bb8..b184cfeb 100644 --- a/src/resources/filters/filters.service.js +++ b/src/resources/filters/filters.service.js @@ -319,7 +319,7 @@ export default class FiltersService { keywords, organisationName, organisationSector, - publisher: 'test', //publisherDetails[0].name, + publisher: publisherDetails[0].name, fundersAndSponsors, }; break; diff --git a/src/resources/search/search.repository.js b/src/resources/search/search.repository.js index 90b7219a..65b688d8 100644 --- a/src/resources/search/search.repository.js +++ b/src/resources/search/search.repository.js @@ -476,8 +476,6 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, } } - let time1 = Date.now(); - const searchResults = type === 'dataUseRegister' ? await collection.aggregate(queryObject).catch(err => { @@ -491,8 +489,6 @@ export async function getObjectResult(type, searchAll, searchQuery, startIndex, console.log(err); }); - console.log((Date.now() - time1) / 1000); - return { data: searchResults }; }