From 33f4f8564353f10ce2288e8c624cfb0df74d69de Mon Sep 17 00:00:00 2001 From: Justin Littman Date: Wed, 30 Oct 2019 16:52:48 -0400 Subject: [PATCH 1/2] Deconfigures indexing for clarity. --- __tests__/Indexer.test.js | 18 ++--- config/default.js | 61 --------------- src/Indexer.js | 63 +++++---------- src/ResourceIndexer.js | 139 ++++++++++++++++++---------------- src/SinopiaTemplateIndexer.js | 33 ++++++++ 5 files changed, 131 insertions(+), 183 deletions(-) diff --git a/__tests__/Indexer.test.js b/__tests__/Indexer.test.js index 3be3063..e5f62dc 100644 --- a/__tests__/Indexer.test.js +++ b/__tests__/Indexer.test.js @@ -93,9 +93,7 @@ describe('Indexer', () => { 'There is nothing either good or bad, but thinking makes it so.' ], subtitle: ['A Tragic Tale about a Prince of Denmark'], - 'subtitle-suggest': ['a', 'tragic', 'tale', 'about', 'a', 'prince', 'of', 'denmark'], title: ['Hamlet'], - 'title-suggest': ['hamlet'], type: ['http://id.loc.gov/ontologies/bibframe/AbbreviatedTitle'], uri: 'http://foo.bar/12345', created: '2019-10-18T16:08:43.300Z', @@ -212,7 +210,6 @@ describe('Indexer', () => { 'n' ], title: ['What factors influence the quality of hazard mitigation plans in Washington State?'], - 'title-suggest': ['what', 'factors', 'influence', 'the', 'quality', 'of', 'hazard', 'mitigation', 'plans', 'in', 'washington', 'state?'], type: ['http://rdaregistry.info/Elements/c/C10006'], uri: 'http://foo.bar/12345', created: '2019-10-23T15:40:51.049Z', @@ -357,9 +354,7 @@ describe('Indexer', () => { 'There is nothing either good or bad, but thinking makes it so.' 
], subtitle: ['A Tragic Tale about a Prince of Denmark'], - 'subtitle-suggest': ['a', 'tragic', 'tale', 'about', 'a', 'prince', 'of', 'denmark'], title: ['Hamlet'], - 'title-suggest': ['hamlet'], type:[ [ 'http://id.loc.gov/ontologies/bibframe/AbbreviatedTitle', 'http://id.loc.gov/ontologies/bibframe/WorkTitle' @@ -519,9 +514,8 @@ describe('Indexer', () => { it('does not log an error', async () => { await indexer.setupIndices() - const indexCount = Object.keys(config.get('indexMappings')).length - expect(createSpy).toHaveBeenCalledTimes(indexCount) - expect(mappingSpy).toHaveBeenCalledTimes(indexCount) + expect(createSpy).toHaveBeenCalledTimes(2) + expect(mappingSpy).toHaveBeenCalledTimes(2) expect(logSpy).not.toHaveBeenCalled() }) }) @@ -574,16 +568,16 @@ describe('Indexer', () => { describe('indexFrom()', () => { it('returns the resource index name by default', () => { - expect(indexer.indexFrom([])[0]).toBe('sinopia_resources') + expect(indexer.indexFrom([])).toBe('sinopia_resources') }) it('returns the resource index name for a resource', () => { - expect(indexer.indexFrom(resourceObjectTypes)[0]).toBe('sinopia_resources') + expect(indexer.indexFrom(resourceObjectTypes)).toBe('sinopia_resources') }) it('returns the non RDF index name when types includes LDP-NRS', () => { - expect(indexer.indexFrom(['http://www.w3.org/ns/ldp#NonRDFSource'])[0]).toBe('sinopia_templates') + expect(indexer.indexFrom(['http://www.w3.org/ns/ldp#NonRDFSource'])).toBe('sinopia_templates') }) it('returns undefined for a container', () => { - expect(indexer.indexFrom(containerObjectTypes)[0]).toBe(undefined) + expect(indexer.indexFrom(containerObjectTypes)).toBe(undefined) }) }) }) diff --git a/config/default.js b/config/default.js index 2509cc0..c53873e 100644 --- a/config/default.js +++ b/config/default.js @@ -15,67 +15,6 @@ module.exports = { defaultMimeType: process.env.DEFAULT_MIME_TYPE || 'application/ld+json', indexType: process.env.INDEX_TYPE || 'sinopia', indexUrl: 
process.env.INDEX_URL || 'http://localhost:9200', - // Note that InputLookupSinopia expects uri and label fields. - indexMappings: process.env.INDEX_FIELD_MAPPINGS - ? JSON.parse(process.env.INDEX_FIELD_MAPPINGS) - : { - sinopia_resources: { - store_document: false, - fields: { - title: { - type: 'text', - path: '$..[mainTitle,P10223,P20315,P40085,P30156]', //BIBFRAME and RDA - autosuggest: true - }, - subtitle: { - type: 'text', - path: '$..subtitle', - autosuggest: true - }, - type: { - type: 'keyword', - store: true - }, - uri: { - type: 'keyword', - id: true, - store: true, - index: true - }, - label: { - type: 'keyword', - // If not title, then URI. - fields: [['title', 'subtitle'], ['uri']], - joinby: ': ', - store: true, - index: false - }, - created: { - type: 'date', - asTypes: ['Create'], - store: true, - index: true - }, - modified: { - type: 'date', - asTypes: ['Create', 'Update'], - store: true, - index: true - }, - text: { - type: 'text', - path: '$..*', - store: false, - index: true - } - } - }, - // Not yet mapped - sinopia_templates: { - store_document: true, - fields: {} - } - }, nonRdfTypeURI: process.env.NON_RDF_TYPE_URI || 'http://www.w3.org/ns/ldp#NonRDFSource', nonRdfMimeType: process.env.NON_RDF_MIME_TYPE || 'application/json', debug: process.env.DEBUG !== undefined ? 
process.env.DEBUG : true diff --git a/src/Indexer.js b/src/Indexer.js index 5fc35ce..40c8969 100644 --- a/src/Indexer.js +++ b/src/Indexer.js @@ -15,6 +15,10 @@ export default class Indexer { this.logger = new Logger() this.knownIndexResults = ['created', 'updated'] this.knownDeleteResults = ['deleted'] + this.indexers = { + sinopia_templates: SinopiaTemplateIndexer, + sinopia_resources : ResourceIndexer + } } /** @@ -25,21 +29,20 @@ export default class Indexer { * @returns {Promise} resolves to true if successful; null if not */ async index(json, uri, types) { - const [index, store_document, fields] = this.indexFrom(types) + const index = this.indexFrom(types) this.logger.debug(`${uri} (${types}) has index ${index}`) - if (index === undefined) { + + const indexer = this.indexers[index] + if (indexer === undefined) { this.logger.debug(`skipping indexing ${uri} (${types})`) return true } - const indexer = index === 'sinopia_templates' ? SinopiaTemplateIndexer : ResourceIndexer - - const body = new indexer(json, uri, store_document, fields).index() return this.client.index({ index: index, - type: config.get('indexType'), + type: 'sinopia', id: this.identifierFrom(uri), - body: body + body: new indexer(json, uri).index() }).then(indexResponse => { if (!this.knownIndexResults.includes(indexResponse.result)) throw { message: JSON.stringify(indexResponse) } @@ -58,14 +61,14 @@ export default class Indexer { * @param {Promise} resolves to types - one or more LDP type URIs */ async delete(uri, types) { - const [index] = this.indexFrom(types) + const index = this.indexFrom(types) if (index === undefined) { this.logger.debug(`skipping deleting ${uri} (${types})`) return true } return this.client.delete({ index, - type: config.get('indexType'), + type: 'sinopia', id: this.identifierFrom(uri) }).then(indexResponse => { if (!this.knownDeleteResults.includes(indexResponse.result)) @@ -93,12 +96,10 @@ export default class Indexer { /** * Create indices, if needed, and add field 
mappings - * @returns {null} */ async setupIndices() { - const indexMappings = config.get('indexMappings') try { - for (const index of Object.keys(indexMappings)) { + for (const index of Object.keys(this.indexers)) { const indexExists = await this.client.indices.exists({ index: index }) if (!indexExists) { @@ -110,8 +111,8 @@ export default class Indexer { await this.client.indices.putMapping({ index: index, - type: config.get('indexType'), - body: this.buildMappingsFromConfig(indexMappings[index].fields) + type: 'sinopia', + body: this.indexers[index].indexMapping }) } } catch(error) { @@ -120,32 +121,6 @@ export default class Indexer { return null } - /** - * Build field mappings from configuration - * @param {Object} fields - Field configuration - * @returns {Object} - */ - buildMappingsFromConfig(fields) { - const mappingObject = { properties: {} } - - for (const [fieldName, fieldProperties] of Object.entries(fields)) { - mappingObject.properties[fieldName] = { - type: fieldProperties.type, - store: fieldProperties.store == true, - index: fieldProperties.index == false ? false : true - } - - if (fieldProperties.autosuggest) { - mappingObject.properties[`${fieldName}-suggest`] = { - type: 'completion' - } - } - } - - return mappingObject - } - - /** * Remove and recreate all known indices * @returns {Promise} resolves to null upon completion (errors, if any, are logged) @@ -175,12 +150,10 @@ export default class Indexer { /** * Returns index information given a list of LDP types. * @param {Array} types - LDP type URIs of object - * @returns {[string, store_document, fields]} name of index | undefined if should not index, whether to store document, field configuration + * @returns {string|undefined} name of index or undefined */ indexFrom(types) { - if (types.includes('http://www.w3.org/ns/ldp#BasicContainer')) return [undefined, undefined, undefined] - const indexMappings = config.get('indexMappings') - const index = types.includes(config.get('nonRdfTypeURI')) ? 
'sinopia_templates' : 'sinopia_resources' - return [index, indexMappings[index].store_document, indexMappings[index].fields] + if (types.includes('http://www.w3.org/ns/ldp#BasicContainer')) return undefined + return types.includes(config.get('nonRdfTypeURI')) ? 'sinopia_templates' : 'sinopia_resources' } } diff --git a/src/ResourceIndexer.js b/src/ResourceIndexer.js index bc0fe88..af09cfa 100644 --- a/src/ResourceIndexer.js +++ b/src/ResourceIndexer.js @@ -5,19 +5,13 @@ export default class { * Builds up an index entry out of a JSON body, given index field mappings from config * @param {Object} json - A Trellis RDFSource * @param {string} uri - Trellis URI of the document - * @param {boolean} store_document - Whether to add the document to the indexed object - * @param {Object} fields - Configuration for fields to be indexed * @returns {Object} an object containing configured field values if any found */ - constructor(json, uri, store_document, fields) { + constructor(json, uri) { this.indexObject = {} - if(store_document) { - this.indexObject.document = json - } this.uri = uri this.json = json - this.fields = fields } /** @@ -25,12 +19,15 @@ export default class { * @returns {Object} an object containing configured field values if any found */ index() { - this.buildIndexEntryFields() - this.buildAggregateFields() - this.buildAutosuggest() - this.buildActivityStreamFields() - this.buildRDFTypes() + this.indexObject['uri'] = this.uri + this.buildFromPath('title', '$..[mainTitle,P10223,P20315,P40085,P30156]') //BIBFRAME and RDA + this.buildFromPath('subtitle', '$..subtitle') + this.buildLabel() + this.buildFromPath('text', '$..*') + this.buildActivityStreamField('created', ['Create']) + this.buildActivityStreamField('modified', ['Create', 'Update']) + this.buildRDFTypes() if (!this.indexObject.uri || !this.indexObject.label) { throw `${this.uri} requires a uri and label: ${this.indexObject}` @@ -39,70 +36,35 @@ export default class { return this.indexObject } - 
buildIndexEntryFields() { - for (const [fieldName, fieldProperties] of Object.entries(this.fields)) { - if(fieldProperties.id) { - this.indexObject[fieldName] = this.uri - } else if (fieldProperties.path) { - const fieldValues = JSONPath({ - json: this.json, - path: fieldProperties.path, - flatten: true - }) - .filter(obj => obj['@value']) // Filter out fields without values, e.g., from the @context object - .map(obj => obj['@value']) // Extract the value and ignore the @language for now (this is currently coupled to how titles are modeled) - if (fieldValues.length > 0) this.indexObject[fieldName] = fieldValues - } - } - } + buildFromPath(fieldName, path) { + const fieldValues = JSONPath({ + json: this.json, + path: path, + flatten: true + }) + .filter(obj => obj['@value']) // Filter out fields without values, e.g., from the @context object + .map(obj => obj['@value']) // Extract the value and ignore the @language for now (this is currently coupled to how titles are modeled) + if (fieldValues.length > 0) this.indexObject[fieldName] = fieldValues - buildAggregateFields() { - for (const [fieldName, fieldProperties] of Object.entries(this.fields)) { - if (fieldProperties.fields) { - const fieldValues = fieldProperties.fields.map((fields) => { - return this.getFieldValues(fields) - // if (values.length > 0) return values.join(fieldProperties.joinby || ' ') - }).filter((values) => values.length > 0) - if(fieldValues.length > 0) { - this.indexObject[fieldName] = fieldValues[0].join(fieldProperties.joinby || ' ') - } - } - } } - getFieldValues(fields) { - const values = [] - fields.forEach((fieldName) => { + buildLabel() { + const labelValues = [] + const fieldNames = ['title', 'subtitle'] + fieldNames.forEach((fieldName) => { if (this.indexObject[fieldName] && this.indexObject[fieldName].length > 0) { - values.push(this.indexObject[fieldName]) + labelValues.push(this.indexObject[fieldName]) } }) - return values + this.indexObject['label'] = labelValues.length > 0 ? 
labelValues.join(': ') : this.uri } - buildAutosuggest() { - for (const [fieldName, fieldProperties] of Object.entries(this.fields)) { - if (fieldProperties.autosuggest && this.indexObject[fieldName] && this.indexObject[fieldName].length > 0) { - this.indexObject[`${fieldName}-suggest`] = this.indexObject[fieldName].join(' ').split(' ').map(token => token.toLowerCase()) - } - } - } - - buildActivityStreamFields() { - for (const [fieldName, fieldProperties] of Object.entries(this.fields)) { - if(fieldProperties.asTypes) { - const asDate = this.getActivityStreamDate(fieldProperties.asTypes, this.json['@graph']) - if (asDate) this.indexObject[fieldName] = asDate - } - } - } - - getActivityStreamDate(asTypes, graph) { - const dates = graph + buildActivityStreamField(fieldName, asTypes) { + const dates = this.json['@graph'] .filter((item) => item.atTime && item['@type']) .filter((item) => item['@type'].some((type) => asTypes.map((asType) => `as:${asType}`).includes(type))) .map((item) => item.atTime).sort().reverse() - return dates.length > 0 ? 
dates[0] : undefined + if (dates.length > 0) this.indexObject[fieldName] = dates[0] } buildRDFTypes() { @@ -111,4 +73,51 @@ export default class { .filter((item) => item['@id'] === '') .map(item => item['@type']) } + + static get indexMapping() { + return { + properties: { + title: { + type: 'text', + store: true, + index: true + }, + subtitle: { + type: 'text', + store: true, + index: true + }, + type: { + type: 'keyword', + store: true, + index: true + }, + uri: { + type: 'keyword', + store: true, + index: true + }, + label: { + type: 'keyword', + store: true, + index: false + }, + created: { + type: 'date', + store: true, + index: true + }, + modified: { + type: 'date', + store: true, + index: true + }, + text: { + type: 'text', + store: false, + index: true + } + } + } + } } diff --git a/src/SinopiaTemplateIndexer.js b/src/SinopiaTemplateIndexer.js index 010c5fd..1f37fad 100644 --- a/src/SinopiaTemplateIndexer.js +++ b/src/SinopiaTemplateIndexer.js @@ -22,4 +22,37 @@ export default class { return this.indexObject } + + static get indexMapping() { + return { + properties: { + author: { + type: 'text', + store: true, + index: true + }, + date: { + type: 'date', + store: true, + index: false + }, + remark: { + type: 'text', + store: true, + index: true + }, + resourceLabel: { + type: 'text', + store: true, + index: true + }, + resourceURI: { + type: 'keyword', + store: true, + index: true + }, + } + } + } + } From a0c4db7daf81909e791809b33dc0da6e9bd5e568 Mon Sep 17 00:00:00 2001 From: Justin Littman Date: Wed, 30 Oct 2019 18:16:01 -0400 Subject: [PATCH 2/2] Removed search.js --- README.md | 8 ------- search.js | 60 -------------------------------------------------- src/Indexer.js | 8 +++---- 3 files changed, 4 insertions(+), 72 deletions(-) delete mode 100644 search.js diff --git a/README.md b/README.md index e2278e1..2bda8a8 100644 --- a/README.md +++ b/README.md @@ -67,14 +67,6 @@ $ curl -i -X POST -H 'Content-Type: application/ld+json' -H 'Link: { - 
console.log(`querying ElasticSearch for "${query}" (full-text search)`) - - const result = await client.search({ - index: config.get('resourceIndexName'), - type: config.get('indexType'), - body: { - query: { - multi_match: { - query: query, - fields: Object.keys(config.get('indexFieldMappings')) - } - } - } - }) - - console.dir(result.hits) -} - -const suggestSearch = async () => { - console.log(`querying ElasticSearch for "${query}" (suggest search)`) - - const suggestBody = { text: query } - - for (const [fieldName, fieldProperties] of Object.entries(config.get('indexFieldMappings'))) { - if (!fieldProperties.autosuggest) { - continue - } - - suggestBody[fieldName] = { - completion: { - field: `${fieldName}-suggest` - } - } - } - - const result = await client.search({ - index: config.get('resourceIndexName'), - type: config.get('indexType'), - body: { - suggest: suggestBody - } - }) - - console.dir(result) - console.dir(result.suggest) -} - -const runSearches = async () => { - await fullTextSearch() - await suggestSearch() -} - -runSearches() diff --git a/src/Indexer.js b/src/Indexer.js index 40c8969..bc55c72 100644 --- a/src/Indexer.js +++ b/src/Indexer.js @@ -17,7 +17,7 @@ export default class Indexer { this.knownDeleteResults = ['deleted'] this.indexers = { sinopia_templates: SinopiaTemplateIndexer, - sinopia_resources : ResourceIndexer + sinopia_resources: ResourceIndexer } } @@ -40,7 +40,7 @@ export default class Indexer { return this.client.index({ index: index, - type: 'sinopia', + type: config.get('indexType'), id: this.identifierFrom(uri), body: new indexer(json, uri).index() }).then(indexResponse => { @@ -68,7 +68,7 @@ export default class Indexer { } return this.client.delete({ index, - type: 'sinopia', + type: config.get('indexType'), id: this.identifierFrom(uri) }).then(indexResponse => { if (!this.knownDeleteResults.includes(indexResponse.result)) @@ -111,7 +111,7 @@ export default class Indexer { await this.client.indices.putMapping({ index: 
index, - type: 'sinopia', + type: config.get('indexType'), body: this.indexers[index].indexMapping }) }