So you want to set up a search index in JavaScript? Here is one way to do it. You can of course make many variations on this theme; consult the rest of the documentation for options and alternatives.
Find some data in the right format. You could for example index 1000 old Reuters articles, like the ones found here
Initialize a search index and add the data
// Callback for search-index: when there is no error, keep a reference to the
// new index and pipe the response from `url` into the index's feed stream.
const indexData = function(err, newIndex) {
if (!err) {
index = newIndex
request(url).pipe(index.feed())
}
}
// Initialize search-index with `ops`; `indexData` is passed as the callback
// and is written to receive (err, newIndex).
require('search-index')(ops, indexData)
Run a search query
// Run a query against the index; every 'data' event emitted by the result
// stream is handed to printResults.
index.search({
query: [{
AND: {
'*': ['search', 'words'] // search for "search" and "words" in all ("*") fields
}
}]
}).on('data', printResults) // make pretty results
Save this file as `index.js`, run `npm install JSONStream chalk request term-cluster`, and then run it with `node index.js` to get a really basic CLI search engine for some old Reuters articles.
const chalk = require('chalk')
const request = require('request')
const tc = require('term-cluster')
// Remote data set to index (a raw file from the reuters-21578-json GitHub repository)
const url = 'https://raw.githubusercontent.com/fergiemcdowall/reuters-21578-json/master/data/fullFileStream/justTen.str'
// Options handed to search-index when the index is initialized
const ops = {
indexPath: 'myCoolIndex',
logLevel: 'error'
}
// Holds the index once indexData has received it; read later by search()
var index
/**
 * Callback handed to search-index once initialization has completed.
 *
 * On success the new index is stored in the module-level `index` variable,
 * the response from `url` is piped into the index's feed stream, and the
 * interactive prompt is started when that stream emits 'finish'.
 *
 * @param {Error|null|undefined} err - initialization error, if any
 * @param {object} newIndex - the initialized index (must provide `feed()`)
 * @returns {undefined}
 */
const indexData = function(err, newIndex) {
  if (err) {
    // Report the failure instead of exiting silently with nothing indexed
    console.error(err)
    return
  }
  index = newIndex
  request(url)
    .pipe(index.feed()) // closing parenthesis was missing here, which made the listing unparseable
    .on('finish', searchCLI)
}
// Writes two empty lines and then the 'search > ' prompt (without a trailing
// newline, so the cursor stays on the prompt line).
const printPrompt = () => {
  for (let blank = 0; blank < 2; blank += 1) {
    console.log()
  }
  process.stdout.write('search > ')
}
// Starts the interactive loop: shows the prompt, resumes stdin, and passes
// every chunk arriving on stdin to search().
const searchCLI = () => {
  printPrompt()
  const input = process.stdin
  input.resume()
  input.on('data', search)
}
// Runs one query typed at the prompt: the raw input is converted to a string,
// line breaks are removed, and the result stream prints each hit and then
// shows the prompt again when it ends.
const search = (rawQuery) => {
  const results = index.search(rawQuery.toString().replace( /\r?\n|\r/g, '' ))
  results.on('data', printResults)
    .on('end', printPrompt)
}
// Prints one search hit: a header line with the document id and title in
// blue, then a teaser (built by term-cluster from the matched terms) for
// every truthy document field that yields one, then an empty line.
const printResults = (data) => {
  const doc = data.document
  console.log(`\n${chalk.blue(doc.id)} : ${chalk.blue(doc.title)}`)
  // The keys of `df` are reduced to the part after their first two characters
  const terms = Object.keys(data.scoringCriteria[0].df).map((item) => item.substring(2))
  for (const field in doc) {
    const value = doc[field]
    if (!value) continue
    const teaser = tc(value, terms)
    if (teaser) console.log(teaser)
  }
  console.log()
}
// Entry point: initialize search-index with `ops`; `indexData` is passed as
// the callback and is written to receive (err, newIndex).
require('search-index')(ops, indexData)