Skip to content

Commit

Permalink
Suggestions - Related search (fuzzy) & Did you mean
Browse files Browse the repository at this point in the history
  • Loading branch information
emaillenin committed Mar 7, 2015
1 parent 6322e0a commit 44f8051
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 30 deletions.
40 changes: 25 additions & 15 deletions flaskr.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
import sys
import os
import json
from opencricket.config import config

from flask import Flask, request, abort, make_response

sys.path.append(os.path.dirname(__file__))

import json
from opencricket.config import config
from flask import Flask, request, abort, make_response
from opencricket.chart.sentence_parser import SentenceParser
from opencricket.chart.syntax_response import SyntaxResponse
from opencricket.suggestion.productions import Productions
from opencricket.suggestion.suggestions import Suggestions
from opencricket.chart.player_names import PlayerNames

app = Flask(__name__)


@app.route("/")
def search():
user_search = request.args.get('search', '')
try:
user_search = request.args.get('search', '')
player_names = PlayerNames(config.metadata_dir + 'player_names.txt').get_player_names(user_search)
parser = SentenceParser(user_search, player_names)
except Exception as e:
Expand All @@ -27,26 +27,36 @@ def search():
abort(500)
result = parser.parse_sentence()
if result is not None:
return SyntaxResponse.build_response(result, False)
return json_response(SyntaxResponse.build_response(result, False))
else:
abort(422)
first_suggestion = Suggestions().first_suggestion(user_search)
if first_suggestion is not None:
parser = SentenceParser(first_suggestion, player_names)
did_you_mean = Suggestions().did_you_mean(user_search)
return json_response(SyntaxResponse.build_response(parser.parse_sentence(), True, first_suggestion, did_you_mean))
else:
did_you_mean = Suggestions().did_you_mean(user_search)
if did_you_mean is not None:
return json_response(SyntaxResponse.build_did_you_mean_response(did_you_mean))
else:
abort(422)

@app.route("/related")
def related():
    """Return related searches for the ``search`` query parameter.

    The text is passed to ``Suggestions.related_search``; the result is wrapped
    by ``SyntaxResponse.build_related_search`` and handed to ``json_response``.
    A missing ``search`` parameter is treated as an empty string.
    """
    search_text = request.args.get('search', '')
    related_questions = Suggestions().related_search(search_text)
    envelope = SyntaxResponse.build_related_search(related_questions)
    return json_response(envelope)

@app.route("/productions")
def production():
r = make_response(Productions().productions())
r.mimetype = 'application/json'
return r

return json_response(Productions().productions())

@app.route("/suggestions")
def suggestions():
return json_response(Productions().suggestions(request.args.get('search', '')))
return json_response(Suggestions().suggestions(request.args.get('search', '')))

@app.route("/load_index")
def load_index():
r = make_response(Productions().load_index(config.exploded_dir))
r.mimetype = 'application/json'
return r
Productions().load_index(config.exploded_dir)
return ok()

@app.route("/create_index")
def create_index():
Expand Down
2 changes: 1 addition & 1 deletion opencricket/chart/sentence_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __init__(self, sentence, player_names=None):

if not player_names:
player_names = []
self.input = sentence
self.input = sentence.strip()
title_case_pattern = re.compile('^[A-Z].*')
title_case_words = [word.lower() for word in self.input.split(' ') if title_case_pattern.match(word)] + [
'default']
Expand Down
14 changes: 12 additions & 2 deletions opencricket/chart/syntax_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,18 @@

class SyntaxResponse:
@staticmethod
def build_response(syntax_string, suggested):
def build_response(syntax_string, suggested, suggested_search=None, did_you_mean=None):
syntax_json = json.loads(syntax_string)
response_json = {"root": list(syntax_json.keys())[0], "suggested": suggested}
response_json.update(syntax_json)
return json.dumps(response_json)
if suggested: response_json.update({"suggested_search": suggested_search})
if did_you_mean is not None: response_json.update({"did_you_mean": did_you_mean})
return response_json

@staticmethod
def build_did_you_mean_response(did_you_mean):
return {"did_you_mean": did_you_mean}

@staticmethod
def build_related_search(related_search):
return {"related_search": related_search}
12 changes: 11 additions & 1 deletion opencricket/config/es_config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
from elasticsearch import Elasticsearch

# Index settings, kept as a JSON string (passed as the request body when the
# index is created). Declares three token filters (an English stemmer, a
# 2-5 word shingle filter named "autocompleteFilter", and English stopwords)
# and three custom analyzers built on the standard tokenizer with html_strip:
#   - "didYouMean":   lowercase only
#   - "autocomplete": lowercase + shingles
#   - "default":      lowercase + stopwords + stemmer
index_settings='{ "settings": { "index": { "analysis": { "filter": { "stemmer": { "type": "stemmer", "language": "english" }, "autocompleteFilter": { "max_shingle_size": "5", "min_shingle_size": "2", "type": "shingle" }, "stopwords": { "type": "stop", "stopwords": [ "_english_" ] } }, "analyzer": { "didYouMean": { "filter": [ "lowercase" ], "char_filter": [ "html_strip" ], "type": "custom", "tokenizer": "standard" }, "autocomplete": { "filter": [ "lowercase", "autocompleteFilter" ], "char_filter": [ "html_strip" ], "type": "custom", "tokenizer": "standard" }, "default": { "filter": [ "lowercase", "stopwords", "stemmer" ], "char_filter": [ "html_strip" ], "type": "custom", "tokenizer": "standard" } } } } } }'
# Mapping for the "player_stats" document type, also a JSON string. The
# "question" field is copied into "autocomplete" and "did_you_mean", which are
# analyzed with the "autocomplete" and "didYouMean" analyzers respectively.
mapping='{ "player_stats": { "properties": { "autocomplete": { "type": "string", "analyzer": "autocomplete" }, "did_you_mean": { "type": "string", "analyzer": "didYouMean" }, "question": { "type": "string", "copy_to": [ "autocomplete", "did_you_mean" ] } } } }'

def es_suggestion(search_string):
    """Build the Elasticsearch request body for a suggestion search.

    The body combines a phrase suggester named ``didYouMean`` (run against the
    ``did_you_mean`` field) with a ``match`` query on the ``question`` field;
    both use the same text.

    Args:
        search_string: Text typed by the user.

    Returns:
        The request body serialized as a JSON string.
    """
    # Function-scope import so the block does not depend on the module's
    # import section.
    import json

    # Serialize with json.dumps instead of %-formatting: quotes, backslashes
    # and control characters in the user's text are escaped rather than
    # producing invalid JSON or altering the structure of the query.
    return json.dumps({
        "suggest": {
            "didYouMean": {
                "text": search_string,
                "phrase": {"field": "did_you_mean"},
            },
        },
        "query": {"match": {"question": search_string}},
    })

def es_fuzzy_match(search_string):
    """Build the Elasticsearch request body for a fuzzy ``match`` query.

    The query targets the ``question`` field with ``fuzziness`` 3 and
    ``prefix_length`` 2.

    Args:
        search_string: Text typed by the user.

    Returns:
        The request body serialized as a JSON string.
    """
    # Function-scope import so the block does not depend on the module's
    # import section.
    import json

    # json.dumps escapes quotes and backslashes in the user's text; the
    # previous %-formatted template produced invalid JSON for such input.
    return json.dumps({
        "query": {
            "match": {
                "question": {
                    "query": search_string,
                    "fuzziness": 3,
                    "prefix_length": 2,
                },
            },
        },
    })


def es_builder(hosts=None):
    """Create an Elasticsearch client.

    Args:
        hosts: Value forwarded to ``Elasticsearch(hosts=...)``. When ``None``,
            ``'127.0.0.1'`` is used.

    Returns:
        A new ``Elasticsearch`` client instance.
    """
    # Identity check: ``None`` is a singleton, and ``==`` could be overridden
    # by an arbitrary ``__eq__`` on the argument.
    if hosts is None:
        hosts = '127.0.0.1'
    return Elasticsearch(hosts=hosts)
15 changes: 4 additions & 11 deletions opencricket/suggestion/productions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
from collections import Counter
from os.path import basename
from opencricket.chart.sentence_parser import SentenceParser
from _datetime import datetime
import elasticsearch
from elasticsearch import Elasticsearch
from elasticsearch import helpers
from opencricket.config import es_config
EXPANSIONS = 'expansions'
Expand All @@ -18,8 +16,7 @@
class Productions:

def __init__(self, es_host = None):
if(es_host == None): es_host = '127.0.0.1'
self.es = Elasticsearch(hosts=es_host)
self.es = es_config.es_builder(es_host)

def productions(self):
# TODO While producing expansions, use Map Reduce instead of Iteration
Expand All @@ -39,15 +36,15 @@ def productions(self):
syntax_expansions[key.__str__()] = list(stats_parser._leftcorner_words[key])

result.append({root: {SYNTAX: root_productions, EXPANSIONS: syntax_expansions}})
return json.dumps(result)
return result


def explode(self, expansions_dir, exploded_dir):
reference_expansions = {}
for filename in glob.iglob(os.path.join(expansions_dir, '*.txt')):
with codecs.open(filename, encoding='utf-8') as f:
reference_expansions[os.path.splitext(basename(f.name))[0]] = f.read().splitlines()
productions = json.loads(self.productions())
productions = self.productions()
for production in productions:
for key, syntax in production.items():
if (os.path.exists(os.path.join(exploded_dir, key))): os.remove(os.path.join(exploded_dir, key))
Expand All @@ -68,9 +65,6 @@ def explode(self, expansions_dir, exploded_dir):
f.write('\n'.join([tmp % a for a in list(product(*final_items))]) + '\n')


def suggestions(self, search_string):
return self.es.search(index='opencricket', body=es_config.es_suggestion(search_string))

def create_index(self):
self.es.indices.create(index='opencricket', body=es_config.index_settings)
self.es.indices.put_mapping(index='opencricket', doc_type='player_stats', body=es_config.mapping)
Expand All @@ -81,10 +75,9 @@ def load_index(self, exploded_dir):
"_index": "opencricket",
"_type": "player_stats",
"_source": {
"question": line
"question": line.strip()
}} for line in f]
elasticsearch.helpers.bulk(self.es,actions, chunk_size=100000)
return json.dumps({'status': 'ok'})

def delete_index(self):
self.es.indices.delete(index='opencricket')
Expand Down
33 changes: 33 additions & 0 deletions opencricket/suggestion/suggestions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from opencricket.config import es_config


class Suggestions:
    """Query the ``opencricket`` Elasticsearch index for search suggestions."""

    def __init__(self, es_host=None):
        """Create the Elasticsearch client via ``es_config.es_builder``.

        Args:
            es_host: Host forwarded to ``es_config.es_builder``; ``None`` lets
                the builder pick its default.
        """
        self.es = es_config.es_builder(es_host)

    def suggestions(self, search_string):
        """Run the suggestion query and return the raw search response."""
        return self.es.search(index='opencricket', body=es_config.es_suggestion(search_string))

    def first_suggestion(self, search_string):
        """Return the ``question`` of the first hit, or ``None`` if no hits."""
        hits = self.suggestions(search_string)['hits']['hits']
        if not hits:
            return None
        return hits[0]['_source']['question']

    def related_search(self, search_string):
        """Return the ``question`` of every fuzzy-match hit, or ``None`` if no hits."""
        fuzzy_matches = self.es.search(index='opencricket', body=es_config.es_fuzzy_match(search_string))
        hits = fuzzy_matches['hits']['hits']
        if not hits:
            return None
        return [match['_source']['question'] for match in hits]

    def did_you_mean(self, search_string):
        """Return the ``didYouMean`` suggester option texts, or ``None`` if none."""
        entries = self.suggestions(search_string)['suggest']['didYouMean']
        # Guard the [0] lookup: an empty entry list previously raised
        # IndexError instead of reporting "no suggestion".
        if not entries:
            return None
        options = entries[0]['options']
        if not options:
            return None
        return [dym['text'] for dym in options]
18 changes: 18 additions & 0 deletions tests/integration/test_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import unittest
import json
import flaskr

class TestSearch(unittest.TestCase):
    """Integration test for the ``/`` search route of the ``flaskr`` app."""

    def setUp(self):
        # Flask test client: requests go through the app without a server.
        self.app = flaskr.app.test_client()
        self.expected_response = json.loads('{"suggested": false, "root": "player_stats", "player_stats": {"word_stats": "stats", "player": {"player1": "virat", "player2": "kohli"}}}')

    def test_search(self):
        """A player-stats query returns 200 and the expected parsed syntax."""
        rv = self.app.get('/?search=Virat Kohli Stats')
        # Read the public ``status_code`` attribute rather than the private
        # ``_status_code``, which is an implementation detail of the response.
        self.assertEqual(rv.status_code, 200)
        self.assertEqual(json.loads(rv.data.decode("utf-8")), self.expected_response)


if __name__ == '__main__':
    unittest.main()

0 comments on commit 44f8051

Please sign in to comment.