diff --git a/flaskr.py b/flaskr.py index 3253772..676bc8f 100644 --- a/flaskr.py +++ b/flaskr.py @@ -1,15 +1,15 @@ import sys import os -import json -from opencricket.config import config - -from flask import Flask, request, abort, make_response sys.path.append(os.path.dirname(__file__)) +import json +from opencricket.config import config +from flask import Flask, request, abort, make_response from opencricket.chart.sentence_parser import SentenceParser from opencricket.chart.syntax_response import SyntaxResponse from opencricket.suggestion.productions import Productions +from opencricket.suggestion.suggestions import Suggestions from opencricket.chart.player_names import PlayerNames app = Flask(__name__) @@ -17,8 +17,8 @@ @app.route("/") def search(): + user_search = request.args.get('search', '') try: - user_search = request.args.get('search', '') player_names = PlayerNames(config.metadata_dir + 'player_names.txt').get_player_names(user_search) parser = SentenceParser(user_search, player_names) except Exception as e: @@ -27,26 +27,36 @@ def search(): abort(500) result = parser.parse_sentence() if result is not None: - return SyntaxResponse.build_response(result, False) + return json_response(SyntaxResponse.build_response(result, False)) else: - abort(422) + first_suggestion = Suggestions().first_suggestion(user_search) + if first_suggestion is not None: + parser = SentenceParser(first_suggestion, player_names) + did_you_mean = Suggestions().did_you_mean(user_search) + return json_response(SyntaxResponse.build_response(parser.parse_sentence(), True, first_suggestion, did_you_mean)) + else: + did_you_mean = Suggestions().did_you_mean(user_search) + if did_you_mean is not None: + return json_response(SyntaxResponse.build_did_you_mean_response(did_you_mean)) + else: + abort(422) + +@app.route("/related") +def related(): + return json_response(SyntaxResponse.build_related_search(Suggestions().related_search(request.args.get('search','')))) @app.route("/productions") def production(): - r = make_response(Productions().productions()) - r.mimetype = 'application/json' - return r - + return json_response(Productions().productions()) @app.route("/suggestions") def suggestions(): - return json_response(Productions().suggestions(request.args.get('search', ''))) + return json_response(Suggestions().suggestions(request.args.get('search', ''))) @app.route("/load_index") def load_index(): - r = make_response(Productions().load_index(config.exploded_dir)) - r.mimetype = 'application/json' - return r + Productions().load_index(config.exploded_dir) + return ok() @app.route("/create_index") def create_index(): diff --git a/opencricket/chart/sentence_parser.py b/opencricket/chart/sentence_parser.py index 588f195..3bc66ae 100644 --- a/opencricket/chart/sentence_parser.py +++ b/opencricket/chart/sentence_parser.py @@ -11,7 +11,7 @@ def __init__(self, sentence, player_names=None): if not player_names: player_names = [] - self.input = sentence + self.input = sentence.strip() title_case_pattern = re.compile('^[A-Z].*') title_case_words = [word.lower() for word in self.input.split(' ') if title_case_pattern.match(word)] + [ 'default'] diff --git a/opencricket/chart/syntax_response.py b/opencricket/chart/syntax_response.py index 0a9593b..52479af 100644 --- a/opencricket/chart/syntax_response.py +++ b/opencricket/chart/syntax_response.py @@ -3,8 +3,18 @@ class SyntaxResponse: @staticmethod - def build_response(syntax_string, suggested): + def build_response(syntax_string, suggested, suggested_search=None, did_you_mean=None): syntax_json = json.loads(syntax_string) response_json = {"root": list(syntax_json.keys())[0], "suggested": suggested} response_json.update(syntax_json) - return json.dumps(response_json) + if suggested: response_json.update({"suggested_search": suggested_search}) + if did_you_mean is not None: response_json.update({"did_you_mean": did_you_mean}) + return response_json + + @staticmethod + def build_did_you_mean_response(did_you_mean): + return {"did_you_mean": did_you_mean} + + @staticmethod + def build_related_search(related_search): + return {"related_search": related_search} \ No newline at end of file diff --git a/opencricket/config/es_config.py b/opencricket/config/es_config.py index a07fbc1..874a8dc 100644 --- a/opencricket/config/es_config.py +++ b/opencricket/config/es_config.py @@ -1,5 +1,15 @@ +from elasticsearch import Elasticsearch + index_settings='{ "settings": { "index": { "analysis": { "filter": { "stemmer": { "type": "stemmer", "language": "english" }, "autocompleteFilter": { "max_shingle_size": "5", "min_shingle_size": "2", "type": "shingle" }, "stopwords": { "type": "stop", "stopwords": [ "_english_" ] } }, "analyzer": { "didYouMean": { "filter": [ "lowercase" ], "char_filter": [ "html_strip" ], "type": "custom", "tokenizer": "standard" }, "autocomplete": { "filter": [ "lowercase", "autocompleteFilter" ], "char_filter": [ "html_strip" ], "type": "custom", "tokenizer": "standard" }, "default": { "filter": [ "lowercase", "stopwords", "stemmer" ], "char_filter": [ "html_strip" ], "type": "custom", "tokenizer": "standard" } } } } } }' mapping='{ "player_stats": { "properties": { "autocomplete": { "type": "string", "analyzer": "autocomplete" }, "did_you_mean": { "type": "string", "analyzer": "didYouMean" }, "question": { "type": "string", "copy_to": [ "autocomplete", "did_you_mean" ] } } } }' def es_suggestion(search_string): - return '{"suggest":{"didYouMean":{"text":"%s","phrase":{"field":"did_you_mean"}}},"query":{"match":{"question":"%s"}}}' % (search_string, search_string) \ No newline at end of file + return '{"suggest":{"didYouMean":{"text":"%s","phrase":{"field":"did_you_mean"}}},"query":{"match":{"question":"%s"}}}' % (search_string, search_string) + +def es_fuzzy_match(search_string): + return '{"query":{"match":{"question":{"query":"%s","fuzziness":3,"prefix_length":2}}}}' % search_string + + +def es_builder(hosts=None): + if (hosts == None): hosts = '127.0.0.1' + return Elasticsearch(hosts=hosts) diff --git a/opencricket/suggestion/productions.py b/opencricket/suggestion/productions.py index acadf38..ef7db28 100644 --- a/opencricket/suggestion/productions.py +++ b/opencricket/suggestion/productions.py @@ -6,9 +6,7 @@ from collections import Counter from os.path import basename from opencricket.chart.sentence_parser import SentenceParser -from _datetime import datetime import elasticsearch -from elasticsearch import Elasticsearch from elasticsearch import helpers from opencricket.config import es_config EXPANSIONS = 'expansions' @@ -18,8 +16,7 @@ class Productions: def __init__(self, es_host = None): - if(es_host == None): es_host = '127.0.0.1' - self.es = Elasticsearch(hosts=es_host) + self.es = es_config.es_builder(es_host) def productions(self): # TODO While producing expansions, use Map Reduce instead of Iteration @@ -39,7 +36,7 @@ def productions(self): syntax_expansions[key.__str__()] = list(stats_parser._leftcorner_words[key]) result.append({root: {SYNTAX: root_productions, EXPANSIONS: syntax_expansions}}) - return json.dumps(result) + return result def explode(self, expansions_dir, exploded_dir): @@ -47,7 +44,7 @@ def explode(self, expansions_dir, exploded_dir): for filename in glob.iglob(os.path.join(expansions_dir, '*.txt')): with codecs.open(filename, encoding='utf-8') as f: reference_expansions[os.path.splitext(basename(f.name))[0]] = f.read().splitlines() - productions = json.loads(self.productions()) + productions = self.productions() for production in productions: for key, syntax in production.items(): if (os.path.exists(os.path.join(exploded_dir, key))): os.remove(os.path.join(exploded_dir, key)) @@ -68,9 +65,6 @@ def explode(self, expansions_dir, exploded_dir): f.write('\n'.join([tmp % a for a in list(product(*final_items))]) + '\n') - def suggestions(self, search_string): - return self.es.search(index='opencricket', body=es_config.es_suggestion(search_string)) - def create_index(self): self.es.indices.create(index='opencricket', body=es_config.index_settings) self.es.indices.put_mapping(index='opencricket', doc_type='player_stats', body=es_config.mapping) @@ -81,10 +75,9 @@ def load_index(self, exploded_dir): "_index": "opencricket", "_type": "player_stats", "_source": { - "question": line + "question": line.strip() }} for line in f] elasticsearch.helpers.bulk(self.es,actions, chunk_size=100000) - return json.dumps({'status': 'ok'}) def delete_index(self): self.es.indices.delete(index='opencricket') diff --git a/opencricket/suggestion/suggestions.py b/opencricket/suggestion/suggestions.py new file mode 100644 index 0000000..63f5d0e --- /dev/null +++ b/opencricket/suggestion/suggestions.py @@ -0,0 +1,33 @@ +from opencricket.config import es_config + + +class Suggestions: + def __init__(self, es_host=None): + self.es = es_config.es_builder(es_host) + + def suggestions(self, search_string): + return self.es.search(index='opencricket', body=es_config.es_suggestion(search_string)) + + def first_suggestion(self, search_string): + suggestions = self.suggestions(search_string) + hits = suggestions['hits']['hits'] + if len(hits) > 0: + return hits[0]['_source']['question'] + else: + return None + + def related_search(self, search_string): + fuzzy_matches = self.es.search(index='opencricket', body=es_config.es_fuzzy_match(search_string)) + hits = fuzzy_matches['hits']['hits'] + if len(hits) > 0: + return [match['_source']['question'] for match in hits] + else: + return None + + def did_you_mean(self, search_string): + suggestions = self.suggestions(search_string) + did_you_mean_options = suggestions['suggest']['didYouMean'][0]['options'] + if len(did_you_mean_options) > 0: + return [dym['text'] for dym in did_you_mean_options] + else: + return None diff --git a/tests/integration/test_search.py b/tests/integration/test_search.py new file mode 100644 index 0000000..a51abd4 --- /dev/null +++ b/tests/integration/test_search.py @@ -0,0 +1,18 @@ +import unittest +import json +import flaskr + +class TestSearch(unittest.TestCase): + + def setUp(self): + self.app = flaskr.app.test_client() + self.expected_response = json.loads('{"suggested": false, "root": "player_stats", "player_stats": {"word_stats": "stats", "player": {"player1": "virat", "player2": "kohli"}}}') + + def test_search(self): + rv = self.app.get('/?search=Virat Kohli Stats') + self.assertEqual(rv._status_code, 200) + self.assertEqual(json.loads(rv.data.decode("utf-8")), self.expected_response) + +if __name__ == '__main__': + unittest.main() +