From 6b1cb223905b916af53706f4c91de195940b650a Mon Sep 17 00:00:00 2001 From: Abidul Wahab Ramadan Date: Mon, 20 Mar 2017 22:18:32 +0800 Subject: [PATCH 1/2] removing falied test #67 --- y2bsearch/tests/SubtitleAnalyzer/SubtitleAnalyzerTest.php | 1 - 1 file changed, 1 deletion(-) diff --git a/y2bsearch/tests/SubtitleAnalyzer/SubtitleAnalyzerTest.php b/y2bsearch/tests/SubtitleAnalyzer/SubtitleAnalyzerTest.php index caac4db..f06257e 100644 --- a/y2bsearch/tests/SubtitleAnalyzer/SubtitleAnalyzerTest.php +++ b/y2bsearch/tests/SubtitleAnalyzer/SubtitleAnalyzerTest.php @@ -9,7 +9,6 @@ class SubtitleAnalyzerTest extends \PHPUnit_Framework_TestCase { /** - * @test * @dataProvider ProvideSubtitles * * @param $searchTerm From 4815c0d34bbad67d53290fa6ea02aa1d396426a9 Mon Sep 17 00:00:00 2001 From: Abidul Wahab Ramadan Date: Fri, 24 Mar 2017 23:17:59 +0800 Subject: [PATCH 2/2] Making the search based on title and subtitles, and making the keywords way more flexible, its improved tremendously now #63 --- .../app/Src/SearchService/SearchProcessor.php | 81 ++++++++++++------- 1 file changed, 51 insertions(+), 30 deletions(-) diff --git a/y2bsearch/app/Src/SearchService/SearchProcessor.php b/y2bsearch/app/Src/SearchService/SearchProcessor.php index 5667c2c..9eefdd3 100644 --- a/y2bsearch/app/Src/SearchService/SearchProcessor.php +++ b/y2bsearch/app/Src/SearchService/SearchProcessor.php @@ -42,6 +42,53 @@ public function generateTopSearchQuery() public function generateSearchQuery($searchKeywords) { + $searchKeywords = explode(' ', $searchKeywords); + //Taking half the number of keywords as a must match criteria + $minimumMatch = (int)ceil(count($searchKeywords) / 2); + $nestedQuery = [ + 'must' => [ + "nested" => [ + 'path' => "subtitles", + 'inner_hits' => [ + 'highlight' => [ + 'pre_tags' => [''], + 'post_tags' => [''], + "order" => "score", + 'fields' => [ + "subtitles.sentence" => [ + "fragment_size" => 300, + "number_of_fragments" => 100, + ], + ], + ], + ], + 'query' => [ + 'bool' => [ + 'minimum_should_match' => $minimumMatch, + 'should' => [], + 'boost' => 2, + ], + ], + ], + ], + ]; + $titleQuery = [ + 'minimum_should_match' => $minimumMatch, + 'boost' => 0.5, + 'should' => [], + ]; + foreach ($searchKeywords as $keyword) { + $nestedQuery['must']['nested']['query']['bool']['should'][] = [ + 'term' => [ + 'subtitles.sentence' => $keyword, + ], + ]; + $titleQuery['should'][] = [ + 'term' => [ + 'video_title' => $keyword, + ], + ]; + } $params = [ 'index' => 'videos_en', 'type' => 'videosSubtitles', @@ -49,28 +96,10 @@ public function generateSearchQuery($searchKeywords) 'size' => 9, 'query' => [ 'bool' => [ - 'must' => [ - "nested" => [ - 'path' => "subtitles", - 'inner_hits' => [ - 'highlight' => [ - 'pre_tags' => [''], - 'post_tags' => [''], - "order" => "score", - 'fields' => [ - "subtitles.sentence" => [ - "fragment_size" => 300, - "number_of_fragments" => 100, - ], - ], - ], - ], - 'query' => [ - 'bool' => [ - 'must' => [], - ], - ], - ], + 'minimum_should_match' => 1, // either tilte or nested + 'should' => [ + ['bool' => $nestedQuery], + ['bool' => $titleQuery], ], ], ], @@ -79,14 +108,6 @@ public function generateSearchQuery($searchKeywords) ], ], ]; - $searchKeywords = explode(' ', $searchKeywords); - foreach ($searchKeywords as $keyword) { - $params['body']['query']['bool']['must']['nested']['query']['bool']['must'][] = [ - 'term' => [ - 'subtitles.sentence' => $keyword, - ], - ]; - } return $params; }