From 966e1e18347029a7586a8d915241fff11cd205ab Mon Sep 17 00:00:00 2001 From: David Nuon Date: Tue, 19 Dec 2023 07:54:11 -0800 Subject: [PATCH 01/11] feat: add timing for resultings filtering --- search/api.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/search/api.py b/search/api.py index 893dacce..9f9efed6 100644 --- a/search/api.py +++ b/search/api.py @@ -9,6 +9,11 @@ from .result_processor import SearchResultProcessor from .utils import DateRange +import logging +import time + +log = logging.getLogger(__name__) + # Default filters that we support, override using COURSE_DISCOVERY_FILTERS setting if desired DEFAULT_FILTER_FIELDS = ["org", "modes", "language"] @@ -74,13 +79,23 @@ def perform_search( log_search_params=log_search_params, ) + # post-process the result + start = time.time() +q for result in results["results"]: result["data"] = SearchResultProcessor.process_result(result["data"], search_term, user) results["access_denied_count"] = len([r for r in results["results"] if r["data"] is None]) results["results"] = [r for r in results["results"] if r["data"] is not None] + end = time.time() + + log.info("ES result timings: %s", { + 'es_query_time_in_ms': results['took'], + 'filtering_time_in_seconds': end - start, + 'es_has_timed_out': results['timed_out'], + }) return results From aaa4372f82ef3c0506a5a40264ce85dc3714583a Mon Sep 17 00:00:00 2001 From: David Nuon Date: Tue, 19 Dec 2023 08:25:20 -0800 Subject: [PATCH 02/11] feat: Add search timings to segment events --- search/api.py | 7 +++++-- search/views.py | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/search/api.py b/search/api.py index 9f9efed6..8affd6ba 100644 --- a/search/api.py +++ b/search/api.py @@ -82,7 +82,7 @@ def perform_search( # post-process the result start = time.time() -q + for result in results["results"]: result["data"] = SearchResultProcessor.process_result(result["data"], search_term, user) @@ -90,12 +90,15 @@ def perform_search( results["results"] = [r for r in results["results"] if r["data"] is not None] end = time.time() + filtering_time_in_seconds = end - start log.info("ES result timings: %s", { 'es_query_time_in_ms': results['took'], - 'filtering_time_in_seconds': end - start, + 'filtering_time_in_seconds': filtering_time_in_seconds, 'es_has_timed_out': results['timed_out'], }) + + results['filtering_time_in_seconds'] = filtering_time_in_seconds return results diff --git a/search/views.py b/search/views.py index c59f7463..6f2bae14 100644 --- a/search/views.py +++ b/search/views.py @@ -114,6 +114,7 @@ def do_search(request, course_id=None): "page_size": size, "page_number": page, "results_count": results["total"], + "filtering_time_in_seconds": results["filtering_time_in_seconds"], } ) From 820344ba1375132def7442c44dc0ff65e34330ab Mon Sep 17 00:00:00 2001 From: David Nuon Date: Wed, 20 Dec 2023 11:35:17 -0800 Subject: [PATCH 03/11] feat: Add more timing data to search --- search/api.py | 26 ++++++++++++++++++-------- search/views.py | 3 ++- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/search/api.py b/search/api.py index 8affd6ba..82e6cc21 100644 --- a/search/api.py +++ b/search/api.py @@ -69,6 +69,10 @@ def perform_search( raise NoSearchEngineError("No search engine specified in settings.SEARCH_ENGINE") log_search_params = getattr(settings, "SEARCH_COURSEWARE_CONTENT_LOG_PARAMS", False) + search_time = { + "start": time.time() + } + results = searcher.search( query_string=search_term, field_dictionary=field_dictionary, @@ -79,26 +83,32 @@ def perform_search( log_search_params=log_search_params, ) + processing_time = { + "start": time.time() + } # post-process the result - start = time.time() - for result in results["results"]: result["data"] = SearchResultProcessor.process_result(result["data"], search_term, user) results["access_denied_count"] = len([r for r in results["results"] if r["data"] is None]) results["results"] = [r for r in results["results"] if r["data"] is not None] - end = time.time() - filtering_time_in_seconds = end - start + processing_time['end'] = time.time() + search_time['end'] = time.time() + + processing_time_in_seconds = processing_time['end'] - processing_time['start'] + search_time_in_seconds = search_time['end'] - search_time['start'] log.info("ES result timings: %s", { - 'es_query_time_in_ms': results['took'], - 'filtering_time_in_seconds': filtering_time_in_seconds, - 'es_has_timed_out': results['timed_out'], + "es_query_time_in_ms": results["took"], + "es_has_timed_out": results["timed_out"], + "processing_time_in_seconds": processing_time_in_seconds, + "search_time_in_seconds": search_time_in_seconds, }) - results['filtering_time_in_seconds'] = filtering_time_in_seconds + results["processing_time_in_seconds"] = processing_time_in_seconds + results["search_time_in_seconds"] = search_time_in_seconds return results diff --git a/search/views.py b/search/views.py index 6f2bae14..7f4c0c56 100644 --- a/search/views.py +++ b/search/views.py @@ -114,7 +114,8 @@ def do_search(request, course_id=None): "page_size": size, "page_number": page, "results_count": results["total"], - "filtering_time_in_seconds": results["filtering_time_in_seconds"], + "processing_time_in_seconds": results["processing_time_in_seconds"], + "search_time_in_seconds": results["search_time_in_seconds"] } ) From 8542dda2409efc85e54208f464eed09dd876f2fc Mon Sep 17 00:00:00 2001 From: David Nuon Date: Wed, 20 Dec 2023 11:39:50 -0800 Subject: [PATCH 04/11] feat: add filter generation measuring --- search/api.py | 7 +++++++ search/views.py | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/search/api.py b/search/api.py index 82e6cc21..abb45214 100644 --- a/search/api.py +++ b/search/api.py @@ -57,10 +57,14 @@ def perform_search( """ # field_, filter_ and exclude_dictionary(s) can be overridden by calling application # field_dictionary includes course if course_id provided + filter_generation_time = { + "start": time.time() + } (field_dictionary, filter_dictionary, exclude_dictionary) = SearchFilterGenerator.generate_field_filters( user=user, course_id=course_id ) + filter_generation_time['end'] = time.time() searcher = SearchEngine.get_search_engine( getattr(settings, "COURSEWARE_CONTENT_INDEX_NAME", "courseware_content") @@ -99,16 +103,19 @@ def perform_search( processing_time_in_seconds = processing_time['end'] - processing_time['start'] search_time_in_seconds = search_time['end'] - search_time['start'] + filter_generation_time_in_seconds = filter_generation_time['end'] - filter_generation_time['start'] log.info("ES result timings: %s", { "es_query_time_in_ms": results["took"], "es_has_timed_out": results["timed_out"], "processing_time_in_seconds": processing_time_in_seconds, "search_time_in_seconds": search_time_in_seconds, + "filter_generation_time_in_seconds": filter_generation_time_in_seconds, }) results["processing_time_in_seconds"] = processing_time_in_seconds results["search_time_in_seconds"] = search_time_in_seconds + results["filter_generation_time_in_seconds"] = filter_generation_time_in_seconds return results diff --git a/search/views.py b/search/views.py index 7f4c0c56..0d807ee3 100644 --- a/search/views.py +++ b/search/views.py @@ -115,7 +115,8 @@ def do_search(request, course_id=None): "page_number": page, "results_count": results["total"], "processing_time_in_seconds": results["processing_time_in_seconds"], - "search_time_in_seconds": results["search_time_in_seconds"] + "search_time_in_seconds": results["search_time_in_seconds"], + "filter_generation_time_in_seconds": results["filter_generation_time_in_seconds"], } ) From 15ab7459858764b1262794fa94233cd908a5b8f7 Mon Sep 17 00:00:00 2001 From: David Nuon Date: Wed, 20 Dec 2023 11:41:31 -0800 Subject: [PATCH 05/11] fix: Remove unavailable field --- search/api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/search/api.py b/search/api.py index abb45214..c9532bfb 100644 --- a/search/api.py +++ b/search/api.py @@ -107,7 +107,6 @@ def perform_search( log.info("ES result timings: %s", { "es_query_time_in_ms": results["took"], - "es_has_timed_out": results["timed_out"], "processing_time_in_seconds": processing_time_in_seconds, "search_time_in_seconds": search_time_in_seconds, "filter_generation_time_in_seconds": filter_generation_time_in_seconds, From dcb77ded00ff3179b570c594fa157d13468ef416 Mon Sep 17 00:00:00 2001 From: David Nuon Date: Wed, 20 Dec 2023 12:19:03 -0800 Subject: [PATCH 06/11] fix: remove timing information from event --- search/api.py | 9 ++++++--- search/views.py | 5 +---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/search/api.py b/search/api.py index c9532bfb..575721a0 100644 --- a/search/api.py +++ b/search/api.py @@ -105,12 +105,15 @@ def perform_search( search_time_in_seconds = search_time['end'] - search_time['start'] filter_generation_time_in_seconds = filter_generation_time['end'] - filter_generation_time['start'] - log.info("ES result timings: %s", { - "es_query_time_in_ms": results["took"], + timing_results = { + "name": "edx_search.search_timings", + "cluster_query_time_in_ms": results["took"], "processing_time_in_seconds": processing_time_in_seconds, "search_time_in_seconds": search_time_in_seconds, "filter_generation_time_in_seconds": filter_generation_time_in_seconds, - }) + } + + log.info(f"edx-search search timings {timing_results}") results["processing_time_in_seconds"] = processing_time_in_seconds results["search_time_in_seconds"] = search_time_in_seconds diff --git a/search/views.py b/search/views.py index 0d807ee3..6ae8eecb 100644 --- a/search/views.py +++ b/search/views.py @@ -113,10 +113,7 @@ def do_search(request, course_id=None): "search_term": search_term, "page_size": size, "page_number": page, - "results_count": results["total"], - "processing_time_in_seconds": results["processing_time_in_seconds"], - "search_time_in_seconds": results["search_time_in_seconds"], - "filter_generation_time_in_seconds": results["filter_generation_time_in_seconds"], + "results_count": results["total"] } ) From ab3ae139638fe51ea910b4757939a8e460f151c3 Mon Sep 17 00:00:00 2001 From: David Nuon Date: Wed, 20 Dec 2023 12:21:29 -0800 Subject: [PATCH 07/11] fix: import order --- search/api.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/search/api.py b/search/api.py index 575721a0..fdde4e74 100644 --- a/search/api.py +++ b/search/api.py @@ -1,6 +1,8 @@ """ search business logic implementations """ from datetime import datetime +import logging +import time from django.conf import settings @@ -9,9 +11,6 @@ from .result_processor import SearchResultProcessor from .utils import DateRange -import logging -import time - log = logging.getLogger(__name__) # Default filters that we support, override using COURSE_DISCOVERY_FILTERS setting if desired From 2a54da35c2d79c6072a5953df00fd6a8394547f8 Mon Sep 17 00:00:00 2001 From: David Nuon Date: Wed, 20 Dec 2023 12:24:02 -0800 Subject: [PATCH 08/11] fix: undo comma removal --- search/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/search/views.py b/search/views.py index 6ae8eecb..c59f7463 100644 --- a/search/views.py +++ b/search/views.py @@ -113,7 +113,7 @@ def do_search(request, course_id=None): "search_term": search_term, "page_size": size, "page_number": page, - "results_count": results["total"] + "results_count": results["total"], } ) From c56745eb315a756dfc5137b8f924f912006d61fe Mon Sep 17 00:00:00 2001 From: David Nuon Date: Wed, 20 Dec 2023 13:02:53 -0800 Subject: [PATCH 09/11] feat: Use eventracker in api.py instead of logging --- search/api.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/search/api.py b/search/api.py index fdde4e74..d1d9ded3 100644 --- a/search/api.py +++ b/search/api.py @@ -1,18 +1,16 @@ """ search business logic implementations """ from datetime import datetime -import logging import time from django.conf import settings +from eventtracking import tracker as track from .filter_generator import SearchFilterGenerator from .search_engine_base import SearchEngine from .result_processor import SearchResultProcessor from .utils import DateRange -log = logging.getLogger(__name__) - # Default filters that we support, override using COURSE_DISCOVERY_FILTERS setting if desired DEFAULT_FILTER_FIELDS = ["org", "modes", "language"] @@ -104,19 +102,12 @@ def perform_search( search_time_in_seconds = search_time['end'] - search_time['start'] filter_generation_time_in_seconds = filter_generation_time['end'] - filter_generation_time['start'] - timing_results = { - "name": "edx_search.search_timings", - "cluster_query_time_in_ms": results["took"], + eventtracking.track("edx.course.search.executed", { + "search_term": search_term, "processing_time_in_seconds": processing_time_in_seconds, "search_time_in_seconds": search_time_in_seconds, "filter_generation_time_in_seconds": filter_generation_time_in_seconds, - } - - log.info(f"edx-search search timings {timing_results}") - - results["processing_time_in_seconds"] = processing_time_in_seconds - results["search_time_in_seconds"] = search_time_in_seconds - results["filter_generation_time_in_seconds"] = filter_generation_time_in_seconds + }) return results From 65a9d4eecf27a2dbf7f2399fb669c5ff3cc3b42c Mon Sep 17 00:00:00 2001 From: David Nuon Date: Wed, 20 Dec 2023 13:05:20 -0800 Subject: [PATCH 10/11] fix: fix typo --- search/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/search/api.py b/search/api.py index d1d9ded3..a8a331e7 100644 --- a/search/api.py +++ b/search/api.py @@ -102,7 +102,7 @@ def perform_search( search_time_in_seconds = search_time['end'] - search_time['start'] filter_generation_time_in_seconds = filter_generation_time['end'] - filter_generation_time['start'] - eventtracking.track("edx.course.search.executed", { + track.emit("edx.course.search.executed", { "search_term": search_term, "processing_time_in_seconds": processing_time_in_seconds, "search_time_in_seconds": search_time_in_seconds, From d2e29e0e9b4b01ae70ada12bdabfa92686ba4a5c Mon Sep 17 00:00:00 2001 From: David Nuon Date: Wed, 3 Jan 2024 12:57:43 -0800 Subject: [PATCH 11/11] feat: Refactored timing logic to its own class --- search/api.py | 55 ++++++++----- search/tests/test_api_timing_events.py | 107 +++++++++++++++++++++++++ search/utils.py | 42 ++++++++++ 3 files changed, 182 insertions(+), 22 deletions(-) create mode 100644 search/tests/test_api_timing_events.py diff --git a/search/api.py b/search/api.py index a8a331e7..fde7010d 100644 --- a/search/api.py +++ b/search/api.py @@ -1,15 +1,13 @@ """ search business logic implementations """ from datetime import datetime -import time - from django.conf import settings from eventtracking import tracker as track from .filter_generator import SearchFilterGenerator from .search_engine_base import SearchEngine from .result_processor import SearchResultProcessor -from .utils import DateRange +from .utils import DateRange, Timer # Default filters that we support, override using COURSE_DISCOVERY_FILTERS setting if desired DEFAULT_FILTER_FIELDS = ["org", "modes", "language"] @@ -54,14 +52,13 @@ def perform_search( """ # field_, filter_ and exclude_dictionary(s) can be overridden by calling application # field_dictionary includes course if course_id provided - filter_generation_time = { - "start": time.time() - } + filter_generation_timer = Timer() + filter_generation_timer.start() (field_dictionary, filter_dictionary, exclude_dictionary) = SearchFilterGenerator.generate_field_filters( user=user, course_id=course_id ) - filter_generation_time['end'] = time.time() + filter_generation_timer.stop() searcher = SearchEngine.get_search_engine( getattr(settings, "COURSEWARE_CONTENT_INDEX_NAME", "courseware_content") @@ -70,9 +67,8 @@ def perform_search( raise NoSearchEngineError("No search engine specified in settings.SEARCH_ENGINE") log_search_params = getattr(settings, "SEARCH_COURSEWARE_CONTENT_LOG_PARAMS", False) - search_time = { - "start": time.time() - } + search_timer = Timer() + search_timer.start() results = searcher.search( query_string=search_term, @@ -84,9 +80,8 @@ def perform_search( log_search_params=log_search_params, ) - processing_time = { - "start": time.time() - } + processing_timer = Timer() + processing_timer.start() # post-process the result for result in results["results"]: @@ -95,20 +90,36 @@ def perform_search( results["access_denied_count"] = len([r for r in results["results"] if r["data"] is None]) results["results"] = [r for r in results["results"] if r["data"] is not None] - processing_time['end'] = time.time() - search_time['end'] = time.time() + processing_timer.stop() + search_timer.stop() - processing_time_in_seconds = processing_time['end'] - processing_time['start'] - search_time_in_seconds = search_time['end'] - search_time['start'] - filter_generation_time_in_seconds = filter_generation_time['end'] - filter_generation_time['start'] + emit_api_timing_event(search_term, course_id, filter_generation_timer, processing_timer, search_timer) + return results + +def emit_api_timing_event(search_term, course_id, filter_generation_timer, processing_timer, search_timer): + """ + Emit the timing events for the search API + """ track.emit("edx.course.search.executed", { "search_term": search_term, - "processing_time_in_seconds": processing_time_in_seconds, - "search_time_in_seconds": search_time_in_seconds, - "filter_generation_time_in_seconds": filter_generation_time_in_seconds, + "course_id": course_id, + "filter_generation_time": { + "start": filter_generation_timer.start_time, + "end": filter_generation_timer.end_time, + "elapsed": filter_generation_timer.elapsed_time, + }, + "processing_time": { + "start": processing_timer.start_time_string, + "end": processing_timer.start_time_string, + "elapsed": processing_timer.elapsed_time, + }, + "search_time": { + "start": search_timer.start_time_string, + "end": search_timer.start_time_string, + "elapsed": search_timer.elapsed_time, + }, }) - return results def course_discovery_search(search_term=None, size=20, from_=0, field_dictionary=None): diff --git a/search/tests/test_api_timing_events.py b/search/tests/test_api_timing_events.py new file mode 100644 index 00000000..1c11347e --- /dev/null +++ b/search/tests/test_api_timing_events.py @@ -0,0 +1,107 @@ +""" Tests for timing functionality """ + +import datetime +from unittest.mock import patch, call + +from django.test import TestCase +from django.test.utils import override_settings +from search.tests.mock_search_engine import MockSearchEngine +from search.utils import Timer +from search.api import emit_api_timing_event + + +@override_settings(SEARCH_ENGINE="search.tests.mock_search_engine.MockSearchEngine") +class TimingEventsTest(TestCase): + """ Tests to see if timing events are emitted""" + + def setUp(self): + super().setUp() + MockSearchEngine.destroy() + patcher = patch('search.api.track') + self.mock_track = patcher.start() + self.addCleanup(patcher.stop) + + def tearDown(self): + MockSearchEngine.destroy() + super().tearDown() + + def test_perform_search(self): + search_term = "testing search" + course_id = "mock.course.id" + + filter_generation_timer = Timer() + filter_generation_timer.start() + filter_generation_timer.stop() + + search_timer = Timer() + search_timer.start() + search_timer.stop() + + processing_timer = Timer() + processing_timer.start() + processing_timer.stop() + + emit_api_timing_event(search_term, course_id, filter_generation_timer, processing_timer, search_timer) + timing_event_call = self.mock_track.emit.mock_calls[0] + expected_call = call("edx.course.search.executed", { + "search_term": search_term, + "course_id": course_id, + "filter_generation_time": { + "start": filter_generation_timer.start_time, + "end": filter_generation_timer.end_time, + "elapsed": filter_generation_timer.elapsed_time, + }, + "processing_time": { + "start": processing_timer.start_time_string, + "end": processing_timer.start_time_string, + "elapsed": processing_timer.elapsed_time, + }, + "search_time": { + "start": search_timer.start_time_string, + "end": search_timer.start_time_string, + "elapsed": search_timer.elapsed_time, + }, + }) + self.assertEqual(timing_event_call, expected_call) + + +class TimerTest(TestCase): + """ + Timer Test Case + """ + + def test_start_timer(self): + timer = Timer() + timer.start() + timer.stop() + self.assertIsNotNone(timer.start_time) + self.assertIsNotNone(timer.end_time) + + def test_elapsed_time(self): + # pylint: disable=protected-access + + start = datetime.datetime(2024, 1, 1, 0, 0, 0, 0) + end = start + datetime.timedelta(seconds=5) + + timer = Timer() + timer._start_time = start + timer._end_time = end + + self.assertEqual(timer.elapsed_time, 5) + self.assertEqual(timer.start_time, start) + self.assertEqual(timer.end_time, end) + + def test_elapsed_time_string(self): + # pylint: disable=protected-access + + start = datetime.datetime(2024, 1, 1, 0, 0, 0, 0) + end = start + datetime.timedelta(seconds=5) + + timer = Timer() + timer._start_time = start + timer._end_time = end + + self.assertEqual(timer.elapsed_time, 5) + self.assertEqual(timer.start_time_string, "2024-01-01T00:00:00") + self.assertEqual(timer.end_time_string, "2024-01-01T00:00:05") + self.assertGreaterEqual(timer.end_time, timer.start_time) diff --git a/search/utils.py b/search/utils.py index a5b4a424..ded4a759 100644 --- a/search/utils.py +++ b/search/utils.py @@ -1,6 +1,7 @@ """ Utility classes to support others """ import importlib +import datetime from collections.abc import Iterable @@ -65,3 +66,44 @@ def upper_string(self): def lower_string(self): """ use isoformat for _lower date's string format """ return self._lower.isoformat() + + +class Timer: + + """ Simple timer class to measure elapsed time """ + def __init__(self): + self._start_time = None + self._end_time = None + + def start(self): + """ Start the timer """ + self._start_time = datetime.datetime.now() + + def stop(self): + """ Stop the timer """ + self._end_time = datetime.datetime.now() + + @property + def start_time(self): + """ Return the start time """ + return self._start_time + + @property + def end_time(self): + """ Return the end time """ + return self._end_time + + @property + def start_time_string(self): + """ use isoformat for the start time """ + return self._start_time.isoformat() + + @property + def end_time_string(self): + """ use isoformat for the end time """ + return self._end_time.isoformat() + + @property + def elapsed_time(self): + """ Return the elapsed time """ + return (self._end_time - self._start_time).seconds