marqo-ai · farshidz · Feb 13, 2025 · Feb 13, 2025 · Feb 13, 2025 · Feb 13, 2025
diff --git a/.github/workflows/locust_perf_test.yml b/.github/workflows/locust_perf_test.yml
@@ -1,5 +1,5 @@
-name: locust_performance_tests
-run-name: Locust test suite ${{ inputs.test_suite }} run on ${{ inputs.marqo_host }} with ${{ inputs.number_of_users }} users
+name: Locust Performance Test
+run-name: ${{ inputs.test_suite }} on image ${{ inputs.image_to_test }} run on ${{ inputs.marqo_host }} with ${{ inputs.number_of_users }} users
 # runs Locust performance tests on local Marqo container or Marqo Cloud
 
 on:
@@ -10,6 +10,8 @@ on:
         required: true
         options:
           - random_index_and_tensor_search.py
+          - random_index_and_hybrid_search_rrf.py
+          - random_index_and_hybrid_search_rrf_with_global_score_modifiers.py
         description: >
           Location of the locustfile that contains the test suite. This works as the entrypoint
 
@@ -128,7 +130,7 @@ jobs:
 
       - name: Start local Marqo container
         if: github.event.inputs.marqo_host == 'http://localhost:8882'
-        run: ./.github/scripts/start_docker_marqo.sh ${{ github.event.inputs.image_to_test }} -e MARQO_MODELS_TO_PRELOAD='["${{ github.event.inputs.index_model_name }}"]'
+        run: ./tests/api_tests/v1/scripts/start_docker_marqo.sh ${{ github.event.inputs.image_to_test }} -e MARQO_MODELS_TO_PRELOAD='["${{ github.event.inputs.index_model_name }}"]'
 
       - name: Install Perf Tests Dependencies
         run: |

diff --git a/perf_tests/random_index_and_hybrid_search_rrf.py b/perf_tests/random_index_and_hybrid_search_rrf.py
@@ -0,0 +1,102 @@
+from __future__ import annotations
+
+import random
+import os
+
+from locust import events, task, between, run_single_user
+from locust.env import Environment
+from wonderwords import RandomSentence, RandomWord
+import marqo
+
+from common.marqo_locust_http_user import MarqoLocustHttpUser
+
+"""
+Performance test on Hybrid RRF search.
+Does NOT use global score modifiers in search, but adds the score modifier fields to documents.
+"""
+
+INDEX_NAME = os.getenv('MARQO_INDEX_NAME', 'locust-test')
+
+
+class AddDocToStructuredIndexUser(MarqoLocustHttpUser):  # Locust user whose only task is indexing documents into INDEX_NAME
+    fixed_count = 1  # Locust setting: spawn a fixed number (one) of these users instead of using weights
+    wait_time = between(1, 2)  # random wait of 1-2 seconds between tasks (locust `between`)
+
+    @task
+    def add_docs(self):
+        # Build one deterministic batch of 10 docs (i = 0..9); despite the name `random_docs`, nothing here is random
+        s = "this is a random sentence."
+        random_docs = [{
+            'title': s,
+            'description': ' '.join([s for j in range(i)]),  # the sentence repeated i times (empty string for i == 0)
+            'mult_field': i,  # mult_field / add_field are declared as score_modifier fields by on_test_start (local runs)
+            'add_field': i
+        } for i in range(10)]
+
+        self.client.index(INDEX_NAME).add_documents(documents=random_docs)
+
+
+class SearchUser(MarqoLocustHttpUser):  # Locust user whose only task is a hybrid search against INDEX_NAME
+    wait_time = between(1, 2)  # random wait of 1-2 seconds between tasks (locust `between`)
+    w = RandomWord()  # class attribute, so one word generator is shared by every SearchUser instance
+
+    @task
+    def search(self):
+        # Query with 5 random words joined by spaces; request the first 20 hits (offset 0) without highlights
+        self.client.index(INDEX_NAME).search(
+            q=' '.join(self.w.random_words(amount=5)),
+            search_method='HYBRID',
+            hybrid_parameters={
+                'retrievalMethod': 'disjunction',
+                'rankingMethod': 'rrf'  # RRF ranking; no score_modifiers argument is passed in this test
+            },
+            limit=20,
+            show_highlights=False,
+            offset=0,
+        )
+
+
+@events.init.add_listener  # NOTE: despite the function name, this is registered on Locust's `init` event
+def on_test_start(environment: Environment, **kwargs):
+    host = environment.host
+    local_run = host == 'http://localhost:8882'  # exact string match; any other host value skips index creation
+    if local_run:
+        # Create the structured index only for a local run; for other hosts nothing is created here
+        marqo_client = marqo.Client(url=host)
+        marqo_client.create_index(
+            INDEX_NAME,
+
+            settings_dict={
+                "type": "structured",
+                "model": os.getenv('MARQO_INDEX_MODEL_NAME', 'hf/e5-base-v2'),  # model can be overridden via env var
+                "allFields": [
+                    {"name": "title", "type": "text", "features": ["lexical_search"]},
+                    {"name": "description", "type": "text", "features": ["lexical_search"]},
+                    {"name": "mult_field", "type": "int", "features": ["score_modifier"]},
+                    {"name": "add_field", "type": "int", "features": ["score_modifier"]}
+                ],
+                "tensorFields": ['title', 'description']  # the same two text fields are also declared as tensor fields
+            }
+        )
+
+
+@events.quitting.add_listener  # NOTE: registered on Locust's `quitting` event, not `test_stop`
+def on_test_stop(environment, **kwargs):
+    host = environment.host
+    local_run = host == 'http://localhost:8882'  # same local-host check as on_test_start
+    if local_run:
+        marqo_client = marqo.Client(url=host)
+        marqo_client.delete_index(INDEX_NAME)  # remove the index that on_test_start creates for a local run
+
+
+# @events.request.add_listener
+# def on_request(name, response, exception, **kwargs):
+#     """
+#     Event handler that gets triggered on every request
+#     """
+#     # print out processing time for each request
+#     print(name,  response.json()['processingTimeMs'])
+
+
+if __name__ == "__main__":
+    run_single_user(AddDocToStructuredIndexUser)  # direct-run entry point: only the indexing user runs, not SearchUser
diff --git a/perf_tests/random_index_and_hybrid_search_rrf_with_global_score_modifiers.py b/perf_tests/random_index_and_hybrid_search_rrf_with_global_score_modifiers.py
@@ -0,0 +1,105 @@
+from __future__ import annotations
+
+import random
+import os
+
+from locust import events, task, between, run_single_user
+from locust.env import Environment
+from wonderwords import RandomSentence, RandomWord
+import marqo
+
+from common.marqo_locust_http_user import MarqoLocustHttpUser
+
+"""
+Performance test on Hybrid RRF search.
+Uses global score modifiers in search.
+"""
+
+INDEX_NAME = os.getenv('MARQO_INDEX_NAME', 'locust-test')
+
+
+class AddDocToStructuredIndexUser(MarqoLocustHttpUser):  # Locust user whose only task is indexing documents into INDEX_NAME
+    fixed_count = 1  # Locust setting: spawn a fixed number (one) of these users instead of using weights
+    wait_time = between(1, 2)  # random wait of 1-2 seconds between tasks (locust `between`)
+
+    @task
+    def add_docs(self):
+        # Build one deterministic batch of 10 docs (i = 0..9); despite the name `random_docs`, nothing here is random
+        s = "this is a random sentence."
+        random_docs = [{
+            'title': s,
+            'description': ' '.join([s for j in range(i)]),  # the sentence repeated i times (empty string for i == 0)
+            'mult_field': i,  # read by SearchUser's multiply_score_by modifier
+            'add_field': i  # read by SearchUser's add_to_score modifier
+        } for i in range(10)]
+
+        self.client.index(INDEX_NAME).add_documents(documents=random_docs)
+
+
+class SearchUser(MarqoLocustHttpUser):  # Locust user whose only task is a hybrid search with score modifiers
+    wait_time = between(1, 2)  # random wait of 1-2 seconds between tasks (locust `between`)
+    w = RandomWord()  # class attribute, so one word generator is shared by every SearchUser instance
+
+    @task
+    def search(self):
+        # Query with 5 random words joined by spaces; request the first 20 hits (offset 0) without highlights
+        self.client.index(INDEX_NAME).search(
+            q=' '.join(self.w.random_words(amount=5)),
+            search_method='HYBRID',
+            hybrid_parameters={
+                'retrievalMethod': 'disjunction',
+                'rankingMethod': 'rrf'
+            },
+            score_modifiers={  # this argument is what distinguishes this suite from the plain hybrid RRF suite
+                "multiply_score_by": [{"field_name": "mult_field", "weight": 2}],
+                "add_to_score": [{"field_name": "add_field", "weight": -0.1}]  # negative weight on add_field
+            },
+            limit=20,
+            show_highlights=False,
+            offset=0,
+        )
+
+
+@events.init.add_listener  # NOTE: despite the function name, this is registered on Locust's `init` event
+def on_test_start(environment: Environment, **kwargs):
+    host = environment.host
+    local_run = host == 'http://localhost:8882'  # exact string match; any other host value skips index creation
+    if local_run:
+        # Create the structured index only for a local run; for other hosts nothing is created here
+        marqo_client = marqo.Client(url=host)
+        marqo_client.create_index(
+            INDEX_NAME,
+            settings_dict={
+                "type": "structured",
+                "model": os.getenv('MARQO_INDEX_MODEL_NAME', 'hf/e5-base-v2'),  # model can be overridden via env var
+                "allFields": [
+                    {"name": "title", "type": "text", "features": ["lexical_search"]},
+                    {"name": "description", "type": "text", "features": ["lexical_search"]},
+                    {"name": "mult_field", "type": "int", "features": ["score_modifier"]},
+                    {"name": "add_field", "type": "int", "features": ["score_modifier"]}
+                ],
+                "tensorFields": ['title', 'description']  # the same two text fields are also declared as tensor fields
+            }
+        )
+
+
+@events.quitting.add_listener  # NOTE: registered on Locust's `quitting` event, not `test_stop`
+def on_test_stop(environment, **kwargs):
+    host = environment.host
+    local_run = host == 'http://localhost:8882'  # same local-host check as on_test_start
+    if local_run:
+        marqo_client = marqo.Client(url=host)
+        marqo_client.delete_index(INDEX_NAME)  # remove the index that on_test_start creates for a local run
+
+
+# @events.request.add_listener
+# def on_request(name, response, exception, **kwargs):
+#     """
+#     Event handler that gets triggered on every request
+#     """
+#     # print out processing time for each request
+#     print(name,  response.json()['processingTimeMs'])
+
+
+if __name__ == "__main__":
+    run_single_user(AddDocToStructuredIndexUser)  # direct-run entry point: only the indexing user runs, not SearchUser
diff --git a/perf_tests/requirements.in b/perf_tests/requirements.in
@@ -1,3 +1,3 @@
 locust==2.25.0
-marqo==3.7.0
+marqo==3.11.0
 wonderwords==2.2.0
diff --git a/perf_tests/requirements.txt b/perf_tests/requirements.txt
@@ -2,7 +2,7 @@
 # This file is autogenerated by pip-compile with Python 3.8
 # by the following command:
 #
-#    pip-compile requirements.in
+#    pip-compile --output-file=requirements.txt requirements.in
 #
 annotated-types==0.7.0
     # via pydantic
@@ -51,7 +51,7 @@ markupsafe==2.1.5
     # via
     #   jinja2
     #   werkzeug
-marqo==3.7.0
+marqo==3.11.0
     # via -r requirements.in
 msgpack==1.0.8
     # via locust
@@ -78,7 +78,6 @@ tomli==2.0.1
 typing-extensions==4.12.2
     # via
     #   annotated-types
-    #   marqo
     #   pydantic
     #   pydantic-core
 urllib3==1.26.19

diff --git a/tests/api_tests/v1/scripts/start_docker_marqo.sh b/tests/api_tests/v1/scripts/start_docker_marqo.sh