Skip to content

Commit

Permalink
Merge pull request awslabs#196 from awslabs/tcu-testing
Browse files Browse the repository at this point in the history
fixing concurrency bug
  • Loading branch information
sethusrinivasan authored Sep 6, 2024
2 parents 657e61b + 76654a7 commit 43578a2
Show file tree
Hide file tree
Showing 5 changed files with 397 additions and 467 deletions.
26 changes: 25 additions & 1 deletion tools/python/timestream-compute-units-testing/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,28 @@ We provided scripts to create Timestream for LiveAnalytics resource (database an
Binning, grouping, and ordering for given host. A relatively more resource intensive query than lastpoint-query
```sql
select bin(time, 1m) AS binned_time, max(cpu_utilization) as max_cpu_utilization from "devops"."sample_devops" where time > ago(10m) and hostname='host2' group by bin(time, 1m) order by binned_time asc
```
```
Following graphs show Latency Percentiles (seconds), Queries Per Minute, Throttling Counts vs number of workers.
select last point with 4 TCU configuration
We start with 7 workers (users) and continue to increase the number of users accessing the data. We notice that with just 4 TCUs, the service supported approximately 4830 queries per minute (approximately 81 queries per second) with a p90 latency of less than 150 ms. As the number of Grafana users/workers increases, we see an increase in latency and thereby a decrease in the number of queries per minute, but zero throttles. This is because the SDK's default maximum retries is set to 3 (a best practice), and the SDK retries the queries before surfacing throttling. If sub-500 ms query latency is acceptable for your use case, you could serve up to 35 concurrent users and 4000+ queries per minute with 4 TCUs.
select last point with 8 TCU configuration: We increase the MaxQueryTCU to 8 and rerun the test. (8 TCUs might not be allocated immediately; the service scales automatically based on the workload, so there is a chance the initial results reflect only 4 TCUs.)
We observed that with 8 TCUs the service supported approximately 5000 queries per minute. If your requirement is sub-second p99 performance, you could serve 50 concurrent users and 5000 queries per minute with 8 TCUs. Based on the use case, you could run a smaller number of queries, which may bring p99 down to sub-500 milliseconds as well.
binning and grouping with 4 TCU configuration
With 4 TCUs, you can analyze the metrics of 2750+ hosts in a minute with 30 concurrent workers (Grafana users), and a maximum of approximately 3346 queries per minute with 14 concurrent workers at 55 QPS. As the number of Grafana users increases, so does the latency, and thereby there are fewer queries per minute.
binning and grouping with 8 TCU configuration
With 8 TCUs, you can analyze the metrics of 3750+ hosts in a minute for 30 concurrent Grafana users.
187 changes: 187 additions & 0 deletions tools/python/timestream-compute-units-testing/last_point-query.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "e33a646c",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"import boto3\n",
"import threading\n",
"import time\n",
"from datetime import datetime\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from botocore.exceptions import ClientError\n",
"\n",
"# Initialize the Timestream client (adjust region_name to match your deployment).\n",
"client = boto3.client('timestream-query', region_name='us-east-2')\n",
"\n",
"# Timestream queries under test; workers are assigned queries round-robin.\n",
"# Plain string literal: the original used an f-string with no placeholders.\n",
"queries = [\n",
"    'select memory from \"devops\".\"sample_devops\" where time > ago(10m) and hostname=\\'host1\\' order by time desc limit 1',\n",
"]\n",
"\n",
"# Function to run a single query and measure its duration\n",
"def run_query(query, worker_id, worker_results, throttling_results):\n",
"    \"\"\"Run `query` in a loop for `duration_seconds` (module-level global).\n",
"\n",
"    Appends each query's wall-clock latency (seconds) to\n",
"    worker_results[worker_id], and one marker string per ThrottlingException\n",
"    to throttling_results[worker_id]. Any other ClientError is re-raised.\n",
"    \"\"\"\n",
"    # Each worker writes only under its own key, so the shared dicts need no lock.\n",
"    worker_results[worker_id] = []\n",
"    throttling_results[worker_id] = []\n",
"    # NOTE(review): relies on the module-level duration_seconds defined below.\n",
"    end_time = time.time() + duration_seconds\n",
"    while time.time() < end_time:\n",
"        try:\n",
"            start = time.time()\n",
"            client.query(QueryString=query)\n",
"            worker_results[worker_id].append(time.time() - start)\n",
"        except ClientError as e:\n",
"            # Count throttles; surface any other client error to the caller.\n",
"            if e.response['Error']['Code'] == 'ThrottlingException':\n",
"                throttling_results[worker_id].append(\"ThrottlingException\")\n",
"            else:\n",
"                raise\n",
"\n",
"\n",
"# Function to run queries in parallel using threads\n",
"def run_parallel_queries(duration_seconds, queries, users):\n",
"    \"\"\"Launch `users` worker threads, each running for `duration_seconds`.\n",
"\n",
"    Returns (total_queries, p50, p90, p99, throttling_count); the latency\n",
"    percentiles are None when no query completed successfully.\n",
"    \"\"\"\n",
"    worker_results = {}\n",
"    throttling_results = {}\n",
"    threads = []\n",
"\n",
"    print(f\"\\nLoad Test Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n",
"\n",
"    # Distribute the query list round-robin; each worker records under its own id.\n",
"    for worker_id in range(users):\n",
"        thread = threading.Thread(target=run_query, args=(queries[worker_id % len(queries)], worker_id, worker_results, throttling_results))\n",
"        threads.append(thread)\n",
"        thread.start()\n",
"\n",
"    for thread in threads:\n",
"        thread.join()\n",
"\n",
"    # Flatten per-worker latency lists into one sample set.\n",
"    query_durations = [duration for durations in worker_results.values() for duration in durations]\n",
"    total_queries = len(query_durations)\n",
"    throttling_count = sum(len(exceptions_list) for exceptions_list in throttling_results.values())\n",
"\n",
"    print(f\"Load Test End time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n",
"\n",
"    if query_durations:\n",
"        p50 = np.percentile(query_durations, 50)\n",
"        p90 = np.percentile(query_durations, 90)\n",
"        p99 = np.percentile(query_durations, 99)\n",
"    else:\n",
"        p50 = p90 = p99 = None\n",
"\n",
"    return total_queries, p50, p90, p99, throttling_count\n",
"\n",
"\n",
"users_count = [1,2,3,4,5,6,7,9,11,13,15,17,21]  # worker counts to sweep\n",
"duration_seconds = 60  # how long each load level runs\n",
"\n",
"results = []\n",
"\n",
"# Run the tests for different worker counts\n",
"for users in users_count:\n",
"    total_queries, p50, p90, p99, throttling_count = run_parallel_queries(duration_seconds, queries, users)\n",
"    results.append((users, total_queries, p50, p90, p99, throttling_count))\n",
"    print(f\"num_users: {users}\")\n",
"    print(f\"Total number of queries run in {duration_seconds} seconds: {total_queries}\")\n",
"    # Percentiles are None when every query throttled; {None:.2f} would raise\n",
"    # TypeError, so guard the formatting.\n",
"    for label, value in ((\"p50 (50th percentile)\", p50), (\"p90 (90th percentile)\", p90), (\"p99 (99th percentile)\", p99)):\n",
"        print(f\"{label} of query durations: {value:.2f} seconds\" if value is not None else f\"{label} of query durations: N/A\")\n",
"    print(f\"Throttling count: {throttling_count}\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1aedc0c2",
"metadata": {},
"outputs": [],
"source": [
"# Unpack the per-run metrics gathered by the load-test loop above.\n",
"total_queries = [run[1] for run in results]\n",
"p50s = [run[2] for run in results]\n",
"p90s = [run[3] for run in results]\n",
"p99s = [run[4] for run in results]\n",
"throttling_counts = [run[5] for run in results]\n",
"\n",
"plt.figure(figsize=(12, 8))\n",
"\n",
"# Plot latency percentiles\n",
"plt.subplot(3, 1, 1)\n",
"for series, label in ((p50s, 'p50'), (p90s, 'p90'), (p99s, 'p99')):\n",
"    plt.plot(users_count, series, label=label)\n",
"plt.xlabel('Number of Users')\n",
"plt.ylabel('Latency (seconds)')\n",
"plt.xticks(users_count)\n",
"plt.title('Latency Percentiles')\n",
"plt.legend()\n",
"\n",
"# Plot Queries Per Minute (QPM)\n",
"plt.subplot(3, 1, 2)\n",
"qpm = [count / (duration_seconds / 60) for count in total_queries]\n",
"plt.plot(users_count, qpm, label='Queries Per Minute (QPM)')\n",
"plt.xlabel('Number of Users')\n",
"plt.ylabel('Queries Per Minute')\n",
"plt.xticks(users_count)\n",
"plt.title('Queries Per Minute')\n",
"plt.legend()\n",
"\n",
"# Plot Throttling Counts\n",
"plt.subplot(3, 1, 3)\n",
"plt.plot(users_count, throttling_counts, label='Throttling Count', color='red')\n",
"plt.xlabel('Number of Users')\n",
"plt.ylabel('Throttling Count')\n",
"plt.xticks(users_count)\n",
"plt.title('Throttling Count')\n",
"plt.legend()\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "258db7a6",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "conda_python3",
"language": "python",
"name": "conda_python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit 43578a2

Please sign in to comment.