Skip to content

Commit

Permalink
Adjust workload runner transient backoff
Browse files (browse the repository at this point in the history)
  • Loading branch information
geoffxy committed Nov 17, 2023
1 parent 221ed6a commit 3353184
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 11 deletions.
22 changes: 16 additions & 6 deletions workloads/IMDB_extended/run_repeating_analytics.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -197,7 +197,11 @@ def noop(_signal, _frame):

exec_count += 1
if rand_backoff is not None:
print(f"[RA {runner_idx}] Continued after transient errors.")
print(
f"[RA {runner_idx}] Continued after transient errors.",
flush=True,
file=sys.stderr,
)
rand_backoff = None

except BradClientError as ex:
Expand All @@ -212,18 +216,24 @@ def noop(_signal, _frame):

if rand_backoff is None:
rand_backoff = RandomizedExponentialBackoff(
max_retries=10,
base_delay_s=2.0,
max_delay_s=timedelta(minutes=10).total_seconds(),
max_retries=20,
base_delay_s=1.0,
max_delay_s=timedelta(minutes=1).total_seconds(),
)
print(
f"[RA {runner_idx}] Backing off due to transient errors.",
flush=True,
file=sys.stderr,
)
print(f"[RA {runner_idx}] Backing off due to transient errors.")

# Delay retrying in the case of a transient error (this
# happens during blueprint transitions).
wait_s = rand_backoff.wait_time_s()
if wait_s is None:
print(
f"[RA {runner_idx}] Aborting benchmark. Too many transient errors."
f"[RA {runner_idx}] Aborting benchmark. Too many transient errors.",
flush=True,
file=sys.stderr,
)
break
time.sleep(wait_s)
Expand Down
18 changes: 15 additions & 3 deletions workloads/IMDB_extended/run_transactions.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -104,7 +104,11 @@ def noop_handler(_signal, _frame):
succeeded = txn(db)

if rand_backoff is not None:
print(f"[T {worker_idx}] Continued after transient errors.")
print(
f"[T {worker_idx}] Continued after transient errors.",
flush=True,
file=sys.stderr,
)
rand_backoff = None

except BradClientError as ex:
Expand All @@ -123,13 +127,21 @@ def noop_handler(_signal, _frame):
base_delay_s=0.1,
max_delay_s=timedelta(minutes=1).total_seconds(),
)
print(f"[T {worker_idx}] Backing off due to transient errors.")
print(
f"[T {worker_idx}] Backing off due to transient errors.",
flush=True,
file=sys.stderr,
)

# Delay retrying in the case of a transient error (this
# happens during blueprint transitions).
wait_s = rand_backoff.wait_time_s()
if wait_s is None:
print("Aborting benchmark. Too many transient errors.")
print(
"Aborting benchmark. Too many transient errors.",
flush=True,
file=sys.stderr,
)
break
time.sleep(wait_s)

Expand Down
9 changes: 7 additions & 2 deletions workloads/IMDB_extended/workload_utils/connect.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
import asyncio
import pyodbc
import os
import sys
from typing import Optional

from brad.config.engine import Engine
Expand Down Expand Up @@ -50,10 +51,14 @@ def connect_to_db(
else:
port_offset = (worker_index + args.client_offset) % args.num_front_ends
port = args.brad_port + port_offset
print(f"[{worker_index}] Connecting to BRAD at {args.brad_host}:{port}")
print(
f"[{worker_index}] Connecting to BRAD at {args.brad_host}:{port}",
flush=True,
file=sys.stderr,
)
brad = BradGrpcClient(args.brad_host, port)
brad.connect()
print(f"[{worker_index}] Connected to BRAD.")
print(f"[{worker_index}] Connected to BRAD.", flush=True, file=sys.stderr)
db = BradDatabase(brad)

return db

0 comments on commit 3353184

Please sign in to comment.