
Add motivational experiment code
geoffxy committed Nov 6, 2023
1 parent 00a131a commit 05d0103
Showing 9 changed files with 263 additions and 4 deletions.
14 changes: 11 additions & 3 deletions experiments/15-e2e-scenarios-v2/common.sh
@@ -138,16 +138,24 @@ function run_repeating_olap_warmup() {

 function start_txn_runner() {
     t_clients=$1
+    client_offset=$2
 
     >&2 echo "[Transactions] Running with $t_clients..."
     results_dir=$COND_OUT/t_${t_clients}
     mkdir -p $results_dir
 
+    local args=(
+        --num-clients $t_clients
+        --num-front-ends $num_front_ends
+    )
+
+    if [[ ! -z $client_offset ]]; then
+        args+=(--client-offset $client_offset)
+    fi
+
     log_workload_point "txn_${t_clients}"
     COND_OUT=$results_dir python3 ../../../workloads/IMDB_extended/run_transactions.py \
-        --num-clients $t_clients \
-        --num-front-ends $num_front_ends \
-        &
+        "${args[@]}" &
 
     # This is a special return value variable that we use.
     runner_pid=$!
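Since the second parameter is optional, existing call sites keep working. A hedged usage sketch (the client counts are illustrative, not from this commit): with num-front-ends set to 2 as in the COND file below, two single-client runners can each claim their own front end via the (worker_index + client_offset) % num_front_ends mapping added in connect.py at the end of this commit:

    start_txn_runner 1      # worker 0 -> (0 + 0) % 2 -> front end 0
    start_txn_runner 1 1    # worker 0 -> (0 + 1) % 2 -> front end 1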
47 changes: 47 additions & 0 deletions experiments/15-e2e-scenarios-v2/motivation/COND
@@ -0,0 +1,47 @@
include("../common.cond")

# 10 queries that should be acceptable on Aurora. (IMDB_100GB/regular_test)
aurora_ok = [99, 56, 32, 92, 91, 49, 30, 83, 94, 38]

# 7 queries that should be good on Aurora relative to others (IMDB_100GB/regular_rebalanced_5k)
aurora_good = [1243, 1277, 1485, 2117, 3988, 4152, 4625]

# 5 queries that should be good on Athena and acceptable on Redshift (IMDB_100GB/regular_rebalanced_5k).
# Total cost (running once): 0.07977253171000001
athena_good = [179, 238, 382, 768, 933]

run_experiment(
    name="brad_100g",
    run="./run_workload.sh",
    options={
        "config-file": "config/config_large_100.yml",
        "planner-config-file": "config/planner.yml",
        "schema-name": "imdb_extended_100g",
        "ra-query-bank-file": "queries.sql",
        "ra-query-indexes": ",".join(map(str, range(0, 20))),
        "num-front-ends": 2,
    },
)

run_command(
    name="set_up_router_before",
    run="python3 set_up_router.py",
    options={
        "config-file": "config/config_large_100.yml",
        "schema-name": "imdb_extended_100g",
        "query-bank-file": "queries.sql",
        "redshift-queries": ",".join(map(str, range(0, 20))),
    },
)

run_command(
    name="set_up_router_after",
    run="python3 set_up_router.py",
    options={
        "config-file": "config/config_large_100.yml",
        "schema-name": "imdb_extended_100g",
        "query-bank-file": "queries.sql",
        "aurora-queries": ",".join(map(str, range(0, 15))),
        "athena-queries": ",".join(map(str, range(15, 20))),
    },
)
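These targets are presumably driven with Conductor, the runner that consumes COND files; a hedged sketch of the intended before/run/after sequence (the cond invocation syntax and target paths are assumptions inferred from this file's location, not from the commit):

    cond run //experiments/15-e2e-scenarios-v2/motivation:set_up_router_before
    cond run //experiments/15-e2e-scenarios-v2/motivation:brad_100g
    cond run //experiments/15-e2e-scenarios-v2/motivation:set_up_router_after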
24 changes: 24 additions & 0 deletions experiments/15-e2e-scenarios-v2/motivation/extract.py
@@ -0,0 +1,24 @@
import argparse


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--source-file", type=str)
    parser.add_argument("--out-file", type=str)
    parser.add_argument("--indices", type=str, help="Comma separated.")
    parser.add_argument("--overwrite", action="store_true")
    args = parser.parse_args()

    with open(args.source_file, "r", encoding="UTF-8") as file:
        queries = [line.strip() for line in file]

    indices = [int(qidx_str.strip()) for qidx_str in args.indices.split(",")]

    mode = "w" if args.overwrite else "a"
    with open(args.out_file, mode, encoding="UTF-8") as out_file:
        for idx in indices:
            print(queries[idx], file=out_file)


if __name__ == "__main__":
    main()
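A hedged usage sketch for assembling queries.sql: the output defaults to append mode, so repeated calls accumulate queries from different banks. The source path is an assumption; the indices are the aurora_ok list from the COND file above.

    python3 extract.py \
        --source-file ../../../workloads/IMDB_100GB/regular_test/queries.sql \
        --indices 99,56,32,92,91,49,30,83,94,38 \
        --out-file queries.sql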
20 changes: 20 additions & 0 deletions experiments/15-e2e-scenarios-v2/motivation/queries.sql
@@ -0,0 +1,20 @@
SELECT AVG("company_type"."id") as agg_0 FROM "company_type" LEFT OUTER JOIN "movie_companies" ON "company_type"."id" = "movie_companies"."company_type_id" WHERE "movie_companies"."id" <= 1243451 AND "company_type"."kind" != '%production companies%';
SELECT MAX("title"."kind_id") AS agg_0, MAX("title"."imdb_id") AS agg_1, MAX("title"."md5sum") AS agg_2, MAX("title"."episode_nr") AS agg_3, MAX("title"."series_years") AS agg_4, MAX("movie_info"."info") AS agg_5, MAX("title"."production_year") AS agg_6, MAX("title"."episode_of_id") AS agg_7 FROM "movie_info" LEFT OUTER JOIN "title" ON "movie_info"."movie_id" = "title"."id" WHERE "title"."series_years" <> '%1978-1983%' AND "title"."episode_of_id" <= 9617;
SELECT MAX("movie_info_idx"."info") AS agg_0, MAX("movie_info_idx"."note") AS agg_1, MAX("movie_info_idx"."movie_id") AS agg_2 FROM "info_type" LEFT OUTER JOIN "movie_info_idx" ON "info_type"."id" = "movie_info_idx"."info_type_id" WHERE "info_type"."info" <> '%color info%' AND "movie_info_idx"."id" <= 24213;
SELECT SUM("link_type"."id") as agg_0, AVG("title"."episode_of_id") as agg_1 FROM "link_type" LEFT OUTER JOIN "movie_link" ON "link_type"."id" = "movie_link"."link_type_id" LEFT OUTER JOIN "title" ON "movie_link"."movie_id" = "title"."id" WHERE "title"."production_year" <= 1968.6318739735045 AND "link_type"."id" != 3;
SELECT "company_name"."country_code", SUM("company_name"."id") as agg_0 FROM "company_name" LEFT OUTER JOIN "movie_companies" ON "company_name"."id" = "movie_companies"."company_id" WHERE "company_name"."id" <= 96208 AND "movie_companies"."company_type_id" <= 2 AND "company_name"."country_code" != '%[gg]%' AND "movie_companies"."movie_id" <= 878607 GROUP BY "company_name"."country_code" ORDER BY "company_name"."country_code";
SELECT MAX("title"."imdb_index") AS agg_0, MAX("movie_companies"."company_type_id") AS agg_1, MAX("company_type"."kind") AS agg_2, MAX("movie_companies"."id") AS agg_3, MAX("movie_companies"."note") AS agg_4, MAX("title"."episode_of_id") AS agg_5, MAX("title"."production_year") AS agg_6, MAX("title"."series_years") AS agg_7, MAX("movie_companies"."company_id") AS agg_8 FROM "company_type" LEFT OUTER JOIN "movie_companies" ON "company_type"."id" = "movie_companies"."company_type_id" LEFT OUTER JOIN "title" ON "movie_companies"."movie_id" = "title"."id" WHERE "movie_companies"."movie_id" <= 15119 AND "title"."id" <= 40933;
SELECT "title"."imdb_index", SUM("title"."season_nr") as agg_0, COUNT(*) as agg_1 FROM "char_name" LEFT OUTER JOIN "cast_info" ON "char_name"."id" = "cast_info"."person_role_id" LEFT OUTER JOIN "title" ON "cast_info"."movie_id" = "title"."id" WHERE "char_name"."id" <= 558991 AND "cast_info"."id" <= 24654818 AND "char_name"."imdb_index" != '%I%' AND "cast_info"."person_id" <= 3185463 GROUP BY "title"."imdb_index" ORDER BY "title"."imdb_index";
SELECT "movie_link"."link_type_id", COUNT(*) as agg_0, COUNT(*) as agg_1 FROM "link_type" LEFT OUTER JOIN "movie_link" ON "link_type"."id" = "movie_link"."link_type_id" LEFT OUTER JOIN "title" ON "movie_link"."movie_id" = "title"."id" WHERE "movie_link"."linked_movie_id" <= 2268341 AND "title"."series_years" != '%2000-2005%' AND "movie_link"."link_type_id" >= 3 GROUP BY "movie_link"."link_type_id" ORDER BY "movie_link"."link_type_id";
SELECT MAX("char_name"."imdb_index") AS agg_0, MAX("char_name"."md5sum") AS agg_1, MAX("cast_info"."nr_order") AS agg_2, MAX("cast_info"."role_id") AS agg_3, MAX("char_name"."name") AS agg_4, MAX("char_name"."surname_pcode") AS agg_5, MAX("cast_info"."note") AS agg_6 FROM "char_name" LEFT OUTER JOIN "cast_info" ON "char_name"."id" = "cast_info"."person_role_id" WHERE "cast_info"."role_id" <= 2 AND "char_name"."imdb_index" <> '%I%' AND "char_name"."id" <= 27286;
SELECT COUNT(*) as agg_0, COUNT(*) as agg_1 FROM "keyword" LEFT OUTER JOIN "movie_keyword" ON "keyword"."id" = "movie_keyword"."keyword_id" WHERE "keyword"."id" >= 49237 AND "movie_keyword"."movie_id" <= 1690941;
SELECT MAX("title"."series_years") AS agg_0, MAX("movie_info"."info_type_id") AS agg_1, MAX("movie_info"."id") AS agg_2, MAX("title"."season_nr") AS agg_3, MAX("title"."episode_nr") AS agg_4, MAX("movie_info"."info") AS agg_5, MAX("title"."imdb_id") AS agg_6, MAX("movie_info"."note") AS agg_7 FROM "movie_info" LEFT OUTER JOIN "title" ON "movie_info"."movie_id" = "title"."id" WHERE "title"."episode_of_id" <= 8162;
SELECT MAX("title"."episode_of_id") AS agg_0, MAX("movie_info"."info_type_id") AS agg_1, MAX("movie_info"."note") AS agg_2, MAX("title"."title") AS agg_3, MAX("movie_info"."id") AS agg_4, MAX("title"."episode_nr") AS agg_5, MAX("movie_info"."info") AS agg_6, MAX("title"."production_year") AS agg_7 FROM "movie_info" LEFT OUTER JOIN "title" ON "movie_info"."movie_id" = "title"."id" WHERE "title"."season_nr" <= 2.4871985173614926 AND "title"."episode_of_id" <= 8557;
SELECT MAX("movie_info"."info") AS agg_0, MAX("movie_info"."movie_id") AS agg_1, MAX("title"."phonetic_code") AS agg_2, MAX("movie_info"."note") AS agg_3, MAX("title"."kind_id") AS agg_4, MAX("title"."imdb_index") AS agg_5, MAX("title"."title") AS agg_6, MAX("movie_info"."id") AS agg_7 FROM "movie_info" LEFT OUTER JOIN "title" ON "movie_info"."movie_id" = "title"."id" WHERE "title"."episode_of_id" <= 4826;
SELECT MAX("title"."id") AS agg_0, MAX("movie_info"."info_type_id") AS agg_1, MAX("movie_info"."info") AS agg_2, MAX("title"."production_year") AS agg_3, MAX("title"."season_nr") AS agg_4, MAX("title"."phonetic_code") AS agg_5, MAX("title"."imdb_id") AS agg_6, MAX("movie_info"."movie_id") AS agg_7 FROM "movie_info" LEFT OUTER JOIN "title" ON "movie_info"."movie_id" = "title"."id" WHERE "title"."episode_of_id" <= 4953;
SELECT MAX("title"."episode_of_id") AS agg_0, MAX("movie_info"."id") AS agg_1, MAX("title"."production_year") AS agg_2, MAX("title"."imdb_id") AS agg_3, MAX("movie_info"."note") AS agg_4, MAX("title"."title") AS agg_5, MAX("title"."md5sum") AS agg_6, MAX("title"."kind_id") AS agg_7 FROM "movie_info" LEFT OUTER JOIN "title" ON "movie_info"."movie_id" = "title"."id" WHERE "title"."episode_of_id" <= 3370;
SELECT MAX("title"."episode_nr") AS agg_0, MAX("title"."imdb_id") AS agg_1, MAX("title"."episode_of_id") AS agg_2, MAX("movie_info_idx"."id") AS agg_3, MAX("title"."id") AS agg_4, MAX("movie_info_idx"."info_type_id") AS agg_5, MAX("movie_info_idx"."note") AS agg_6, MAX("title"."production_year") AS agg_7 FROM "movie_info_idx" LEFT OUTER JOIN "title" ON "movie_info_idx"."movie_id" = "title"."id" WHERE "title"."episode_of_id" <= 2515475;
SELECT "name"."gender", SUM("person_info"."info_type_id") as agg_0 FROM "name" LEFT OUTER JOIN "person_info" ON "name"."id" = "person_info"."person_id" LEFT OUTER JOIN "aka_name" ON "name"."id" = "aka_name"."person_id" WHERE "aka_name"."surname_pcode" != '%E121%' AND "person_info"."person_id" >= 1860680 AND "person_info"."info_type_id" <= 31 GROUP BY "name"."gender" ORDER BY "name"."gender";
SELECT "name"."surname_pcode", SUM("person_info"."info_type_id") AS agg_0, COUNT(*) AS agg_1 FROM "person_info" LEFT OUTER JOIN "name" ON "person_info"."person_id" = "name"."id" GROUP BY "name"."surname_pcode" ORDER BY "name"."surname_pcode";
SELECT "aka_name"."imdb_index", COUNT(*) as agg_0, COUNT(*) as agg_1 FROM "aka_name" LEFT OUTER JOIN "name" ON "aka_name"."person_id" = "name"."id" LEFT OUTER JOIN "person_info" ON "name"."id" = "person_info"."person_id" WHERE "aka_name"."person_id" >= 4119113 AND "person_info"."id" >= 2623285 GROUP BY "aka_name"."imdb_index" ORDER BY "aka_name"."imdb_index";
SELECT SUM("title"."production_year") AS agg_0, SUM("movie_keyword"."keyword_id") AS agg_1 FROM "keyword" LEFT OUTER JOIN "movie_keyword" ON "keyword"."id" = "movie_keyword"."keyword_id" LEFT OUTER JOIN "title" ON "movie_keyword"."movie_id" = "title"."id";
43 changes: 43 additions & 0 deletions experiments/15-e2e-scenarios-v2/motivation/run_workload.sh
@@ -0,0 +1,43 @@
#! /bin/bash

script_loc=$(cd $(dirname $0) && pwd -P)
cd $script_loc
source ../common.sh

# TODO: This executor file should be adapted to run against the baselines too
# (TiDB / Serverless Redshift + Aurora)

# Arguments:
# --config-file
# --planner-config-file
# --query-indexes
extract_named_arguments "$@"

export BRAD_IGNORE_BLUEPRINT=1
start_brad $config_file $planner_config_file
sleep 30

# 1 Analytical runner
start_repeating_olap_runner 1 3 0 $ra_query_indexes "ra_1"
rana_pid=$runner_pid

# 1 Transaction runner
start_txn_runner 1
txn_pid=$runner_pid

log_workload_point "clients_started"

function inner_cancel_experiment() {
    cancel_experiment $rana_pid $txn_pid
}

trap "inner_cancel_experiment" INT
trap "inner_cancel_experiment" TERM

# Run for 10 minutes.
sleep $((60 * 10))

# Shut down everything now.
>&2 echo "Experiment done. Shutting down runners..."
graceful_shutdown $rana_pid $txn_pid
log_workload_point "shutdown_complete"
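The variables $config_file, $planner_config_file, and $ra_query_indexes come from extract_named_arguments, which is defined in ../common.sh and not shown in this diff. A hypothetical sketch of the convention it presumably implements (not the actual implementation):

    function extract_named_arguments() {
        # Hypothetical: turns "--config-file x" into config_file=x,
        # replacing dashes with underscores in the variable name.
        while [[ $# -gt 0 ]]; do
            local name="${1#--}"
            printf -v "${name//-/_}" '%s' "$2"
            shift 2
        done
    }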
115 changes: 115 additions & 0 deletions experiments/15-e2e-scenarios-v2/motivation/set_up_router.py
@@ -0,0 +1,115 @@
import asyncio
import argparse
import logging

from brad.asset_manager import AssetManager
from brad.blueprint import Blueprint
from brad.blueprint.manager import BlueprintManager
from brad.blueprint.provisioning import Provisioning
from brad.config.engine import Engine
from brad.config.file import ConfigFile
from brad.daemon.transition_orchestrator import TransitionOrchestrator
from brad.planner.enumeration.blueprint import EnumeratedBlueprint
from brad.query_rep import QueryRep
from brad.routing.abstract_policy import FullRoutingPolicy
from brad.routing.cached import CachedLocationPolicy
from brad.utils import set_up_logging

logger = logging.getLogger(__name__)


async def run_transition(
    config: ConfigFile,
    blueprint_mgr: BlueprintManager,
    next_blueprint: Blueprint,
) -> None:
    logger.info("Starting the transition...")
    assert next_blueprint is not None
    await blueprint_mgr.start_transition(next_blueprint, new_score=None)
    orchestrator = TransitionOrchestrator(config, blueprint_mgr)
    logger.info("Running the transition...")
    await orchestrator.run_prepare_then_transition()
    logger.info("Running the post-transition clean up...")
    await orchestrator.run_clean_up_after_transition()
    logger.info("Done!")


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config-file",
        type=str,
        required=True,
        help="Path to BRAD's configuration file.",
    )
    parser.add_argument(
        "--schema-name",
        type=str,
        required=True,
        help="The name of the schema to set up.",
    )
    parser.add_argument("--query-bank-file", type=str)
    parser.add_argument(
        "--athena-queries", type=str, help="Comma separated list of indices."
    )
    parser.add_argument(
        "--aurora-queries", type=str, help="Comma separated list of indices."
    )
    parser.add_argument(
        "--redshift-queries", type=str, help="Comma separated list of indices."
    )
    # Assumed flag: enables debug-level output in set_up_logging() below.
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    # Logging can only be configured once the arguments are parsed.
    set_up_logging(debug_mode=args.debug)

    # 1. Load the config.
    config = ConfigFile.load(args.config_file)

    # 2. Load the existing blueprint.
    assets = AssetManager(config)
    blueprint_mgr = BlueprintManager(config, assets, args.schema_name)
    blueprint_mgr.load_sync()
    blueprint = blueprint_mgr.get_blueprint()

    # 3. Load the query bank.
    queries = []
    with open(args.query_bank_file, "r", encoding="UTF-8") as file:
        for line in file:
            clean = line.strip()
            if clean.endswith(";"):
                clean = clean[:-1]
            queries.append(QueryRep(clean))

    # 4. Create the fixed routing policy. Each per-engine query list is
    # optional (e.g., the `set_up_router_before` target only routes to
    # Redshift), so skip any list that was not provided.
    query_map = {}
    if args.athena_queries is not None:
        for qidx_str in args.athena_queries.split(","):
            qidx = int(qidx_str.strip())
            query_map[queries[qidx]] = Engine.Athena

    if args.redshift_queries is not None:
        for qidx_str in args.redshift_queries.split(","):
            qidx = int(qidx_str.strip())
            query_map[queries[qidx]] = Engine.Redshift

    if args.aurora_queries is not None:
        for qidx_str in args.aurora_queries.split(","):
            qidx = int(qidx_str.strip())
            query_map[queries[qidx]] = Engine.Aurora
    clp = CachedLocationPolicy(query_map)

    # 5. Replace the policy.
    enum_blueprint = EnumeratedBlueprint(blueprint)
    existing_policy = enum_blueprint.get_routing_policy()
    replaced_policy = FullRoutingPolicy(
        indefinite_policies=[clp], definite_policy=existing_policy.definite_policy
    )
    enum_blueprint.set_routing_policy(replaced_policy)

    # Ensure the provisioning is as expected.
    enum_blueprint.set_aurora_provisioning(Provisioning("db.t4g.medium", 2))
    enum_blueprint.set_redshift_provisioning(Provisioning("dc2.large", 2))

    # 6. Transition to the new blueprint.
    modified_blueprint = enum_blueprint.to_blueprint()
    asyncio.run(run_transition(config, blueprint_mgr, modified_blueprint))


if __name__ == "__main__":
    main()
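For reference, the expanded command line implied by the set_up_router_after target in the COND file above, assuming Conductor passes each option as a --key value flag:

    python3 set_up_router.py \
        --config-file config/config_large_100.yml \
        --schema-name imdb_extended_100g \
        --query-bank-file queries.sql \
        --aurora-queries 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 \
        --athena-queries 15,16,17,18,19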
1 change: 1 addition & 0 deletions workloads/IMDB_extended/run_repeating_analytics.py
@@ -471,6 +471,7 @@ def main():
         help="Path to the distribution of number of clients for each period of a day",
     )
     parser.add_argument("--num-clients", type=int, default=1)
+    parser.add_argument("--client-offset", type=int, default=0)
     parser.add_argument("--avg-gap-s", type=float)
     parser.add_argument("--avg-gap-std-s", type=float, default=0.5)
     parser.add_argument(
1 change: 1 addition & 0 deletions workloads/IMDB_extended/run_transactions.py
@@ -197,6 +197,7 @@ def main():
         default=1,
         help="The number of transactional clients.",
     )
+    parser.add_argument("--client-offset", type=int, default=0)
     parser.add_argument(
         "--seed", type=int, default=42, help="Random seed for reproducibility."
     )
2 changes: 1 addition & 1 deletion workloads/IMDB_extended/workload_utils/connect.py
@@ -48,7 +48,7 @@ def connect_to_db(
         db = PyodbcDatabase(pyodbc.connect(os.environ[args.cstr_var], autocommit=True))
 
     else:
-        port_offset = worker_index % args.num_front_ends
+        port_offset = (worker_index + args.client_offset) % args.num_front_ends
         brad = BradGrpcClient(args.brad_host, args.brad_port + port_offset)
         brad.connect()
         db = BradDatabase(brad)
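A small runnable check of the new mapping, using num_front_ends=2 to match the num-front-ends setting in the COND file:

    num_front_ends=2
    for client_offset in 0 1; do
        for worker_index in 0 1; do
            echo "worker=$worker_index offset=$client_offset -> front end $(( (worker_index + client_offset) % num_front_ends ))"
        done
    done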
