Skip to content

Commit

Permalink
New option --show-query for command example-queries (#87)
Browse files Browse the repository at this point in the history
The new option has three choices: `always`, `never`, and `on-error`. With `never`, only the query description, time, and result size are shown for each query, as before. With `always`, the SPARQL query is shown in addition for every query. With `on-error`, it is only shown when the query failed. The query is pretty-printed.
  • Loading branch information
hannahbast authored Nov 27, 2024
1 parent 4fe2447 commit 7104edf
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 38 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "qlever"
description = "Script for using the QLever SPARQL engine."
version = "0.5.12"
version = "0.5.13"
authors = [
{ name = "Hannah Bast", email = "[email protected]" }
]
Expand Down
131 changes: 94 additions & 37 deletions src/qlever/commands/example_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,25 @@ def additional_arguments(self, subparser) -> None:
default=14,
help="Width for printing the result size",
)
subparser.add_argument(
"--show-query",
choices=["always", "never", "on-error"],
default="never",
help="Show the queries that will be executed (always, never, on error)",
)

def pretty_print_query(self, query: str) -> None:
    """
    Log `query` in cyan, pretty-printed if possible.

    The query is echoed into the `sparqling/sparql-formatter` Docker
    image; lines starting with `PREFIX ` and empty lines are then
    filtered out of the formatter output via `sed` and `grep`. If any
    step of that fails, an error is logged and the query is logged
    unformatted instead.
    """
    # Assemble the shell pipeline piece by piece. The query is quoted with
    # `shlex.quote` so that it reaches `echo` as a single shell argument.
    pipeline_parts = (
        f"echo {shlex.quote(query)}",
        f" | docker run -i --rm sparqling/sparql-formatter",
        f" | sed '/^PREFIX /Id' | grep -v '^$'",
    )
    shell_pipeline = "".join(pipeline_parts)

    try:
        formatted_query = run_command(shell_pipeline, return_output=True)
        log.info(colored(formatted_query.rstrip(), "cyan"))
    except Exception as err:
        # Best effort: formatting is optional, so fall back to the raw
        # query rather than aborting the caller.
        log.error(f"Failed to pretty-print query: {err}")
        log.info(colored(query.rstrip(), "cyan"))

def execute(self, args) -> bool:
# We can't have both `--remove-offset-and-limit` and `--limit`.
Expand Down Expand Up @@ -262,6 +281,9 @@ def execute(self, args) -> bool:
# A bit of pretty-printing.
query = re.sub(r"\s+", " ", query)
query = re.sub(r"\s*\.\s*\}", " }", query)
if args.show_query == "always":
log.info("")
self.pretty_print_query(query)

# Launch query.
try:
Expand All @@ -282,55 +304,81 @@ def execute(self, args) -> bool:
params={"query": query},
result_file=result_file,
).strip()
if http_code != "200":
raise Exception(
f"HTTP code {http_code}" f" {Path(result_file).read_text()}"
)
time_seconds = time.time() - start_time
error_msg = None
if http_code == "200":
time_seconds = time.time() - start_time
error_msg = None
else:
error_msg = {
"short": f"HTTP code: {http_code}",
"long": re.sub(r"\s+", " ", Path(result_file).read_text()),
}
except Exception as e:
if args.log_level == "DEBUG":
traceback.print_exc()
error_msg = re.sub(r"\s+", " ", str(e))
error_msg = {
"short": "Exception",
"long": re.sub(r"\s+", " ", str(e)),
}

# Get result size (via the command line, in order to avoid loading
# a potentially large JSON file into Python, which is slow).
if error_msg is None:
try:
if args.download_or_count == "count":
if args.accept == "text/tab-separated-values":
result_size = run_command(
f"sed 1d {result_file}", return_output=True
)
else:
# CASE 0: The result is empty despite a 200 HTTP code.
if Path(result_file).stat().st_size == 0:
result_size = 0
error_msg = {
"short": "Empty result",
"long": "curl returned with code 200, "
"but the result is empty",
}

# CASE 1: Just counting the size of the result (TSV or JSON).
elif args.download_or_count == "count":
if args.accept == "text/tab-separated-values":
result_size = run_command(
f"sed 1d {result_file}", return_output=True
)
else:
try:
result_size = run_command(
f'jq -r ".results.bindings[0]'
f" | to_entries[0].value.value"
f' | tonumber" {result_file}',
return_output=True,
)
except Exception as e:
error_msg = {
"short": "Malformed JSON",
"long": "curl returned with code 200, "
"but the JSON is malformed: "
+ re.sub(r"\s+", " ", str(e)),
}

# CASE 2: Downloading the full result (TSV, CSV, Turtle, JSON).
else:
if (
args.accept == "text/tab-separated-values"
or args.accept == "text/csv"
):
result_size = run_command(
f"sed 1d {result_file} | wc -l", return_output=True
)
elif args.accept == "text/turtle":
result_size = run_command(
f"sed '1d;/^@prefix/d;/^\\s*$/d' " f"{result_file} | wc -l",
return_output=True,
)
else:
if (
args.accept == "text/tab-separated-values"
or args.accept == "text/csv"
):
result_size = run_command(
f"sed 1d {result_file} | wc -l", return_output=True
)
elif args.accept == "text/turtle":
result_size = run_command(
f"sed '1d;/^@prefix/d;/^\\s*$/d' "
f"{result_file} | wc -l",
return_output=True,
)
else:
try:
result_size = run_command(
f'jq -r ".results.bindings | length"' f" {result_file}",
return_output=True,
)
result_size = int(result_size)
except Exception as e:
error_msg = str(e)
except Exception as e:
error_msg = {
"short": "Malformed JSON",
"long": re.sub(r"\s+", " ", str(e)),
}

# Remove the result file (unless in debug mode).
if args.log_level != "DEBUG":
Expand All @@ -341,6 +389,7 @@ def execute(self, args) -> bool:
description = description[: args.width_query_description - 3]
description += "..."
if error_msg is None:
result_size = int(result_size)
log.info(
f"{description:<{args.width_query_description}} "
f"{time_seconds:6.2f} s "
Expand All @@ -352,16 +401,24 @@ def execute(self, args) -> bool:
num_failed += 1
if (
args.width_error_message > 0
and len(error_msg) > args.width_error_message
and len(error_msg["long"]) > args.width_error_message
and args.log_level != "DEBUG"
and args.show_query != "on-error"
):
error_msg = error_msg[: args.width_error_message - 3]
error_msg += "..."
log.error(
error_msg["long"] = (
error_msg["long"][: args.width_error_message - 3] + "..."
)
seperator_short_long = "\n" if args.show_query == "on-error" else " "
log.info(
f"{description:<{args.width_query_description}} "
f"failed "
f"{colored(error_msg, 'red')}"
f"{colored('FAILED ', 'red')}"
f"{colored(error_msg['short'], 'red'):>{args.width_result_size}}"
f"{seperator_short_long}"
f"{colored(error_msg['long'], 'red')}"
)
if args.show_query == "on-error":
self.pretty_print_query(query)
log.info("")

# Check that each query has a time and a result size, or it failed.
assert len(result_sizes) == len(query_times)
Expand Down

0 comments on commit 7104edf

Please sign in to comment.