Skip to content

Commit

Permalink
1.1.4a1 release with basic standalone answer support
Browse files Browse the repository at this point in the history
  • Loading branch information
george1459 committed Apr 17, 2024
1 parent 0cad706 commit 88e274c
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 39 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Package metadata
name = "suql"
version = "1.1.4a0"
version = "1.1.4a1"
description = "Structured and Unstructured Query Language (SUQL) Python API"
author = "Shicheng Liu"
author_email = "[email protected]"
Expand Down
86 changes: 48 additions & 38 deletions src/suql/free_text_fcns_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,46 @@
# engine = "gpt-3.5-turbo-0613"


def _answer(
    source,
    query,
    type_prompt=None,
    k=5,
    max_input_token=3800,
    engine="gpt-3.5-turbo-0613",
):
    """
    Answer `query` from `source` using the `prompts/answer_qa.prompt` LLM prompt.

    Parameters:
        source: Either a single piece of text or a list of texts. For a list,
            `compute_top_similarity_documents` selects up to `k` documents and
            they are kept, in the order returned, while the newline-joined
            text stays under `max_input_token` tokens.
        query: The question to answer.
        type_prompt: Optional output type hint. `"date"` and `"int4"` append a
            formatting instruction to the prompt; any other value adds nothing.
        k: Number of documents requested from `compute_top_similarity_documents`.
        max_input_token: Token budget for the selected documents.
        engine: LLM engine name forwarded to `llm_generate`.

    Returns:
        A dict `{"result": <answer>}`. When `source` is empty/falsy the result
        is the fixed string `"no information"` and no LLM call is made.
    """
    if not source:
        return {"result": "no information"}

    # Imported inside the function, as the original code did. It sits after
    # the guard above so the empty-source path does not need the LLM module.
    from suql.prompt_continuation import llm_generate

    if isinstance(source, list):
        text_res = []
        documents = compute_top_similarity_documents(source, query, top=k)
        for document in documents:
            # Stop at the first document that would exceed the token budget.
            if num_tokens_from_string("\n".join(text_res + [document])) >= max_input_token:
                break
            text_res.append(document)
    else:
        text_res = [source]

    # Use a separate local for the instruction text. The previous code reset
    # `type_prompt` to "" before inspecting it, so the hint was always dropped.
    if type_prompt == "date":
        type_instruction = " Output in date format, for instance 2001-09-28."
    elif type_prompt == "int4":
        type_instruction = " Output an integer."
    else:
        type_instruction = ""

    continuation, _ = llm_generate(
        "prompts/answer_qa.prompt",
        {
            "reviews": text_res,
            "question": query,
            "type_prompt": type_instruction,
        },
        engine=engine,
        max_tokens=200,
        temperature=0.0,
        stop_tokens=["\n"],
        postprocess=False,
    )
    return {"result": continuation}

def start_free_text_fncs_server(
host="127.0.0.1", port=8500, k=5, max_input_token=3800, engine="gpt-3.5-turbo-0613"
):
Expand Down Expand Up @@ -64,45 +104,15 @@ def answer():
if "text" not in data or "question" not in data:
return None

if not data["text"]:
return {"result": "no information"}

text_res = []
if isinstance(data["text"], list):
documents = compute_top_similarity_documents(
data["text"], data["question"], top=k
)
for i in documents:
if num_tokens_from_string("\n".join(text_res + [i])) < max_input_token:
text_res.append(i)
else:
break
else:
text_res = [data["text"]]

type_prompt = ""
if "type_prompt" in data:
if data["type_prompt"] == "date":
type_prompt = f" Output in date format, for instance 2001-09-28."
if data["type_prompt"] == "int4":
type_prompt = f" Output an integer."

continuation, _ = llm_generate(
"prompts/answer_qa.prompt",
{
"reviews": text_res,
"question": data["question"],
"type_prompt": type_prompt,
},
engine=engine,
max_tokens=200,
temperature=0.0,
stop_tokens=["\n"],
postprocess=False,
return _answer(
data["text"],
data["question"],
type_prompt=data["type_prompt"] if "type_prompt" in data else None,
k = k,
max_input_token = max_input_token,
engine = engine
)

res = {"result": continuation}
return res


@app.route("/summary", methods=["POST"])
def summary():
Expand Down
31 changes: 31 additions & 0 deletions src/suql/sql_free_text_support/execute_free_text_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import time
import traceback
import logging
import re
from collections import defaultdict
from copy import deepcopy
from typing import List, Union
Expand All @@ -24,6 +25,7 @@
from suql.postgresql_connection import execute_sql, execute_sql_with_column_info
from suql.prompt_continuation import llm_generate
from suql.utils import num_tokens_from_string
from suql.free_text_fcns_server import _answer

# System parameters, do not modify
_SET_FREE_TEXT_FCNS = ["answer"]
Expand Down Expand Up @@ -1558,6 +1560,31 @@ def _analyze_SelectStmt(
)


def _parse_standalone_answer(suql):
    """
    Parse a standalone `answer(<source>, '<question>')` call.

    The whole string must consist of the call: surrounding whitespace and one
    optional trailing semicolon are allowed, anything else makes it a
    non-match. `<source>` is an identifier made of letters, digits and
    underscores; `<question>` is a non-empty single-line string wrapped in
    single or double quotes.

    Parameters:
        suql: The query string to inspect.

    Returns:
        A `(source, question)` tuple of strings when `suql` is a standalone
        answer call, otherwise `None`.
    """
    # \s* allows for any number of whitespaces around the tokens.
    pattern = (
        r"\s*answer\s*\(\s*([a-zA-Z_0-9]+)\s*,\s*['\"](.+?)['\"]\s*\)\s*;?\s*"
    )

    # `re.fullmatch` (unlike `re.match`) also anchors at the end of the string,
    # so input with extra text after the closing parenthesis is not mistaken
    # for a standalone call and silently truncated.
    match = re.fullmatch(pattern, suql)
    if match is None:
        return None

    return match.group(1), match.group(2)

def _execute_standalone_answer(suql, source_file_mapping):
    """
    Execute a standalone `answer(<source>, '<question>')` query against a file.

    Parameters:
        suql: The query string, parsed with `_parse_standalone_answer`.
        source_file_mapping: Mapping from source name (as written in the query)
            to the path of the text file holding that source's content.

    Returns:
        The dict returned by `_answer` for the file content and question, or
        `None` when `suql` is not a standalone answer call or its source name
        is not present in `source_file_mapping`.
    """
    parsed = _parse_standalone_answer(suql)
    if parsed is None:
        # Not a standalone answer call. Mirror the unknown-source case below
        # rather than failing on tuple unpacking.
        return None

    source, query = parsed
    if source not in source_file_mapping:
        return None

    with open(source_file_mapping[source], "r") as fd:
        source_content = fd.read()

    return _answer(source_content, query)


def suql_execute(
suql,
table_w_ids,
Expand All @@ -1572,6 +1599,7 @@ def suql_execute(
select_userpswd="select_user",
create_username="creator_role",
create_userpswd="creator_role",
source_file_mapping={},
):
"""
Main entry point to the SUQL Python-based compiler.
Expand Down Expand Up @@ -1645,6 +1673,9 @@ def suql_execute(
else:
logging.basicConfig(level=logging.CRITICAL + 1)

if _parse_standalone_answer(suql) is not None:
return _execute_standalone_answer(suql, source_file_mapping)

results, column_names, cache = _suql_execute_single(
suql,
table_w_ids,
Expand Down

0 comments on commit 88e274c

Please sign in to comment.