Skip to content

Commit

Permalink
Merge pull request #35 from parkervg/feature/dialect-rework
Browse files Browse the repository at this point in the history
`_dialect.py` Re-Work, `modifier` Argument for `LLMQA`, Documentation updates
  • Loading branch information
parkervg authored Oct 26, 2024
2 parents 7f5898f + 08e2a5c commit 3eccdea
Show file tree
Hide file tree
Showing 53 changed files with 2,903 additions and 499 deletions.
4 changes: 4 additions & 0 deletions KNOWN_ISSUES.MD
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
```sql
SELECT {{LLMMap('Total points?', 'w::score')}} AS Total, score FROM w
WHERE Total > 12
```
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,17 @@ SELECT COUNT(*) FROM parks
| 1 |
<hr>

_Give me some info about the park in the state that Sarah Palin was governor of._
```sql
SELECT "Name", "Location", "Description" FROM parks
WHERE Location = {{RAGQA('Which state was Sarah Palin governor of?')}}
```
| Name | Location | Description |
|:-----------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------|
| Gates of the Arctic | Alaska | The country's northernmost park protects an expanse of pure wilderness in Alaska's Brooks Range and has no park facilities. |
| Katmai | Alaska | This park on the Alaska Peninsula protects the Valley of Ten Thousand Smokes, an ash flow formed by the 1912 eruption of Novarupta. |
<hr>

_What's the difference in visitors for those parks with a superlative in their description vs. those without?_
```sql
SELECT SUM(CAST(REPLACE("Recreation Visitors (2022)", ',', '') AS integer)) AS "Total Visitors",
Expand Down
10 changes: 9 additions & 1 deletion blendsql/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
from .blend import blend
from .ingredients import LLMQA, LLMMap, LLMJoin, LLMValidate, ImageCaption
from .ingredients import (
LLMQA,
LLMMap,
LLMJoin,
LLMValidate,
ImageCaption,
BingWebSearch,
RAGQA,
)
from . import _configure as config
7 changes: 7 additions & 0 deletions blendsql/_configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
# Name of the environment variable that set_async_limit() writes to.
ASYNC_LIMIT_KEY = "BLENDSQL_ASYNC_LIMIT"
# NOTE(review): stored as a string, whereas DEFAULT_MAX_OPTIONS_IN_PROMPT below
# is an int -- confirm that readers of these defaults expect the differing types.
DEFAULT_ASYNC_LIMIT = "10"

# Name of the environment variable that set_max_options_in_prompt() writes to.
# NOTE(review): unlike ASYNC_LIMIT_KEY, this name carries no "BLENDSQL_" prefix
# -- confirm this is intentional.
MAX_OPTIONS_IN_PROMPT_KEY = "MAX_OPTIONS_IN_PROMPT"
DEFAULT_MAX_OPTIONS_IN_PROMPT = 50


def set_async_limit(n: int):
    """Record `n` in the process environment under `ASYNC_LIMIT_KEY`.

    Environment variables only hold text, so the value is stringified
    before it is stored.

    Args:
        n: The limit to store.
    """
    limit_as_text = str(n)
    os.environ[ASYNC_LIMIT_KEY] = limit_as_text


def set_max_options_in_prompt(n: int):
    """Record `n` in the process environment under `MAX_OPTIONS_IN_PROMPT_KEY`.

    Environment variables only hold text, so the value is stringified
    before it is stored.

    Args:
        n: The maximum to store.
    """
    max_as_text = str(n)
    os.environ[MAX_OPTIONS_IN_PROMPT_KEY] = max_as_text
45 changes: 45 additions & 0 deletions blendsql/_constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from typing import NewType, Literal, Union, Optional, Dict
from enum import Enum, EnumMeta
from dataclasses import dataclass

Expand All @@ -12,12 +13,17 @@ def __contains__(cls, item):
# NOTE(review): presumably the separator placed between multiple answers and
# the placeholder used for a missing answer -- confirm against their usages.
DEFAULT_ANS_SEP = ";"
DEFAULT_NAN_ANS = "-"

# The 'modifier' arg can be either '*' or '+',
# or any string matching '{\d+}'
# NOTE(review): the NewType is registered under the name "modifier" rather than
# "ModifierType", and its base is a Union instead of a class; static type
# checkers reject both. The Literal members are also already covered by `str`.
# A plain alias may be what is intended -- confirm nothing calls ModifierType(...)
# before changing it.
ModifierType = NewType("modifier", Union[Literal["*", "+"], str, None])


# Closed set of ingredient kinds. Because the enum mixes in `str`, every member
# is itself a string equal to its value (e.g. IngredientType.MAP == "MAP").
# NOTE(review): StrInMeta is defined outside this view; it presumably customises
# `in` checks against the enum -- confirm against its definition.
class IngredientType(str, Enum, metaclass=StrInMeta):
MAP = "MAP"
STRING = "STRING"
QA = "QA"
JOIN = "JOIN"
ALIAS = "ALIAS"


@dataclass
Expand All @@ -28,3 +34,42 @@ class IngredientKwarg:
OPTIONS: str = "options"
REGEX: str = "regex"
MODEL: str = "model"
OUTPUT_TYPE: str = "output_type"
EXAMPLE_OUTPUTS: str = "example_outputs"


@dataclass
class DataType:
    """Description of an output type: a base name, a regex, and a modifier.

    The public `name` property folds the modifier into the base name, so a
    modified "int" reports itself as "List[int]".
    """

    # Base type name, e.g. "int", "str" or "list".
    _name: str
    # Regular expression associated with this type, or None when there is none.
    regex: Optional[str]
    # Modifier attached to the type, or None when no modifier applies.
    modifier: Optional[ModifierType]

    @property
    def name(self) -> str:
        """Return the display name of this type.

        Returns:
            The bare base name when there is no modifier or when the base name
            is already "list"; otherwise the base name wrapped as "List[...]".
        """
        has_no_modifier = self.modifier is None
        if has_no_modifier or self._name == "list":
            return self._name
        return f"List[{self._name}]"


@dataclass
class DataTypes:
    """Factories that build the supported `DataType` descriptions.

    Every attribute is a callable taking an optional `modifier` and returning
    a new `DataType`. Call them on the class itself, e.g. `DataTypes.INT("*")`;
    they are plain functions stored as class attributes, so calling one through
    an instance would pass that instance as `modifier`.
    """

    BOOL = lambda modifier=None: DataType("bool", "(t|f)", modifier)
    # SQLite INTEGER is a signed 64-bit value (max 9223372036854775807, which
    # is 19 digits long), so to be safe, cap the generation at 18 digits.
    # Raw strings keep `\d` / `\.` as regex escapes instead of invalid string
    # escapes, which newer Python versions warn about.
    INT = lambda modifier=None: DataType("int", r"(\d{1,18})", modifier)
    # NOTE: this pattern requires a leading digit and permits more than one '.'.
    FLOAT = lambda modifier=None: DataType("float", r"(\d(\d|\.)*)", modifier)
    STR = lambda modifier=None: DataType("str", None, modifier)
    # Lists default to the "*" modifier, unlike the other types.
    LIST = lambda modifier="*": DataType("list", None, modifier)
    ANY = lambda modifier=None: DataType("Any", None, modifier)


# Lookup from a type's string spelling to a DataType, built once at import time.
# The "List[...]" entries are the scalar types created with the "*" modifier,
# so each value's `.name` is equal to its key.
# NOTE: DataType is a non-frozen dataclass and these instances are shared by
# every lookup, so mutating a returned value changes the table for all users.
STR_TO_DATATYPE: Dict[str, DataType] = {
"str": DataTypes.STR(),
"int": DataTypes.INT(),
"float": DataTypes.FLOAT(),
"bool": DataTypes.BOOL(),
"List[str]": DataTypes.STR("*"),
"List[int]": DataTypes.INT("*"),
"List[float]": DataTypes.FLOAT("*"),
"List[bool]": DataTypes.BOOL("*"),
}
Loading

0 comments on commit 3eccdea

Please sign in to comment.