Commit

Merge branch 'stanfordnlp:main' into main
mlederbauer authored Mar 26, 2024
2 parents d195871 + c8375fc commit fe3e2d0
Showing 13 changed files with 598 additions and 459 deletions.
13 changes: 7 additions & 6 deletions docs/docs/building-blocks/1-language_models.md
@@ -18,10 +18,6 @@ For example, to use OpenAI language models, you can do it as follows.
gpt3_turbo = dspy.OpenAI(model='gpt-3.5-turbo-1106', max_tokens=300)
dspy.configure(lm=gpt3_turbo)
```
**Output:**
```text
['Hello! How can I assist you today?']
```

## Directly calling the LM.

@@ -31,11 +27,16 @@ You can simply call the LM with a string to give it a raw prompt, i.e. a string.
gpt3_turbo("hello! this is a raw prompt to GPT-3.5")
```

**Output:**
```text
['Hello! How can I assist you today?']
```

This is almost never the recommended way to interact with LMs in DSPy, but it is allowed.

## Using the LM with DSPy signatures.

You can also use the LM via DSPy [signatures] and [modules], which we discuss in more depth in the remaining guides.
You can also use the LM via DSPy [`signature` (input/output spec)](https://dspy-docs.vercel.app/docs/building-blocks/signatures) and [`modules`](https://dspy-docs.vercel.app/docs/building-blocks/modules), which we discuss in more depth in the remaining guides.

```python
# Define a module (ChainOfThought) and assign it a signature (return an answer, given a question).
@@ -172,4 +173,4 @@ model = 'dist/prebuilt/mlc-chat-Llama-2-7b-chat-hf-q4f16_1'
model_path = 'dist/prebuilt/lib/Llama-2-7b-chat-hf-q4f16_1-cuda.so'

llama = dspy.ChatModuleClient(model=model, model_path=model_path)
```
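
The second hunk above cuts the signatures/modules example off mid-block. A minimal sketch of the complete pattern it introduces, assuming the standard string-form signature API (the question text is illustrative):

```python
import dspy

# Configure the LM as shown at the top of this file.
gpt3_turbo = dspy.OpenAI(model='gpt-3.5-turbo-1106', max_tokens=300)
dspy.configure(lm=gpt3_turbo)

# Define a module (ChainOfThought) and assign it a signature
# (return an answer, given a question).
qa = dspy.ChainOfThought('question -> answer')

# The module uses the globally configured LM; the question is illustrative.
response = qa(question="Where is Guaraní spoken?")
print(response.answer)
```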
10 changes: 1 addition & 9 deletions dsp/modules/azure_openai.py
@@ -1,14 +1,6 @@
import logging

# Configure logging
logging.basicConfig(
level=logging.INFO,
format="%(message)s",
handlers=[logging.FileHandler("azure_openai_usage.log")],
)

import functools
import json
import logging
from typing import Any, Literal, Optional, cast

import backoff
11 changes: 0 additions & 11 deletions dsp/modules/databricks.py
@@ -1,14 +1,3 @@
import logging

# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(message)s',
handlers=[
logging.FileHandler('openai_usage.log'),
],
)

import functools
import json
from typing import Literal, Optional
10 changes: 1 addition & 9 deletions dsp/modules/gpt3.py
@@ -1,14 +1,6 @@
import logging

# Configure logging
logging.basicConfig(
level=logging.INFO,
format="%(message)s",
handlers=[logging.FileHandler("openai_usage.log")],
)

import functools
import json
import logging
from typing import Any, Literal, Optional, cast

import backoff
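The three hunks above all remove the same pattern: configuring the root logger as an import-time side effect, which silently overrides the logging setup of any application that imports DSPy. A minimal sketch of the failure mode (file names illustrative):

```python
import logging

# What the removed module-level code did on import:
logging.basicConfig(
    level=logging.INFO,
    format="%(message)s",
    handlers=[logging.FileHandler("openai_usage.log")],
)

# basicConfig() is a no-op once the root logger has handlers, so a downstream
# application's own configuration silently does nothing...
logging.basicConfig(level=logging.DEBUG, filename="my_app.log")

# ...and its records land in openai_usage.log instead of my_app.log.
logging.getLogger(__name__).info("ends up in openai_usage.log")
```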
1 change: 1 addition & 0 deletions dspy/__init__.py
@@ -8,6 +8,7 @@

# Functional must be imported after primitives, predict and signatures
from .functional import * # isort: skip
from .utils.logging import logger, set_log_level, set_log_output

settings = dsp.settings

7 changes: 7 additions & 0 deletions dspy/datasets/dataloader.py
@@ -62,6 +62,13 @@ def from_json(self, file_path:str, fields: List[str] = None, input_keys: Tuple[str] = ()) -> List[dspy.Example]:

return [dspy.Example({field:row[field] for field in fields}).with_inputs(*input_keys) for row in dataset]

def from_parquet(self, file_path: str, fields: List[str] = None, input_keys: Tuple[str] = ()) -> List[dspy.Example]:
dataset = load_dataset("parquet", data_files=file_path)["train"]

if not fields:
fields = list(dataset.features)

return [dspy.Example({field: row[field] for field in fields}).with_inputs(*input_keys) for row in dataset]

def sample(
self,
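A hedged usage sketch for the new `from_parquet` method; the path and column names are hypothetical, and the `datasets` library must be installed since the method delegates to `load_dataset`:

```python
from dspy.datasets import DataLoader

dl = DataLoader()

# Load a local parquet file into dspy.Example objects, marking "question"
# as the input field (path and columns are hypothetical).
trainset = dl.from_parquet(
    "data/train.parquet",
    fields=["question", "answer"],
    input_keys=("question",),
)
print(len(trainset), trainset[0].question)
```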
44 changes: 21 additions & 23 deletions dspy/predict/program_of_thought.py
@@ -8,20 +8,22 @@


class ProgramOfThought(Module):
def __init__(self, signature, max_iters=3):
def __init__(self, signature, max_iters=3, import_white_list=None):
super().__init__()
self.signature = signature = ensure_signature(signature)
self.max_iters = max_iters
self.import_white_list = import_white_list

self.input_fields = signature.input_fields
self.output_fields = signature.output_fields

assert len(self.output_fields) == 1, "PoT only supports one output field."

self.output_field_name = next(iter(self.output_fields))
inputs_ = ", ".join(
[f"`{field_name}`" for field_name in self.input_fields.keys()],
)
outputs_ = ", ".join(
[f"`{field_name}`" for field_name in self.output_fields.keys()],
)
outputs_ = f"`{self.output_field_name}`"

assert len(self.output_fields) == 1, "PoT only supports one output field."

@@ -55,7 +57,6 @@ def __init__(self, signature, max_iters=3):
self._generate_instruction("answer"),
),
)

def _generate_signature(self, mode):
signature_dict = dict(self.input_fields)
fields_for_mode = {
@@ -92,7 +93,7 @@ def _generate_signature(self, mode):
prefix="Code Output:",
desc="output of previously-generated python code",
),
"answer": self.signature.fields["answer"],
self.output_field_name: self.signature.fields[self.output_field_name],
},
}
signature_dict.update(fields_for_mode[mode])
@@ -105,12 +106,7 @@ def _generate_instruction(self, mode):
for field_name in self._generate_signature(mode).input_fields
],
)
mode_outputs = ", ".join(
[
f"`{field_name}`"
for field_name in self._generate_signature(mode).output_fields
],
)
mode_outputs = f"`{self.output_field_name}`"
if mode == "generate":
instr = [
f"You will be given {mode_inputs} and you will respond with {mode_outputs}.",
@@ -120,7 +116,7 @@
elif mode == "regenerate":
instr = [
f"You are given {mode_inputs} due to an error in previous code.",
f"Your task is to correct the error and provide the new {mode_outputs}.",
"Your task is to correct the error and provide the new `generated_code`.",
]
else: # mode == 'answer'
instr = [
@@ -129,6 +125,7 @@

return "\n".join(instr)


def parse_code(self, code_data):
code = (
code_data.get("generated_code", "").split("---", 1)[0].split("\n\n\n", 1)[0]
@@ -156,32 +153,33 @@ def execute_code(self, code):
if not code:
return code, None, "Error: Empty code before execution."
code_prompt = CodePrompt(code, code_type="python")
interpreter = PythonInterpreter(action_space={"print": print})
interpreter = PythonInterpreter(action_space={"print": print}, import_white_list=self.import_white_list)
try:
output = str(code_prompt.execute(interpreter=interpreter)[0])
return code, output, None
except Exception as e:
return code, None, str(e)

def forward(self, **kwargs):
code_data = self.code_generate(question=kwargs["question"])
input_kwargs = {
field_name: kwargs[field_name] for field_name in self.input_fields
}
code_data = self.code_generate(**input_kwargs)
parsed_code, error = self.parse_code(code_data)
# FIXME: Don't try to execute the code if it didn't parse
code, output, error = self.execute_code(parsed_code)
hop = 0
while hop < self.max_iters and error:
print("Error in code execution")
code_data = self.code_regenerate(
question=kwargs["question"], previous_code=code, error=error,
)
input_kwargs.update({"previous_code": code, "error": error})
code_data = self.code_regenerate(**input_kwargs)
parsed_code, error = self.parse_code(code_data)
# FIXME: Don't try to execute the code if it didn't parse
code, output, error = self.execute_code(parsed_code)
hop += 1
if hop == self.max_iters:
print("Max hops reached. Error persists.")
return None
answer_gen_result = self.generate_answer(
question=kwargs["question"], final_generated_code=code, code_output=output,
)
return answer_gen_result
input_kwargs.update({"final_generated_code": code, "code_output": output})
answer_gen_result = self.generate_answer(**input_kwargs)
return answer_gen_result
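A hedged sketch of the new `import_white_list` parameter in use; the signature string and the whitelisted module are illustrative choices, not prescribed by the diff:

```python
import dspy

pot = dspy.ProgramOfThought(
    "question -> answer",
    max_iters=3,
    # Modules the sandboxed interpreter is allowed to import (illustrative).
    import_white_list=["math"],
)

result = pot(question="What is 7 factorial?")
print(result.answer)
```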
30 changes: 21 additions & 9 deletions dspy/retrieve/weaviate_rm.py
@@ -5,6 +5,8 @@

try:
import weaviate
import weaviate.classes as wvc
from weaviate.collections.classes.grpc import HybridFusion
except ImportError:
raise ImportError(
"The 'weaviate' extra is required to use WeaviateRM. Install it with `pip install dspy-ai[weaviate]`",
@@ -22,6 +24,9 @@ class WeaviateRM(dspy.Retrieve):
weaviate_collection_name (str): The name of the Weaviate collection.
weaviate_client (WeaviateClient): An instance of the Weaviate client.
k (int, optional): The default number of top passages to retrieve. Defaults to 3.
weaviate_collection_text_key (str, optional): The key in the collection holding the text content. Defaults to "content".
weaviate_alpha (float, optional): The alpha value for the hybrid query. Defaults to 0.5.
weaviate_fusion_type (HybridFusion, optional): The fusion type for the hybrid query. Defaults to RELATIVE_SCORE.
Examples:
Below is a code snippet that shows how to use Weaviate as the default retriever:
@@ -44,16 +49,20 @@ class WeaviateRM(dspy.Retrieve):

def __init__(self,
weaviate_collection_name: str,
weaviate_client: weaviate.Client,
weaviate_client: weaviate.WeaviateClient,
k: int = 3,
weaviate_collection_text_key: Optional[str] = "content",
weaviate_alpha: Optional[float] = 0.5,
weaviate_fusion_type: Optional[HybridFusion] = HybridFusion.RELATIVE_SCORE,
):
self._weaviate_collection_name = weaviate_collection_name
self._weaviate_client = weaviate_client
self._weaviate_collection_text_key = weaviate_collection_text_key
self._weaviate_alpha = weaviate_alpha
self._weaviate_fusion_type = weaviate_fusion_type
super().__init__(k=k)

def forward(self, query_or_queries: Union[str, List[str]], k: Optional[int]) -> dspy.Prediction:
def forward(self, query_or_queries: Union[str, List[str]], k: Optional[int] = None) -> dspy.Prediction:
"""Search with Weaviate for self.k top passages for query
Args:
@@ -72,14 +81,17 @@ def forward(self, query_or_queries: Union[str, List[str]], k: Optional[int]) -> dspy.Prediction:
queries = [q for q in queries if q]
passages = []
for query in queries:
results = self._weaviate_client.query\
.get(self._weaviate_collection_name, [self._weaviate_collection_text_key])\
.with_hybrid(query=query)\
.with_limit(k)\
.do()
collection = self._weaviate_client.collections.get(self._weaviate_collection_name)
results = collection.query.hybrid(query=query,
limit=k,
alpha=self._weaviate_alpha,
fusion_type=self._weaviate_fusion_type,
return_metadata=wvc.query.MetadataQuery(
distance=True, score=True),
)

results = results["data"]["Get"][self._weaviate_collection_name]
parsed_results = [result[self._weaviate_collection_text_key] for result in results]
parsed_results = [result.properties[self._weaviate_collection_text_key] for result in results.objects]
passages.extend(dotdict({"long_text": d}) for d in parsed_results)

# Return type not changed, needs to be a Prediction object. But other code will break if we change it.
return passages
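
A hedged sketch of constructing the updated retriever against the v4 client; the connection method and collection name are hypothetical:

```python
import weaviate
import dspy
from dspy.retrieve.weaviate_rm import WeaviateRM

# The retriever now expects a v4 WeaviateClient; a local instance is assumed.
client = weaviate.connect_to_local()

retriever = WeaviateRM(
    weaviate_collection_name="MyDocuments",  # hypothetical collection
    weaviate_client=client,
    k=3,
    weaviate_alpha=0.5,  # keyword/vector balance for the hybrid query
)

dspy.settings.configure(rm=retriever)
passages = retriever("What is DSPy?")
```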
1 change: 1 addition & 0 deletions dspy/utils/__init__.py
@@ -1 +1,2 @@
from .dummies import *
from .logging import *
103 changes: 103 additions & 0 deletions dspy/utils/logging.py
@@ -0,0 +1,103 @@

import logging
import os
import sys
import typing as t

import structlog

logger = structlog.get_logger()

class LogSettings:
    def __init__(self, level: str, output_type: str, method: str, file_name: t.Optional[str]) -> None:
        self.level = level
        self.output_type = output_type
        self.method = method
        self.file_name = file_name
        self._configure_structlog()

    def _configure_structlog(self):
        if self.output_type == "str":
            renderer = structlog.dev.ConsoleRenderer()
        else:
            renderer = structlog.processors.JSONRenderer()

        structlog.configure(
            processors=[
                structlog.stdlib.add_logger_name,
                structlog.stdlib.add_log_level,
                structlog.processors.CallsiteParameterAdder(
                    {
                        structlog.processors.CallsiteParameter.FILENAME,
                        structlog.processors.CallsiteParameter.LINENO,
                    },
                ),
                structlog.processors.TimeStamper(fmt="iso"),
                structlog.processors.StackInfoRenderer(),
                structlog.processors.format_exc_info,
                structlog.processors.UnicodeDecoder(),
                renderer,
            ],
            logger_factory=structlog.stdlib.LoggerFactory(),
            wrapper_class=structlog.stdlib.BoundLogger,
        )

    def set_log_level(self, level: str) -> None:
        """Set the logging level."""
        level = level.upper()
        if level not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
            raise ValueError(f"log level provided ({level}) is not one of DEBUG, INFO, WARNING, ERROR, CRITICAL")

        self.level = level

        log_level = getattr(logging, level)
        logger.setLevel(log_level)

    def set_log_output(self, method: t.Optional[str] = None, file_name: t.Optional[str] = None, output_type: t.Optional[str] = None):
        if method is not None and method not in ["console", "file"]:
            raise ValueError("method provided can only be 'console' or 'file'")

        if method == "file" and file_name is None:
            raise ValueError("file_name must be provided when method = 'file'")

        if method is not None:
            self.method = method
            self.file_name = file_name

        if output_type is not None and output_type not in ["str", "json"]:
            raise ValueError("output_type provided can only be 'str' or 'json'")

        if output_type is not None:
            self.output_type = output_type

        # Update the renderer.
        self._configure_structlog()

        # Grab the root logger and drop its existing handlers.
        log = logging.getLogger()
        for handler in log.handlers[:]:
            log.removeHandler(handler)

        # Add the new handler.
        if self.method == "file":
            assert self.file_name is not None
            log.addHandler(logging.FileHandler(self.file_name))
        else:
            log.addHandler(logging.StreamHandler(sys.stdout))


level = os.environ.get("log_level", "info").upper()

# Set defaults.
logging.basicConfig(
    format="%(message)s",
    stream=sys.stdout,
    level=level,
)

settings = LogSettings(level=level, output_type="str", method="console", file_name=None)
set_log_level = settings.set_log_level
set_log_output = settings.set_log_output
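
Since `dspy/__init__.py` now re-exports `logger`, `set_log_level`, and `set_log_output`, the new controls can be driven from the top-level package. A short sketch (the log file name is illustrative):

```python
import dspy

# Raise verbosity for a debugging session.
dspy.set_log_level("debug")

# Switch to structured JSON logs written to a file (name is illustrative).
dspy.set_log_output(method="file", file_name="dspy_run.log", output_type="json")

dspy.logger.info("pipeline starting")
```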