-
Notifications
You must be signed in to change notification settings - Fork 131
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
contrib: OpenAI code generation (#675)
* code generation chain added * reviews incorporated * dag.png updated --------- Co-authored-by: zilto <tjean@DESKTOP-V6JDCS2>
- Loading branch information
Showing
6 changed files
with
170 additions
and
0 deletions.
There are no files selected for viewing
37 changes: 37 additions & 0 deletions
37
contrib/hamilton/contrib/user/zilto/llm_generate_code/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# Purpose of this module | ||
|
||
This module uses the OpenAI completion API to generate code. | ||
|
||
For any language, you can request `parsed_generated_code` to get the generated response. If you are generating Python code, you can execute it in a subprocess by requesting `execution_output` and `execution_error`.
|
||
## Example | ||
```python | ||
from hamilton import driver | ||
import __init__ as llm_generate_code | ||
|
||
dr = driver.Builder().with_modules(llm_generate_code).build() | ||
|
||
dr.execute( | ||
["execution_output", "execution_error"], | ||
inputs=dict( | ||
        query="Retrieve the primary type from a `typing.Annotated` object",
) | ||
) | ||
``` | ||
|
||
## Configuration Options | ||
### Config.when | ||
This module doesn't receive configurations. | ||
|
||
### Inputs | ||
- `query`: The query for which you want code generated. | ||
- `api_key`: Set the OpenAI API key to use. If None, read the environment variable `OPENAI_API_KEY` | ||
- `code_language`: Set the code language to generate the response in. Defaults to `python`
|
||
### Overrides | ||
- `prompt_template_to_generate_code`: Create a new prompt template with the fields `query` and `code_language`. | ||
- `prompt_to_generate_code`: Manually provide a prompt to generate Python code | ||
|
||
## Extension / Limitations | ||
- Executing arbitrary generated code is a security risk. Proceed with caution. | ||
- You need to manually install dependencies for your generated code to be executed (i.e., you need to `pip install pandas` yourself) |
127 changes: 127 additions & 0 deletions
127
contrib/hamilton/contrib/user/zilto/llm_generate_code/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
import logging
import os
import subprocess
import sys
from typing import Optional
|
||
from hamilton.function_modifiers import extract_fields | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
from hamilton import contrib | ||
|
||
with contrib.catch_import_errors(__name__, __file__, logger): | ||
import openai | ||
|
||
|
||
def llm_client(api_key: Optional[str] = None) -> openai.OpenAI:
    """Instantiate an OpenAI client.

    :param api_key: OpenAI API key; when None, falls back to the
        `OPENAI_API_KEY` environment variable.
    :return: a configured `openai.OpenAI` client.
    """
    resolved_key = os.environ.get("OPENAI_API_KEY") if api_key is None else api_key
    return openai.OpenAI(api_key=resolved_key)
|
||
|
||
def prompt_template_to_generate_code() -> str:
    """Prompt template to generate code.

    It must include the fields `code_language` and `query`.
    """
    # Use {code_language} in the instruction line too; the original hard-coded
    # "python" there, contradicting the `code_language` parameter.
    return """Write some {code_language} code to solve the user's problem.
Return only {code_language} code in Markdown format, e.g.:
```{code_language}
....
```
user problem
{query}
{code_language} code
"""
|
||
|
||
def prompt_to_generate_code(
    prompt_template_to_generate_code: str, query: str, code_language: str = "python"
) -> str:
    """Fill the prompt template with the code language and the user query.

    :param prompt_template_to_generate_code: template with `query` and
        `code_language` fields.
    :param query: the user's problem statement.
    :param code_language: target language for the generated code.
    :return: the fully rendered prompt string.
    """
    fields = dict(query=query, code_language=code_language)
    return prompt_template_to_generate_code.format(**fields)
|
||
|
||
def response_generated_code(llm_client: openai.OpenAI, prompt_to_generate_code: str) -> str:
    """Call the OpenAI API completion endpoint with the prompt to generate code.

    NOTE(review): uses the legacy completions API with a hard-coded
    `gpt-3.5-turbo-instruct` model.

    :return: the raw text of the first completion choice.
    """
    completion = llm_client.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt=prompt_to_generate_code,
    )
    first_choice = completion.choices[0]
    return first_choice.text
|
||
|
||
def parsed_generated_code(response_generated_code: str, code_language: str = "python") -> str:
    """Extract the fenced code section from the generated text.

    Returns everything between the opening ````{code_language}` fence and the
    next closing ```` ``` ```` fence; returns an empty string when the opening
    fence is absent.
    """
    opening_fence = f"```{code_language}"
    after_opening = response_generated_code.partition(opening_fence)[2]
    return after_opening.partition("```")[0]
|
||
|
||
def code_prepared_for_execution(parsed_generated_code: str, code_language: str = "python") -> str:
    """If code is Python, append statements preparing it to be run in a subprocess.

    We collect all local variables in a dictionary and filter out Python builtins
    to keep only the variables from the generated code. print() is used to send
    string data from the subprocess back to the parent process via its `stdout`.

    :param parsed_generated_code: code section extracted from the LLM response.
    :param code_language: must be "python"; other languages cannot be executed here.
    :raises ValueError: if `code_language` is not "python".
    :return: the code with variable-collection statements appended.
    """
    if code_language != "python":
        raise ValueError("Can only execute the generated code if `code_language` = 'python'")

    code_to_get_vars = (
        "excluded_vars = { 'excluded_vars', '__builtins__', '__annotations__'} | set(dir(__builtins__))\n"
        "local_vars = {k:v for k,v in locals().items() if k not in excluded_vars}\n"
        "print(local_vars)"
    )

    # Guarantee the appended statements start on a fresh line; bare
    # concatenation produces a SyntaxError when the generated code has no
    # trailing newline.
    if not parsed_generated_code.endswith("\n"):
        parsed_generated_code += "\n"
    return parsed_generated_code + code_to_get_vars
|
||
|
||
@extract_fields(
    dict(
        execution_output=str,
        execution_error=str,
    )
)
def executed_output(code_prepared_for_execution: str) -> dict:
    """Execute the generated Python code + appended utilities in a subprocess.

    The output and errors from the code are collected as strings. Executing
    the code in a subprocess provides isolation, but isn't a security guarantee.

    :param code_prepared_for_execution: Python source to run with `-c`.
    :return: dict with `execution_output` (stdout) and `execution_error` (stderr).
    """
    # Use the current interpreter instead of whatever "python" resolves to on
    # PATH (which may be absent or a different version than the parent).
    process = subprocess.Popen(
        [sys.executable, "-c", code_prepared_for_execution],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    output, errors = process.communicate()
    return dict(execution_output=output, execution_error=errors)
|
||
|
||
# run as a script to test dataflow
if __name__ == "__main__":
    import __init__ as llm_generate_code

    from hamilton import driver

    dr = driver.Builder().with_modules(llm_generate_code).build()

    dr.display_all_functions("dag.png", orient="TB")

    # Override `parsed_generated_code` (this module has no `generated_code`
    # node) so the example runs without calling the OpenAI API.
    res = dr.execute(
        ["execution_output", "execution_error"],
        overrides=dict(parsed_generated_code="s = 'hello world'"),
    )

    print(res)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions
1
contrib/hamilton/contrib/user/zilto/llm_generate_code/requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
openai |
5 changes: 5 additions & 0 deletions
5
contrib/hamilton/contrib/user/zilto/llm_generate_code/tags.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"schema": "1.0", | ||
"use_case_tags": ["LLM", "OpenAI", "code generation"], | ||
"secondary_tags": {} | ||
} |
Empty file.