-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Integration tests on formatter (#49)
* wip * Add Yann Lecun IIT Madras lecture * WIP * Update * Add uv.lock change * Fix for linting * Fix run pytest
- Loading branch information
1 parent
75587e8
commit f1e5a90
Showing
9 changed files
with
363 additions
and
327 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
import typing | ||
|
||
from instructor import Instructor | ||
from pydantic import BaseModel, AfterValidator | ||
|
||
|
||
class LLMAdapter:
    """Wrap an Instructor client with the prompts used on transcript paragraphs.

    Every request is sent through ``llm_client.chat.completions.create`` with
    the model name given at construction time.
    """

    def __init__(self, llm_client: "Instructor", llm_model: str) -> None:
        """Remember the client and the model name used for every request."""
        self.llm_client = llm_client
        self.llm_model = llm_model

    def get_topic_changing_paragraph_indexes(
        self, paragraphs: typing.List[typing.List[str]]
    ) -> typing.List[int]:
        """Ask the LLM which paragraphs start a very different topic.

        Args:
            paragraphs: The paragraphs to inspect; each paragraph is a list of
                strings that are joined with a blank line before being sent.

        Returns:
            The zero-based indexes reported by the model. Every index is
            unique and satisfies ``0 < index < len(paragraphs)``. With fewer
            than two paragraphs the result is always ``[]`` and no request is
            made.
        """
        n = len(paragraphs)
        # An index must be > 0 and < n, so with zero or one paragraph the
        # empty list is the only answer that could ever pass validation.
        # Skip the request instead of asking a question with a fixed answer.
        if n < 2:
            return []

        def validate_paragraph_indexes(v: typing.List[int]) -> typing.List[int]:
            # Reject duplicates first, then any index outside the open
            # interval (0, n). Index 0 is excluded because the first
            # paragraph has no predecessor to differ from.
            if len(set(v)) != len(v):
                raise ValueError("All elements must be unique")
            for i in v:
                if i <= 0:
                    raise ValueError(
                        f"All elements must be greater than 0 and less than {n}. Paragraph index {i} is less than or equal to 0"
                    )
                if i >= n:
                    raise ValueError(
                        f"All elements must be greater than 0 and less than {n}. Paragraph index {i} is greater or equal to {n}"
                    )

            return v

        paragraph_texts = ["\n\n".join(p) for p in paragraphs]

        # Response schema; the validator runs on the parsed list of indexes.
        class Result(BaseModel):
            paragraph_indexes: typing.Annotated[
                typing.List[int], AfterValidator(validate_paragraph_indexes)
            ]

        # The user message is a template; its placeholders are filled from
        # ``context`` (presumably rendered by the client -- confirm against
        # the Instructor templating docs).
        result = self.llm_client.chat.completions.create(
            model=self.llm_model,
            response_model=Result,
            messages=[
                {
                    "role": "system",
                    "content": """
                    You are a smart assistant who reads paragraphs of text from an audio transcript and
                    find the paragraphs that significantly change topic from the previous paragraph.
                    Make sure only mark paragraphs that talks about a VERY DIFFERENT topic from the previous one.
                    The response should be an array of the index number of such paragraphs, such as `[1, 3, 5]`
                    If there is no paragraph that changes topic, then return an empty list.
                    """,
                },
                {
                    "role": "user",
                    "content": """
                    {% for paragraph in paragraphs %}
                    <paragraph {{ loop.index0 }}>
                    {{ paragraph }}
                    </ paragraph {{ loop.index0 }}>
                    {% endfor %}
                    """,
                },
            ],
            context={
                "paragraphs": paragraph_texts,
            },
        )
        return result.paragraph_indexes

    def generate_title_for_paragraphs(
        self, paragraphs: typing.List[typing.List[str]]
    ) -> str:
        """Ask the LLM for a short title covering the given paragraphs.

        Args:
            paragraphs: The paragraphs to summarise; each paragraph is a list
                of strings.

        Returns:
            The value returned by the client for ``response_model=str``.
        """
        # NOTE(review): strings inside a paragraph are concatenated with no
        # separator here, while get_topic_changing_paragraph_indexes joins
        # them with a blank line -- confirm which one is intended.
        text = "\n\n".join(["".join(p) for p in paragraphs])
        title = self.llm_client.chat.completions.create(
            model=self.llm_model,
            response_model=str,
            messages=[
                {
                    "role": "system",
                    "content": """
                    Please generate a short title for the following text.
                    Be VERY SUCCINCT. No more than 6 words.
                    """,
                },
                {
                    "role": "user",
                    "content": """
                    {{ text }}
                    """,
                },
            ],
            context={
                "text": text,
            },
        )
        return title
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.