-
Notifications
You must be signed in to change notification settings - Fork 1
/
utils.py
137 lines (109 loc) · 4.21 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
from typing import List
import bibtexparser
import instructor
import pandas as pd
from bibtexparser.bparser import BibTexParser
from bibtexparser.customization import convert_to_unicode
from openai import OpenAI
from pydantic import BaseModel, RootModel
from pydantic import Field
class PaperInfo(BaseModel):
    """Data model for paper classification."""

    # NOTE: this model is handed to the LLM client as ``response_model``.
    # pydantic exposes the class docstring and ``Field`` descriptions through
    # the model's JSON schema, so treat both as runtime text, not as comments.

    # Plain string fields; no description or extra validation is attached.
    evaluation_example: str
    use_case: str
    application_domain: str

    # The only field that carries an explicit schema description.
    technology_used: List[str] = Field(
        description="List of all the technologies used in the paper"
    )
class PaperInfoList(RootModel):
    # Root-model wrapper whose entire payload is a list of PaperInfo items.
    # Documented with a comment rather than a docstring so the generated
    # schema of this model stays exactly as it was.
    root: List[PaperInfo]
def send_text_to_chatgpt(
    api_key,
    user_message,
    system_message="You are a helpful assistant.",
    model="gpt-3.5-turbo",
):
    """Send a text prompt to the ChatGPT model and return the response.

    Args:
        api_key: Key passed to the ``OpenAI`` client constructor.
        user_message: Content of the ``user`` chat message.
        system_message: Content of the ``system`` chat message placed before
            the user message. Pass ``None`` or an empty string to send the
            user message alone.
        model: Model name forwarded to the chat-completions call.

    Returns:
        The message content of the first returned choice. If the request
        raises, a string of the form ``"An error occurred: <exception>"`` is
        returned instead of propagating the exception.
    """
    client = OpenAI(api_key=api_key)

    # The system prompt used to be accepted but silently dropped; include it
    # so the parameter actually influences the conversation.
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    messages.append({"role": "user", "content": user_message})

    try:
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
        )
        # Extract the assistant's response
        return completion.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort contract: callers receive the error as text.
        # NOTE(review): this makes failures indistinguishable from a normal
        # reply by type alone; callers must inspect the string.
        return f"An error occurred: {e}"
def send_text_to_chatgpt_structured(
    api_key,
    user_message,
    system_message="You are a helpful assistant.",
    model="gpt-3.5-turbo",
) -> PaperInfo:
    """Send a prompt and parse the reply into a ``PaperInfo`` instance.

    The OpenAI client is wrapped with ``instructor`` in JSON mode and the
    request is made with ``response_model=PaperInfo``.

    Args:
        api_key: Key passed to the ``OpenAI`` client constructor.
        user_message: Content of the ``user`` chat message.
        system_message: Content of the ``system`` chat message placed before
            the user message. Pass ``None`` or an empty string to send the
            user message alone.
        model: Model name forwarded to the chat-completions call.

    Returns:
        The object returned by the instructor-patched ``create`` call for
        ``response_model=PaperInfo``.

    Raises:
        Any exception raised by the client is propagated unchanged; unlike
        ``send_text_to_chatgpt`` nothing is caught here.
    """
    client = instructor.from_openai(
        OpenAI(api_key=api_key),
        mode=instructor.Mode.JSON,
    )

    # The system prompt used to be accepted but silently dropped; include it
    # so the parameter actually influences the conversation.
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    messages.append({"role": "user", "content": user_message})

    paper_info = client.chat.completions.create(
        model=model,
        response_model=PaperInfo,
        messages=messages,
    )
    return paper_info
def read_bibtex(file_path):
    """Load a BibTeX file and return its entries.

    The file is opened as UTF-8 and parsed with a ``BibTexParser`` whose
    customization hook is ``convert_to_unicode``.

    Args:
        file_path: Path of the BibTeX file to read.

    Returns:
        The ``entries`` of the parsed database. An empty list is returned,
        after printing a message, when the file does not exist or when any
        other exception is raised while loading.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            bib_parser = BibTexParser()
            # Convert LaTeX characters
            bib_parser.customization = convert_to_unicode
            database = bibtexparser.load(handle, parser=bib_parser)
        entries = database.entries
        print(f"Loaded {len(entries)} entries from {file_path}.")
        return entries
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except Exception as e:
        # Best-effort: any other failure is reported and treated as "no entries".
        print(f"An error occurred: {e}")
    return []
def bibtex_to_dataframe(bib_entries):
    """Turn a list of BibTeX entry dictionaries into a pandas DataFrame.

    Args:
        bib_entries: List of dictionaries, one per entry.

    Returns:
        A DataFrame with one row per entry in which missing values are
        replaced by the string ``"N/A"``. If ``bib_entries`` is falsy, a
        message is printed and an empty DataFrame is returned.
    """
    if bib_entries:
        # Entries lacking a field get NaN in that column; replace with "N/A".
        return pd.DataFrame(bib_entries).fillna("N/A")

    print("No entries found to convert.")
    return pd.DataFrame()
def retrieve_additional_data(abstract, api_key):
    """Ask the chat model for comma-separated metadata about an abstract.

    Args:
        abstract: Abstract text interpolated into the prompt.
        api_key: Key forwarded to ``send_text_to_chatgpt``.

    Returns:
        Whatever ``send_text_to_chatgpt`` returns for the built prompt (its
        default system message and model are used).
    """
    requested_fields = (
        "Technology that was used, Evaluation Example, Use Case, Application Domain"
    )
    # Assembled piecewise; the resulting text is identical to the original
    # single multi-line prompt, including the trailing newline.
    instructions = (
        "Given the abstract below, please provide additional insights or information.\n"
        "present the Information as comma separated values. Give nothing else as output.\n"
        f"Additional information: {requested_fields}.\n"
    )
    query = f"{instructions}Abstract:\n{abstract}\n{requested_fields}\n"
    return send_text_to_chatgpt(api_key, query)
def retrieve_additional_structured_data(abstract, api_key, model) -> PaperInfo:
    """Request a structured analysis of an abstract from the given model.

    Args:
        abstract: Abstract text appended to the instruction sentence.
        api_key: Key forwarded to ``send_text_to_chatgpt_structured``.
        model: Model name forwarded to ``send_text_to_chatgpt_structured``.

    Returns:
        The result of ``send_text_to_chatgpt_structured`` for the prompt.
    """
    instruction = f"Analyze the following abstract, try to be short and precise {abstract}"
    structured_reply = send_text_to_chatgpt_structured(
        api_key=api_key,
        user_message=instruction,
        model=model,
    )
    return structured_reply
def iterate_over_entries(
    data, api_key, model="meta-llama-3.1-70b-instruct"
) -> PaperInfoList:
    """Run the structured abstract analysis on every row of ``data``.

    Args:
        data: Object providing ``iterrows()`` whose rows expose an
            ``"abstract"`` key (a pandas DataFrame in this module).
        api_key: Key forwarded to ``retrieve_additional_structured_data``.
        model: Model name used for every request. Defaults to the value that
            was previously hard-coded, so existing callers are unaffected.

    Returns:
        A ``PaperInfoList`` holding one result per row, in row order.

    Raises:
        KeyError: If a row has no ``"abstract"`` entry.
        Exceptions from the underlying request are propagated unchanged.
    """
    analyzed_abstracts = []
    # The row index is not needed; only the abstract cell is read.
    for _, row in data.iterrows():
        abstract = row["abstract"]
        additional_data = retrieve_additional_structured_data(
            abstract, api_key, model=model
        )
        # str() keeps the progress line from raising on a non-string cell
        # (for example NaN when missing values were not filled).
        print(f"Analyzed: {str(abstract)[0:45]}...")
        analyzed_abstracts.append(additional_data)
    return PaperInfoList(analyzed_abstracts)