
Commit: add tables and cols
noah-art3mis committed Jul 29, 2024
1 parent 0711af1 commit 1257196
Showing 7 changed files with 42 additions and 38 deletions.
11 changes: 4 additions & 7 deletions README.md
@@ -2,9 +2,9 @@

Lightweight prompt evaluation package.

Uses [ollama](https://github.com/ollama/ollama-python) to run LLMs locally if needed.
Use it online, or run it locally through Streamlit. It can use [ollama](https://github.com/ollama/ollama-python) to run LLMs locally if necessary.

Cost estimation gives very rough estimates (input times 2).
Cost estimation is very rough (input \* 2).
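The "input \* 2" heuristic just assumes the completion is about as long as the prompt. A minimal sketch of that estimate, assuming a tiktoken encoding and an illustrative price table (not the package's actual API):

```python
import tiktoken

# Hypothetical price table; the entry below is illustrative, not a quoted rate.
PRICE_PER_1K_INPUT_TOKENS = {"gpt-4o": 0.005}  # USD

def rough_cost_estimate(prompt: str, model: str = "gpt-4o") -> float:
    enc = tiktoken.encoding_for_model(model)
    input_tokens = len(enc.encode(prompt))
    # Assume the completion is about as long as the prompt, hence "input * 2".
    return (input_tokens * 2 / 1000) * PRICE_PER_1K_INPUT_TOKENS[model]
```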

## How to use

@@ -59,11 +59,8 @@ Cost estimation gives very rough estimates (input times 2).

## TODO

- add 'get report' to runner
- fix: urgent does not call proper model
- turn query into an interface
- save logs in txt
- add [asyncio](https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client)
- add tables
- add tests
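The asyncio item above links to ollama's async client; a minimal sketch of what wiring it in could look like, following the linked README (the model name is illustrative):

```python
import asyncio
from ollama import AsyncClient

async def chat() -> None:
    message = {"role": "user", "content": "Why is the sky blue?"}
    # AsyncClient.chat mirrors the synchronous API but is awaitable.
    response = await AsyncClient().chat(model="llama3", messages=[message])
    print(response["message"]["content"])

asyncio.run(chat())
```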

## Resources

2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -14,6 +14,7 @@ tiktoken = "^0.7.0"
python-dotenv = "^1.0.1"
ollama = "^0.2.1"
streamlit = "^1.37.0"
pandas = "^2.2.2"


[tool.poetry.group.dev.dependencies]
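The new pandas dependency backs the tables added to the Streamlit app below: summary records are turned into DataFrames and rendered with st.data_editor. A small sketch of that pairing, with illustrative field names rather than the package's actual schema:

```python
import pandas as pd
import streamlit as st

# Records like these would come out of a report; the fields are made up.
per_model = [
    {"model": "gpt-4o", "avg_grade": 0.8, "cost_usd": 0.012},
    {"model": "llama3", "avg_grade": 0.7, "cost_usd": 0.0},
]

# st.data_editor renders a DataFrame as an editable table.
st.data_editor(pd.DataFrame(per_model))
```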
Binary file added simulacro_1.jpeg
10 changes: 1 addition & 9 deletions src/crucible/classes/Runner.py
@@ -124,12 +124,4 @@ def calculate_costs(self) -> float:
def _check_exists(self, *args) -> None:
for arg in args:
if arg is None:
raise ValueError(f"No globals provided for {arg}")

def generate_report(self) -> str:
for task in self.tasks:
self.report.add_result(task)

self.report.calculate_total_cost()

return self.report.generate_report()
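An aside on the _check_exists helper shown above: once arg is None, the f-string can only ever report "None", so the error never names the missing global. A sketch of a more informative variant, assuming keyword arguments (this is not the package's code):

```python
def check_exists(**kwargs) -> None:
    # Receiving name/value pairs lets the error name what is missing.
    for name, value in kwargs.items():
        if value is None:
            raise ValueError(f"No globals provided for {name}")

# usage sketch: check_exists(models=models, prompts=prompts)
```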
53 changes: 34 additions & 19 deletions src/crucible/streamlit_app.py
@@ -1,4 +1,6 @@
import pandas as pd
import streamlit as st

from crucible.utils.grading import GradingType
from crucible.classes.Runner import Runner
from crucible.classes.Model import Source
@@ -22,8 +24,7 @@
st.caption("Lightweight prompt evaluation")
st.caption("An AUTOMATON tool")

st.markdown("---")
st.subheader("Configuration")
st.header("Configuration")

with st.expander("Models"):

@@ -69,7 +70,7 @@
prompts.append(Prompt(prompt_id, prompt_slot, prompt_content))


with st.expander("Variable"):
with st.expander("Variables"):
n_var = st.number_input("Number of variables", min_value=1, max_value=20, value=2)
variables = []
for i in range(int(n_var)):
@@ -91,10 +92,11 @@

st.markdown("---")
st.subheader("Other configs")
grading_type = st.selectbox(
"Select grading type", available_gradings, format_func=lambda x: x.name
a1, a2 = st.columns(2)
grading_type = a1.selectbox(
"Select grading type", available_gradings, format_func=lambda x: x.name.lower()
)
temperature = st.slider("Select temperature", 0.0, 1.0, 0.0, 0.2)
temperature = a2.slider("Select temperature", 0.0, 1.0, 0.0, 0.2)


st.markdown("---")
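For context on the Variables expander edited above: the app renders a variable-length form by letting a number_input drive a widget loop. A minimal sketch of that pattern, with illustrative labels and keys, not the app's exact fields:

```python
import streamlit as st

with st.expander("Variables"):
    n_var = st.number_input("Number of variables", min_value=1, max_value=20, value=2)
    variables = []
    for i in range(int(n_var)):
        # Unique keys keep each row's widget state separate across reruns.
        var_id = st.text_input(f"Variable {i} id", key=f"var_id_{i}")
        var_content = st.text_area(f"Variable {i} content", key=f"var_content_{i}")
        variables.append((var_id, var_content))
```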
@@ -110,7 +112,7 @@ def click_button():
st.button("Compile", on_click=click_button)

if st.session_state.compiled:
st.subheader("Summary:")
st.header("Summary:")

runner = Runner(
models=models,
@@ -124,19 +126,20 @@ def click_button():
)
estimated_costs = runner.estimate_all_costs()

st.markdown("- **Models**")
c1, c2, c3 = st.columns(3)
c1.markdown("**Models**")
for model in runner.models:
st.markdown(f"\t- {model.id}")
c1.markdown(f"\t- {model.id}")

st.markdown("- **Prompts**")
c2.markdown("**Prompts**")
for prompt in runner.prompts:
st.markdown(f"\t- {prompt.id}")
c2.markdown(f"\t- {prompt.id}")

st.markdown("- **Variables**")
c3.markdown("**Variables**")
for variable in runner.variables:
st.markdown(f"\t- {variable.id}")
c3.markdown(f"\t- {variable.id}")

st.markdown(f"**Grading Type**: {runner.grading_type}")
st.markdown(f"**Grading Type**: {runner.grading_type.name.lower()}")
st.markdown(f"**Temperature**: {runner.temperature}")
st.markdown(f"**Total cases**: {len(runner.tasks)}")
st.markdown(f"**Estimated costs**: ${estimated_costs:.2f} USD")
@@ -153,9 +156,21 @@ def click_button():
progress_bar.progress(progress, text="Running CRUCIBLE...")

report = Report(runner)
result = report.__dict__
st.subheader("Results:")
st.write(result)

# Uncomment and implement if needed
# st.download_button("Download report", file)
st.header("Results:")
col1, col2 = st.columns(2)
col1.metric("Cost (USD)", report.cost)
col2.metric("Time (seconds)", report.time)

st.subheader("Per model:")
st.data_editor(pd.DataFrame(report.per_model))
st.subheader("Per prompt:")
st.data_editor(pd.DataFrame(report.per_prompt))
st.subheader("Per variable:")
st.data_editor(pd.DataFrame(report.per_variable))

st.subheader("All tasks:")
st.data_editor(pd.DataFrame(report.tasks))

st.subheader("Result(json):")
st.write(report.__dict__)
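The hunk above is the commit's namesake: results move from a raw dict dump into columns, metrics, and editable tables. A condensed sketch of the layout pattern, with made-up numbers and field names:

```python
import pandas as pd
import streamlit as st

# st.columns returns container objects; calling a widget method on one
# places that widget inside it.
col1, col2 = st.columns(2)
col1.metric("Cost (USD)", 0.12)
col2.metric("Time (seconds)", 34)

# Each summary table is a list of records rendered as a DataFrame.
per_model = [{"model": "gpt-4o", "avg_grade": 0.8}]
st.subheader("Per model:")
st.data_editor(pd.DataFrame(per_model))
```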
3 changes: 1 addition & 2 deletions src/crucible/utils/grading.py
@@ -41,11 +41,10 @@ def grade_response(
content=GRADING_PROMPT.replace("{expected}", variable.expected[0]),
)

response = _model.query(
response, cost = _model.query(
prompt=_prompt,
variable=_variable,
temp=0,
danger_mode=True,
api_key=openai_api_key,
)

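A note on the grading change above: grade_response templates the expected answer into a grading prompt and asks a grader model for a verdict, and query now returns a (text, cost) tuple so grading costs are tracked too. A self-contained sketch of that flow, with an injected query callable standing in for the model; the prompt wording and names are illustrative:

```python
GRADING_PROMPT = (
    "Expected: {expected}\n"
    "Answer: {answer}\n"
    "Reply PASS or FAIL."
)

def grade_response(expected: str, answer: str, query) -> tuple[str, float]:
    content = GRADING_PROMPT.replace("{expected}", expected).replace("{answer}", answer)
    verdict, cost = query(content)  # query returns (text, cost), as in the diff
    return verdict.strip(), cost

# usage sketch with a stub grader in place of a real model:
print(grade_response("4", "4", lambda _: ("PASS", 0.0)))
```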
