test_vllm_prm.py (forked from rawsh/mirrorllm)
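"""
Smoke test for a process reward model (PRM) served behind a vLLM
OpenAI-compatible endpoint on Modal. Each step of a candidate solution is
scored by comparing the probability the model assigns to a "+" (step is
correct) token against a "-" (step is incorrect) token.
"""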
from openai import AsyncOpenAI
import asyncio
import math


class RewardModelClient:
    def __init__(self):
        # Modal-hosted vLLM endpoint exposing an OpenAI-compatible API
        self.client = AsyncOpenAI(
            base_url="https://rawsh--vllm-qwen-prm-serve.modal.run/v1/",
            api_key="9FF74944EED19865193F979942FB1",
        )
        self.model_name = "MetaMath-Qwen2.5-0.5b-PRM"

    def get_token_probability(self, response) -> float:
        """Extract the probability of the "+" token from a completion response."""
        logprobs = response.choices[0].logprobs.content[0].top_logprobs
        # Print tokens and their probabilities for debugging
        token_probs = {lp.token: math.exp(lp.logprob) for lp in logprobs}
        print("Available tokens and probs:", token_probs)
        # Raw probabilities, defaulting to a tiny value if a token is missing
        prob_plus = next((math.exp(lp.logprob) for lp in logprobs if lp.token == "+"), 1e-10)
        prob_minus = next((math.exp(lp.logprob) for lp in logprobs if lp.token == "-"), 1e-10)
        # Normalize so the score is P(+) relative to P(+) + P(-)
        return prob_plus / (prob_plus + prob_minus) if (prob_plus + prob_minus) > 0 else 0.5
    async def evaluate_steps(self, question: str, steps: list[str]) -> list[float]:
        """
        Evaluate each step of the solution, scoring the probability of "+" vs "-".
        Returns the probability of "+" for each step.
        """
        probabilities = []

        # First evaluate the question together with the first step
        messages = [
            {"role": "user", "content": f"{question}\n{steps[0]}"}
        ]
        try:
            response = await self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                max_tokens=1,
                temperature=0,
                logprobs=True,
                top_logprobs=20,
            )
            probabilities.append(self.get_token_probability(response))
        except Exception as e:
            print(f"Error evaluating first step: {str(e)}")
            probabilities.append(0.5)

        # For each remaining step, rebuild the conversation so every previously
        # scored step is followed by an assistant "+" turn:
        #   user: question + step 1, assistant: "+", user: step 2, ..., user: step i
        for i in range(1, len(steps)):
            try:
                messages = [
                    {"role": "user", "content": f"{question}\n{steps[0]}"}
                ]
                for prev_step in steps[1:i]:
                    messages.extend([
                        {"role": "assistant", "content": "+"},
                        {"role": "user", "content": prev_step},
                    ])
                messages.append({"role": "assistant", "content": "+"})
                messages.append({"role": "user", "content": steps[i]})

                response = await self.client.chat.completions.create(
                    model=self.model_name,
                    messages=messages,
                    max_tokens=1,
                    temperature=0,
                    logprobs=True,
                    top_logprobs=20,
                )
                probabilities.append(self.get_token_probability(response))
            except Exception as e:
                print(f"Error evaluating step {i + 1}: {str(e)}")
                probabilities.append(0.5)

        return probabilities
async def main():
    reward_model = RewardModelClient()

    # Example problem with a two-step solution
    question = "Janet has 3 apples and buys 2 more. How many apples does she have?"
    steps = [
        "Step 1: If Janet has 3 apples and buys 2 more, total apples = 3 + 2 = 5.",
        "Step 2: Therefore, Janet has 5 apples. The answer is: 5",
    ]

    try:
        probabilities = await reward_model.evaluate_steps(question, steps)

        print("\nResults:")
        print("Question:", question)
        print("\nStep Evaluations:")
        for step, prob in zip(steps, probabilities):
            print(f"P(+) = {prob:.3f}: {step}")
    except Exception as e:
        print(f"Error occurred: {str(e)}")


if __name__ == "__main__":
    asyncio.run(main())
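# Running `python test_vllm_prm.py` prints output shaped like the following
# (probability values are illustrative only; actual scores depend on the
# served model):
#
#   Results:
#   Question: Janet has 3 apples and buys 2 more. How many apples does she have?
#
#   Step Evaluations:
#   P(+) = 0.973: Step 1: If Janet has 3 apples and buys 2 more, total apples = 3 + 2 = 5.
#   P(+) = 0.981: Step 2: Therefore, Janet has 5 apples. The answer is: 5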