-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdivide.py
105 lines (78 loc) · 4.09 KB
/
divide.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import torch
import vllm
import os
import re
import json
from openreviewer.common import vicuna_system_prompt
from openreviewer.utils import build_vicuna_input
# messages = [
# ["USER", "Put your prompt here"],
# ["ASSISTANT", "This part is not used when producing prompt."]
# ]
# _, prompt, _ = build_vicuna_input(messages, vicuna_system_prompt)
# print(f"prompt:\n{prompt}")
# model_path = "/root/autodl-tmp/model/vicuna-7b-v1.5-16k"
# gpu_memory_utilization = 0.8
# llm = vllm.LLM(model=model_path, tensor_parallel_size=torch.cuda.device_count(), gpu_memory_utilization=gpu_memory_utilization)
# prompts = [prompt] # you can put all prompts in this list
# sampling_params = vllm.SamplingParams(
# n=2, # num samples
# temperature=0.7,
# max_tokens=4096
# )
# outputs = llm.generate(prompts, sampling_params)
# all_reponses = [[output.text for output in output.outputs] for output in outputs]
# # len(all_reponses) == len(prompts)
# # len(reponses) == num_samples
# for prompt, reponses in zip(prompts, all_reponses):
# print(prompt)
# print(reponses)
# print()
# Path to the local Vicuna-7B (16k context) checkpoint used to split review text.
model_path = "/root/autodl-tmp/model/vicuna-7b-v1.5-16k"
# Fraction of each GPU's memory that vLLM is allowed to reserve.
gpu_memory_utilization = 0.8
# Load the model once at module import time; shared by every get_divided_text call.
# tensor_parallel_size spreads the model across all visible GPUs.
llm = vllm.LLM(model=model_path, tensor_parallel_size=torch.cuda.device_count(), gpu_memory_utilization=gpu_memory_utilization)
def get_divided_text(text, category):
    """Use the LLM to break one review section into an enumerated list of points.

    Parameters:
        text: OpenReview-style field dict; the raw passage is read from
            text["value"].
        category: name of the review section (e.g. "summary", "strengths"),
            spliced into the prompt so the model knows what it is splitting.

    Returns:
        list[str]: the individual points with the generated "N." numbering
        stripped; empty fragments are discarded.
    """
    # Fixes vs. the original prompt string: missing spaces around `category`
    # ("the"+category+"part" produced e.g. "the summarypart"), the "iuput"
    # typo, and literal "/n" where a newline "\n" was clearly intended.
    instruction = (
        "The input is the " + category + " part of a review of a paper. "
        "Hierarchically analyze this input passage after 'Input:' and provide a "
        "breakdown using points 1, 2, 3, and so on. Your output shouldn't add "
        "or delete any words comparing with the input. You only need to add "
        "breakdown numbers to the input. \n For example, if my input is 'This "
        "paper proposed an end-to-end general-purpose any-to-any MM-LLM system, "
        "NExT-GPT, by connecting an LLM with multimodal adaptors and different "
        "diffusion decoders.', your output should be: '1.This paper proposed an "
        "end-to-end general-purpose any-to-any MM-LLM system, NExT-GPT \n 2.by "
        "connecting an LLM with multimodal adaptors and different diffusion "
        "decoders. \n'. \n Input:\n" + text["value"]
    )
    messages = [
        ["USER", instruction],
        # Placeholder turn: build_vicuna_input does not use the assistant
        # message when producing the prompt.
        ["ASSISTANT", "This part is not used when producing prompt."],
    ]
    _, prompt, _ = build_vicuna_input(messages, vicuna_system_prompt)
    sampling_params = vllm.SamplingParams(
        n=1,  # one sample per prompt
        temperature=0.7,
        max_tokens=4096,
    )
    outputs = llm.generate([prompt], sampling_params)
    # Single prompt, single sample -> first output of the first request.
    response_text = outputs[0].outputs[0].text
    # Split on the generated "1." / "2." markers; [1:] drops any preamble
    # the model emitted before the first marker.
    numbered_parts = re.split(r'\d+\.', response_text)[1:]
    return [part.strip() for part in numbered_parts if part.strip()]
def reform_review(review):
    """Flatten a raw OpenReview review dict into the output schema.

    The four free-text sections are split into numbered points via
    get_divided_text; the five numeric ratings are copied through from
    their nested "value" fields.
    """
    text_sections = ("summary", "strengths", "weaknesses", "questions")
    score_fields = ("soundness", "presentation", "rating", "contribution", "confidence")
    # Insertion order matches the original: text sections first, then scores.
    reformed_review = {
        section: get_divided_text(review[section], section)
        for section in text_sections
    }
    for field in score_fields:
        reformed_review[field] = review[field]["value"]
    return reformed_review
def extract_reviews_from_json(jsonString):
    """Parse a JSON array of OpenReview reviews and reform each one.

    Parameters:
        jsonString: JSON text encoding a list of review objects, each of
            which carries the review fields under a "content" key.

    Returns:
        list[dict]: one {"Reviews": <reformed review>} entry per input
        review, in the original order.
    """
    reviews = json.loads(jsonString)
    # Comprehension replaces the original index-based range(len(...)) loop;
    # same elements, same order.
    return [{"Reviews": reform_review(review["content"])} for review in reviews]
# Batch-process every raw review file into its reformed counterpart.
# `source_dir` renamed from `dir`, which shadowed the builtin.
source_dir = '/root/autodl-tmp/workspace/openreviewer/data/iclr2024/reviews/'
target_dir = '/root/autodl-tmp/workspace/openreviewer/data/iclr2024/reviews_2/'
for entry in os.listdir(source_dir):
    # Skip files that already have output, so an interrupted run can resume.
    if not os.path.exists(os.path.join(target_dir, entry)):
        print(entry)
        with open(os.path.join(source_dir, entry), "r", encoding='utf-8') as f:
            data = extract_reviews_from_json(f.read())
        # Explicit encoding on the output too — the original relied on the
        # platform default, which can break on non-UTF-8 locales.
        with open(os.path.join(target_dir, entry), "w", encoding='utf-8') as out_file:
            json.dump(data, out_file)