Commit 6bac550

feature: logprob evaluation

pacman000 committed Nov 16, 2024
1 parent 3a1b3c9 commit 6bac550
Showing 6 changed files with 144 additions and 49 deletions.
19 changes: 12 additions & 7 deletions src/abstractions/model.py
@@ -998,20 +998,20 @@ def __inference_serial(
             else list(result_data.all_passages())
         )
 
-    def evaluate(self, method: Literal["fast", "dummy"] = "fast") -> np.ndarray:
+    def evaluate(self, logprobs=False, method: Literal["fast", "dummy"] = "fast") -> np.ndarray:
         """
         Returns a high-dimensional vector representing morality preference of the model. Choose "dummy" for fast debugging runs.
         """
         if method == "dummy" or os.environ.get("DUMMY_EVAL"):
             return np.random.rand(10)
         elif method == "fast":
-            return self.__evaluate_fast()
+            return self.__evaluate_fast(logprobs)
         else:
             raise NameError(
                 f'Method {method} not recognized. Options are "fast" and "dummy".'
             )
 
-    def __evaluate_fast(self) -> np.ndarray:
+    def __evaluate_fast(self, logprobs=False) -> np.ndarray:
         if self.template_type != "alpaca":
             raise NotImplementedError(
                 "Fast evaluation is only supported for models using alpaca template."
@@ -1021,26 +1021,31 @@ def __evaluate_fast(self) -> np.ndarray:
             os.mkdir("output")
         if not os.path.exists(os.path.join("output", "evaluation_results")):
             os.mkdir(os.path.join("output", "evaluation_results"))
 
         # output csv goes here
         experiment_directory = os.path.join(
             "output", "evaluation_results", self.model_name + "_single"
         )
         if not os.path.exists(experiment_directory):
             os.mkdir(experiment_directory)
 
-        evaluation_input = eval_utils.regenerate_inputs()
+        if logprobs:
+            evaluation_input = eval_utils.regenerate_inputs(logprobs=True)
+            p = "logprobs"
+        else:
+            evaluation_input = eval_utils.regenerate_inputs()
+            p = "responses"
 
         print("evaluation query begins")
         evaluation_output = self.inference(
             evaluation_input,
             "evaluation_output_mc_" + self.model_name,
             backend="sglang",
+            purpose=p,
         )
         print("answers at", evaluation_output.data_path)
         with open(evaluation_output.data_path, "r") as f:
             evaluation_output_data = json.load(f)
-        raw_stats = eval_utils.collect(evaluation_output_data)
+        raw_stats = eval_utils.collect(evaluation_output_data, logprobs=logprobs)
         with open(
             os.path.join(experiment_directory, self.model_name + "_raw.json"), "w"
         ) as f:
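For orientation, a minimal sketch of how the new flag is meant to be driven from caller code. `Model` and `evaluate` come from this file; the import path is assumed from the repo layout, the checkpoint name is a placeholder, and the (19,) shape is only what the downstream plotting code expects:

from src.abstractions.model import Model

model = Model("8B-C013-instruct")  # placeholder checkpoint name
# logprobs=True routes evaluation through the new log-probability path;
# the default (False) keeps the original response-based scoring.
vector = model.evaluate(logprobs=True, method="fast")
print(vector.shape)  # e.g. (19,), per the plotting code below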
4 changes: 2 additions & 2 deletions src/download_models.py
@@ -15,8 +15,8 @@ def download_all_models(download_8B=True, download_70B=False):
     if download_8B:
         for i in range(13, 22):
             download_model(
-                f"PKU-Alignment/ProgressGym-HistLlama3-8B-C0{i}-instruct",
-                f"./dataset/dataset_model_sequence/8B-C0{i}-instruct",
+                f"PKU-Alignment/ProgressGym-HistLlama3-8B-C0{i}-pretrain",
+                f"./dataset/dataset_model_sequence/8B-C0{i}-pretrain",
             )
     if download_70B:
         for i in range(13, 22):
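The -instruct to -pretrain switch only changes which checkpoints are fetched. For reference, a sketch of what `download_model` is assumed to do (its body is not shown in this diff); `snapshot_download` is the standard Hugging Face Hub call for this kind of fetch:

from huggingface_hub import snapshot_download

def download_model(repo_id: str, local_dir: str) -> None:
    # Assumed behaviour: fetch a full model snapshot into local_dir,
    # e.g. the ProgressGym -pretrain checkpoints selected above.
    snapshot_download(repo_id=repo_id, local_dir=local_dir)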
Empty file added src/evaluation/__init__.py
44 changes: 25 additions & 19 deletions src/evaluation/quantify.py
@@ -195,10 +195,13 @@ def calculate_model(test_dir, model_name):
             if row[7 + i] == row[17 + i]:
                 continue
             else:
-                if row[17 + i] == "Yes" or row[7 + i] == "No":
+                if row[17 + i] == "Yes" and row[7 + i] == "No":
                     template[i] = 1
-                elif row[17 + i] == "No" or row[7 + i] == "Yes":
+                elif row[17 + i] == "No" and row[7 + i] == "Yes":
                     template[i] = -1
+                else:
+                    continue
+                # print("non-conflicting morality", row[0])
         mrl_vec[0][name] = list(template * (mal))
 
     for key in raw_dict.keys():
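The `or` → `and` change tightens what counts as a conflicting answer pair: `template[i]` is now set only when columns 7+i and 17+i genuinely disagree as Yes/No, and everything else (refusals, malformed answers) falls through to the new `else: continue`. A self-contained check of the corrected logic; the helper name and the Yes/No column convention are assumptions read off the surrounding code:

def conflict_sign(first: str, second: str) -> int:
    # first ~ row[7 + i], second ~ row[17 + i]
    if second == "Yes" and first == "No":
        return 1
    if second == "No" and first == "Yes":
        return -1
    return 0  # non-conflicting: leave template[i] untouched

assert conflict_sign("No", "Yes") == 1
assert conflict_sign("Yes", "No") == -1
# The old `or` version would have mislabeled this pair as -1:
assert conflict_sign("Yes", "garbage") == 0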
@@ -318,7 +321,8 @@ def normalize_matrix(matrix, ranges):
     """
     matrix = np.array(matrix)
     m, n = matrix.shape
-
+    if ranges is None:
+        ranges = [(0, n - 1)]
     for row_idx in range(m):
         for (ai, bi) in ranges:
             # calculate the sum of columns ai-bi for each row
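The new guard gives `ranges` a whole-row default, which the updated test script relies on when it calls `plot_heatmap(..., norm="group")` without passing tuples. The loop body is cut off in this hunk, so the per-range scaling below is an assumption about its intent, not a copy of it:

import numpy as np

def normalize_groups(matrix, ranges=None):
    # Assumed semantics: within each row, rescale every (ai, bi) column
    # group to sum to 1; ranges=None treats the whole row as one group.
    matrix = np.array(matrix, dtype=float)
    m, n = matrix.shape
    if ranges is None:
        ranges = [(0, n - 1)]
    for row in matrix:
        for ai, bi in ranges:
            s = row[ai : bi + 1].sum()
            if s != 0:
                row[ai : bi + 1] /= s
    return matrix

print(normalize_groups([[1.0, 1.0, 2.0, 2.0]], ranges=[(0, 1), (2, 3)]))
# [[0.5 0.5 0.5 0.5]]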
Expand Down Expand Up @@ -354,21 +358,21 @@ def plot_parallel_coordinates(data, title, tuples):
plt.show()
plt.savefig("output/evaluation_results/figs/" + title + "_parr.png")

def plot_heatmap(vectors, title, tuples):
def plot_heatmap(data, title, tuples = None, norm = "column"):
"""
Heatmap for list of 19-dimensional vector.
Arguments:
vectors (list of list of floats): List of 19-dimensional vector
data (np 2d-array): List of 19-dimensional vector
"""
data = np.array(vectors)

# data = normalize_matrix(data, tuples)
data -= np.mean(data, axis=0, keepdims=True)
data /= np.std(data, axis=0, keepdims=True)
# data /= np.sum(data, axis=0, keepdims=True)
if data.shape[1] != 19:
raise ValueError("All vectors should be 19-dimensional")
if norm == "group":
data = normalize_matrix(data, tuples)
#data -= np.mean(data, axis=0, keepdims=True)
#data /= np.std(data, axis=0, keepdims=True)
if norm == "column":
data -= np.mean(data, axis=0, keepdims=True)
data /= np.std(data, axis=0, keepdims=True)
#data /= np.sum(data, axis=0, keepdims=True)

# Heatmap with appropriate labels
plt.figure(figsize=(12, 8))
@@ -377,8 +381,8 @@ def plot_heatmap(vectors, title, tuples):
         annot=True,
         cmap="viridis",
         cbar=True,
-        xticklabels=[f"Dim{i+1}" for i in range(19)],
-        yticklabels=[f"Vector {i+1}" for i in range(len(vectors))],
+        xticklabels=[f"Dim{i+1}" for i in range(data.shape[1])],
+        yticklabels=[f"Vector {i+1}" for i in range(data.shape[0])],
     )
     plt.title("Heatmap of 19-Dimensional Vectors")
     plt.xlabel("Dimensions")
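`norm` now selects between two normalizations: "group" defers to `normalize_matrix` above, while "column" z-scores each dimension across models, which is what makes differently scaled dimensions comparable in one heatmap. A toy illustration of the column mode (values invented):

import numpy as np

data = np.array([[1.0, 10.0], [3.0, 30.0]])
data -= data.mean(axis=0, keepdims=True)  # center each column
data /= data.std(axis=0, keepdims=True)   # scale to unit variance
print(data)  # [[-1. -1.]
             #  [ 1.  1.]]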
@@ -434,13 +438,13 @@ def analyze_vectors_quadratic(vectors):
     p_values = []
     positive_coefficients = []
     negative_coefficients = []
-
+    np.savetxt("output/evaluation_results/figs/quad.txt", vectors, fmt='%f')
     for dim in range(num_dimensions):
         x = np.arange(len(vectors))
         y = vectors[:, dim]
 
         # Perform quadratic fitting
-        p = Polynomial.fit(x, y, 2)
+        p = Polynomial.fit(x, y, 3)
         coeffs = p.convert().coef
         coefficients.append(coeffs)
@@ -460,8 +464,10 @@
 
     # Print coefficients and p-values
     print("Quadratic coefficients and p-values for each dimension:")
-    for i, (coeffs, p_value) in enumerate(zip(coefficients, p_values)):
-        print(f"Dimension {i + 1}: coefficients = {coeffs}, p-value = {p_value}")
+    with open("output/evaluation_results/figs/quad.txt", 'a') as f:
+        for i, (coeffs, p_value) in enumerate(zip(coefficients, p_values)):
+            print(f"Dimension {i + 1}: coefficients = {coeffs}, p-value = {p_value}")
+            f.write(f"Dimension {i + 1}: coefficients = {coeffs}, p-value = {p_value}\n\n")
 
     # Plot positive coefficients
     plt.figure(figsize=(14, 7))
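One caveat the diff leaves open: `Polynomial.fit(x, y, 3)` now fits a cubic, while the function name, the printed label, and presumably the p-value test still say quadratic. A quick check of what the returned coefficients mean, on synthetic data:

import numpy as np
from numpy.polynomial import Polynomial

x = np.arange(9)
y = 2.0 - 0.5 * x + 0.1 * x**3  # synthetic series with a cubic trend
p = Polynomial.fit(x, y, 3)
coeffs = p.convert().coef  # lowest degree first, in the unscaled basis
print(np.round(coeffs, 3))  # ~[ 2.  -0.5  0.   0.1]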
10 changes: 7 additions & 3 deletions src/evaluation/test_eval_01.py
@@ -3,7 +3,7 @@
 import os, json
 from multiprocessing import freeze_support
 from . import quantify as qt
-
+import numpy as np
 """
 generate_alpaca('mc', os.path.join('src', 'evaluation', 'raw_dataset', 'moralchoice'))
 generate_alpaca('views', os.path.join('src', 'evaluation', 'raw_dataset', 'views'))
@@ -27,12 +27,16 @@
 for m in set_model:
     boi = Model(m)
     v = boi.evaluate(method="fast")
-    # v = qt.calculate_model('output/evaluation_results/' + m + '_single/', m)
+    #v = qt.calculate_model('output/evaluation_results/' + m + '_single/', m)
     vec.append(v)
 test_name = "8b_13to21"
 with open("output/evaluation_results/" + test_name + ".json", "w") as f:
     lst = [list(boi) for boi in vec]
     json.dump(lst, f)
+vec = np.array(vec)
 # qt.analyze_vectors_quadratic(vec)
 # vec = json.load(open("output/evaluation_results/" + test_name + ".json", "r"))
 # qt.plot_parallel_coordinates(vec)
-qt.plot_heatmap(vec, test_name, [(0,4), (5,9), (10,14), (15,18)])
+qt.plot_heatmap(vec[:, 10:15], test_name + '_foundation', norm="group")
+qt.plot_heatmap(vec[:, 15:19], test_name + '_view', norm="group")
+qt.plot_heatmap(vec[:, :10], test_name + '_morality')
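The three slices suggest how the 19 evaluation dimensions are grouped; this is an inference from the index ranges, not anything documented in the diff:

import numpy as np

vec = np.random.rand(9, 19)  # placeholder: one 19-dim vector per checkpoint
morality   = vec[:, :10]     # dims 1-10, column-normalized heatmap
foundation = vec[:, 10:15]   # dims 11-15, group-normalized
view       = vec[:, 15:19]   # dims 16-19, group-normalized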