Enhance Logging in ExperimentManager #12

Open · wants to merge 1 commit into base: main
experiment_manager.py: 32 changes (12 additions, 20 deletions)
@@ -1,3 +1,4 @@
+
 import logging
 import random
 from typing import List, Dict
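
Note: the diff assumes a module-level `_logger`. The snippet below is a minimal sketch of how the enhanced INFO messages can be surfaced locally; the `basicConfig` call and format string are assumptions for illustration, not part of this PR.

```python
import logging

# Assumed module-level logger, matching the _logger calls in this diff.
_logger = logging.getLogger(__name__)

# Hypothetical configuration so the INFO-level messages touched by this PR are visible.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)s %(levelname)s: %(message)s",
)
```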
@@ -35,15 +36,13 @@ def __init__(self, test_df: pd.DataFrame, train_df: pd.DataFrame, model: PCWMode
         self._initialize_labels_and_logit_processor(labels)
 
     def _initialize_labels_and_logit_processor(self, labels: List[str]) -> None:
-        _logger.info(f"Provided labels: {labels}")
+        _logger.info(f"Initializing labels and logit processor with provided labels: {labels}")
         labels_tokens = encode_labels(self.tokenizer, labels)
         labels_tokens_array = self.minimize_labels_tokens(labels_tokens)
         _logger.info(f"Provided labels average n_tokens: {np.round(np.mean([len(lt) for lt in labels_tokens]), 3)}")
-        # we fix the labels accordingly in the test set:
         shorten_label_tokens = [t[t != self.tokenizer.eos_token_id].tolist() for t in labels_tokens_array]
         _logger.info(
-            f"shortened labels average n_tokens: {np.round(np.mean([len(lt) for lt in shorten_label_tokens]), 3)}")
-        # Moving the test set label tokens to their shorter version:
+            f"Shortened labels average n_tokens: {np.round(np.mean([len(lt) for lt in shorten_label_tokens]), 3)}")
         map_labels = {old_label: self.tokenizer.decode(t).lstrip() for old_label, t in
                       zip(labels, shorten_label_tokens)}
         self.test_df[LABEL_TOKENS] = self.test_df[LABEL_TOKENS].map(map_labels)
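
The `map_labels` comprehension above rewrites each test-set label to the decoded form of its shortened token sequence. A toy, self-contained sketch of that mapping with a stand-in decoder (all names and token ids here are invented for illustration):

```python
def decode(token_ids):
    # Stand-in for tokenizer.decode; real ids and strings will differ.
    fake_vocab = {345: " pos", 612: " neg"}
    return "".join(fake_vocab[t] for t in token_ids)

labels = ["positive", "negative"]
shorten_label_tokens = [[345], [612]]  # hypothetical shortened token ids

map_labels = {old_label: decode(t).lstrip()
              for old_label, t in zip(labels, shorten_label_tokens)}
print(map_labels)  # {'positive': 'pos', 'negative': 'neg'}
```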
@@ -57,9 +56,6 @@ def _initialize_labels_and_logit_processor(self, labels: List[str]) -> None:
         self.possible_labels = set(map_labels.values())
 
     def minimize_labels_tokens(self, labels_tokens: List[List[int]]) -> npt.NDArray[int]:
-        """
-        Minimize the number of tokens per label to be the shortest possible unique one.
-        """
         pad = len(max(labels_tokens, key=len))
         labels_tokens_array = np.array([i + [self.tokenizer.eos_token_id] * (pad - len(i)) for i in labels_tokens])
         for i, tokens in enumerate(labels_tokens):
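
The two unchanged lines above pad every label's token list with the eos token up to the longest label's length, so all labels fit in one rectangular array. A quick sketch with invented token ids (eos assumed to be 0):

```python
import numpy as np

eos_token_id = 0  # assumption; the real value depends on the tokenizer
labels_tokens = [[7], [7, 9], [4, 2, 6]]

pad = len(max(labels_tokens, key=len))  # longest label has 3 tokens
labels_tokens_array = np.array(
    [t + [eos_token_id] * (pad - len(t)) for t in labels_tokens])
print(labels_tokens_array)
# [[7 0 0]
#  [7 9 0]
#  [4 2 6]]
```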
@@ -71,19 +67,15 @@ def pad_contained_labels_with_stop_seq(self, labels_tokens: List, labels_tokens_array: npt.NDArray[
                     break
         return labels_tokens_array
 
     def pad_contained_labels_with_stop_seq(self, labels_tokens: List, labels_tokens_array: npt.NDArray[int]) \
             -> npt.NDArray[int]:
-        """
-        In case we have two labels, where one label contains the other label (for example: "A" and "A B") we need
-        to allow the restrictive decoding to produce the output "A". We support it by adding "\n" to the shorter label.
-        """
         stop_seq_token_id = encode_stop_seq(self.tokenizer, STOP_SEQUENCE)
         for i, tokens in enumerate(labels_tokens):
             labels_with_shared_beginnings = np.sum(
                 np.all(labels_tokens_array[:, :len(tokens)] == np.array(tokens), axis=1))
             if labels_with_shared_beginnings > 1:
-                _logger.info(f"label{self.tokenizer.decode(tokens)} is the beginning of one of the other labels,"
-                             f"adding stop sequence to its end")
+                _logger.info(f"Label '{self.tokenizer.decode(tokens)}' is the beginning of one of the other labels, "
+                             f"adding stop sequence token to its end.")
                 labels_tokens_array[i, len(tokens)] = stop_seq_token_id
         return labels_tokens_array
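
For context on the log message changed above: `labels_with_shared_beginnings` counts how many padded rows start with a given label's tokens, and a count above 1 means that label is a prefix of another and gets the stop-sequence token appended. A small demonstration with invented token ids:

```python
import numpy as np

# Hypothetical ids: "positive" -> [7] is a prefix of "positive review" -> [7, 9].
labels_tokens = [[7], [7, 9]]
labels_tokens_array = np.array([[7, 0], [7, 9]])  # padded with assumed eos 0

for tokens in labels_tokens:
    shared = np.sum(
        np.all(labels_tokens_array[:, :len(tokens)] == np.array(tokens), axis=1))
    print(tokens, int(shared))
# [7] 2     -> prefix of another label, needs the stop-sequence token
# [7, 9] 1  -> already unique, left as-is
```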

@@ -105,7 +97,7 @@ def get_predicted_labels(self, windows_few_shots: List[str]) -> List[str]:
         return predicted_labels
 
     def predict_label(self, task_text: str, cache: Dict) -> str:
-        assert task_text == task_text.rstrip(), "prompt ends with a space!"
+        assert task_text == task_text.rstrip(), "Prompt ends with a space!"
         res = self.model.pcw_generate(task_text=task_text,
                                       contexts_cache=cache,
                                       restrictive_logit_preprocessor=self.logit_processor,
@@ -117,14 +109,14 @@ def predict_label(self, task_text: str, cache: Dict) -> str:
     def calc_acc(self, predicted_labels: List) -> float:
         predicted_labels = pd.Series(predicted_labels, index=self.test_df.index)
         acc = np.mean(predicted_labels == self.test_df[LABEL_TOKENS])
-        _logger.info(f"accuracy = {np.round(acc, 3)}")
+        _logger.info(f"Calculated accuracy = {np.round(acc, 3)}")
         return acc
 
     def run_experiment_across_shots(self, n_shots_to_test: List[int], n_runs: int,
                                     too_long_patience: float = 0.2):
         accuracies = np.zeros((len(n_shots_to_test), n_runs))
         for i, n_shots in enumerate(tqdm(n_shots_to_test)):
-            _logger.info(f"starting with n = {n_shots}")
+            _logger.info(f"Starting experiment with n = {n_shots}")
             self._set_random_seed(self.base_random_seed + n_shots)
             j = 0
             n_errors = 0
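
`calc_acc` (updated above) leans on pandas index alignment: the predictions are wrapped in a Series that shares the test set's index, so the element-wise comparison matches rows even when the index is not 0..n-1. A self-contained check with invented data:

```python
import numpy as np
import pandas as pd

test_df = pd.DataFrame({"label": ["pos", "neg", "pos"]}, index=[10, 11, 12])
predicted = pd.Series(["pos", "neg", "neg"], index=test_df.index)  # aligned by index

acc = np.mean(predicted == test_df["label"])
print(np.round(acc, 3))  # 0.667 - two of the three rows match
```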
@@ -139,15 +131,15 @@ def run_experiment_across_shots(self, n_shots_to_test: List[int], n_runs: int,
                         + self.max_n_tokens) > self.model.context_window_size:
                     _logger.warning("Drawn training shots were too long, trying again")
                     n_errors += 1
-                    assert n_errors <= too_long_patience * n_runs, "too many long inputs were drawn!"
+                    assert n_errors <= too_long_patience * n_runs, "Too many long inputs were drawn!"
                     continue
                 accuracies[i, j] = self.get_few_shots_acc(windows_few_shots)
                 j += 1
         return accuracies
 
     def sample_n_shots(self, n_shots: int) -> npt.NDArray[int]:
         few_shots_df = self.train_df.sample(n_shots)
-        assert few_shots_df.index.is_unique, "few shots samples were not unique!"
+        assert few_shots_df.index.is_unique, "Few shots samples were not unique!"
         window_size = self.n_shots_per_window or n_shots
         n_windows = int(len(few_shots_df) / window_size)
         if not self.n_shots_per_window or n_windows == 1:
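
The reworded assert above caps how many oversized draws are tolerated: with the default `too_long_patience = 0.2`, a run with, say, `n_runs = 30` aborts on the seventh oversized sample. The numbers below are examples only:

```python
too_long_patience = 0.2  # default from the signature above
n_runs = 30              # example value
print(too_long_patience * n_runs)  # 6.0 -> the 7th oversized draw trips the assert
```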
@@ -163,7 +155,6 @@ def balance_windows_sizes(self, n_windows: int, few_shots_df: pd.DataFrame) -> n
             order = np.argsort((np.sum(sizes[:i, :], axis=0)))
             sizes[i, :] = sizes[i, order]
             indexes[i, :] = indexes[i, order]
-        # shuffle the order in each window:
         for i in range(n_windows):
             np.random.shuffle(indexes[:, i])
         indexes = indexes.T.flatten()
@@ -175,3 +166,4 @@ def build_windows_few_shots_text(few_shots_prompts: List, window_size: int) -> L
             window_size = len(few_shots_prompts)
         return [TEXT_BETWEEN_SHOTS.join(few_shots_prompts[i: i + window_size]) for i in
                 range(0, len(few_shots_prompts), window_size)]
+
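
`build_windows_few_shots_text` chunks the shot prompts into windows of `window_size` and joins each chunk with `TEXT_BETWEEN_SHOTS`. A standalone sketch; the separator value is an assumption, the real constant is defined elsewhere in the repo:

```python
TEXT_BETWEEN_SHOTS = "\n\n"  # assumed separator

few_shots_prompts = ["shot 1", "shot 2", "shot 3", "shot 4"]
window_size = 2

windows = [TEXT_BETWEEN_SHOTS.join(few_shots_prompts[i: i + window_size])
           for i in range(0, len(few_shots_prompts), window_size)]
print(windows)  # ['shot 1\n\nshot 2', 'shot 3\n\nshot 4']
```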