Two more patches #100

Open
wants to merge 8 commits into main
22 changes: 17 additions & 5 deletions src/ecco/__init__.py
@@ -13,8 +13,10 @@


__version__ = '0.1.2'
transformer_deprecated_version = '4.22.1'
from ecco.lm import LM
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel, AutoModelForSeq2SeqLM
import transformers
from typing import Any, Dict, Optional, List
from ecco.util import load_config, pack_tokenizer_config

@@ -26,7 +28,10 @@ def from_pretrained(hf_model_id: str,
hidden_states: Optional[bool] = True,
activations_layer_nums: Optional[List[int]] = None,
verbose: Optional[bool] = True,
gpu: Optional[bool] = True
gpu: Optional[bool] = True,
multi_gpu: Optional[bool] = False,
cache_dir: Optional[str] = None,
torch_dtype=None
):
"""
Constructs a [LM][ecco.lm.LM] object based on a string identifier from HuggingFace Transformers. This is
@@ -71,16 +76,23 @@ def from_pretrained(hf_model_id: str,
else:
config = load_config(hf_model_id)

tokenizer = AutoTokenizer.from_pretrained(hf_model_id)
# if transformers.__version__ != transformer_deprecated_version:
tokenizer = AutoTokenizer.from_pretrained(hf_model_id, torch_dtype=torch_dtype)
# else:
# tokenizer = AutoTokenizer.from_pretrained(hf_model_id)

if config['type'] == 'enc-dec':
model_cls = AutoModelForSeq2SeqLM
elif config['type'] == 'causal':
model_cls = AutoModelForCausalLM
else:
model_cls = AutoModel

model = model_cls.from_pretrained(hf_model_id, output_hidden_states=hidden_states, output_attentions=attention)

print("transformer version:", transformers.__version__)
# if transformers.__version__ != transformer_deprecated_version:
model = model_cls.from_pretrained(hf_model_id, output_hidden_states=hidden_states, output_attentions=attention, device_map="auto" if multi_gpu else None, cache_dir=cache_dir, torch_dtype=torch_dtype)
# else:
# model = model_cls.from_pretrained(hf_model_id, output_hidden_states=hidden_states, output_attentions=attention, cache_dir=cache_dir)

lm_kwargs = {
'model_name': hf_model_id,
@@ -90,6 +102,6 @@ def from_pretrained(hf_model_id: str,
'verbose': verbose,
'gpu': gpu}

lm = LM(model, tokenizer, **lm_kwargs)
lm = LM(model, tokenizer, torch_dtype=torch_dtype, **lm_kwargs)

return lm
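
A minimal usage sketch of the new keyword arguments (the model id, cache path, and dtype below are illustrative; the generate call is ecco's existing API):

import torch
import ecco

# multi_gpu=True maps to device_map="auto" in the underlying transformers call,
# cache_dir sets where weights are cached, and torch_dtype is forwarded to model
# loading (e.g. half precision for large models).
lm = ecco.from_pretrained(
    'gpt2',
    multi_gpu=True,
    cache_dir='/tmp/hf_cache',
    torch_dtype=torch.float16
)
output = lm.generate("Two more patches", generate=5, do_sample=False)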
83 changes: 76 additions & 7 deletions src/ecco/attribution.py
@@ -1,4 +1,5 @@
from functools import partial
from time import time
import torch
from typing import Any, Dict
from captum.attr import (
@@ -10,11 +11,19 @@
GuidedBackprop,
GuidedGradCam,
Deconvolution,
LRP
LRP,
Lime,
LimeBase,
KernelShap,
GradientShap,
Occlusion,
)
import numpy as np
import numpy.linalg as la
from torch.nn import functional as F
import transformers

IG_N_STEPS=50

ATTR_NAME_ALIASES = {
'ig': 'integrated_gradients',
@@ -24,7 +33,12 @@
'gb': 'guided_backprop',
'gg': 'guided_gradcam',
'deconv': 'deconvolution',
'lrp': 'layer_relevance_propagation'
'lrp': 'layer_relevance_propagation',
'lime': 'lime',
'limebase': 'limebase',
'shap': 'shap',
'gshap': 'gshap',
'occlusion': 'occlusion'
}

ATTR_NAME_TO_CLASS = { # TODO: Add more Captum Primary attributions with needed computed arguments
@@ -36,12 +50,18 @@
'guided_backprop': GuidedBackprop,
'guided_gradcam': GuidedGradCam,
'deconvolution': Deconvolution,
'layer_relevance_propagation': LRP
'layer_relevance_propagation': LRP,
'lime': Lime,
'limebase': LimeBase,
'shap': KernelShap,
'gshap': GradientShap,
'occlusion': Occlusion
}


def compute_primary_attributions_scores(attr_method : str, model: transformers.PreTrainedModel,
forward_kwargs: Dict[str, Any], prediction_id: torch.Tensor,
supertoken_range: list,
aggregation: str = "L2") -> torch.Tensor:
"""
Computes the primary attributions with respect to the specified `prediction_id`.
@@ -83,9 +103,11 @@ def normalize_attributes(attributes: torch.Tensor) -> torch.Tensor:
input_ = forward_kwargs.get('inputs_embeds')
decoder_ = forward_kwargs.get('decoder_inputs_embeds')

# for dec-only models
if decoder_ is None:
forward_func = partial(model_forward, decoder_=decoder_, model=model, extra_forward_args=extra_forward_args)
inputs = input_
# for enc-dec models
else:
forward_func = partial(model_forward, model=model, extra_forward_args=extra_forward_args)
inputs = tuple([input_, decoder_])
@@ -97,12 +119,59 @@ def normalize_attributes(attributes: torch.Tensor) -> torch.Tensor:
f"Please choose one of the methods: {list(ATTR_NAME_TO_CLASS.keys())}"
)

ig = attr_method_class(forward_func=forward_func)
attributions = ig.attribute(inputs, target=prediction_id)
# ig = attr_method_class(forward_func=forward_func, multiply_by_inputs=True) # for [saliency, ig]
ig = attr_method_class(forward_func=forward_func) # for [lime, shap]

# print("inputs shape is", inputs.shape)
# attributions = ig.attribute(inputs, target=prediction_id, n_steps=IG_N_STEPS) # for [ig]
# attributions = ig.attribute(inputs, target=prediction_id) # for [saliency, lime, shap]

# feature_mask should be of size torch.Size([1, 216, 768]), with all the same number in each row
# like this: [[[0, 0, 0, ..., 0, 0, 0], [1, 1, 1, ..., 1, 1, 1], ..., [215, 215, 215, ..., 215, 215, 215]]]
feature_mask = torch.zeros(inputs.shape, dtype=torch.long)
for i in range(inputs.shape[1]):
feature_mask[0][i] = i
feature_mask = feature_mask.to(inputs.device)
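# A vectorized equivalent of the loop above (a sketch; same result: every embedding
# dimension of token i gets feature id i, so the perturbation methods toggle whole
# token embeddings at once):
# feature_mask = torch.arange(inputs.shape[1], device=inputs.device).view(1, -1, 1).expand(inputs.shape)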

# feature_mask should be of size torch.Size([1, 216, 768]), with all the same number for each citation range
# feature_mask = torch.zeros(inputs.shape, dtype=torch.long)
# num_citations = len(supertoken_range)
# j = 0
# for i in range(inputs.shape[1]):
# # a trick here: j-1 initially gives -1, which is consistent with the check below
# # draw on a piece of paper how this algorithm works
# if j < num_citations:
# if i > supertoken_range[j-1] and i < supertoken_range[j]:
# feature_mask[0][i] = j
# elif i == supertoken_range[j] and j < num_citations - 1:
# j += 1
# feature_mask[0][i] = j
# else:
# feature_mask[0][i] = 0
# else:
# feature_mask[0][i] = 0
# feature_mask = feature_mask.to(inputs.device)
# print("feature_mask shape is", feature_mask.shape)
# print("feature_mask is", feature_mask)
feature_mask_idxs = [0, 27, 203, 424, 572, 743, 919]
# for feature_mask_idx in feature_mask_idxs:
# print("feature_mask[0][{}] is".format(feature_mask_idx), feature_mask[0][feature_mask_idx])

attributions = ig.attribute(
inputs, # already batched: embeddings of shape (1, seq_len, hidden)
target=prediction_id,
feature_mask=feature_mask,
n_samples=5,
show_progress=True
) # for [lime, shap]

if decoder_ is not None:
# Does it make sense to concatenate encoder and decoder attributions before normalization?
# We assume that the encoder/decoder embeddings are the same
return normalize_attributes(torch.cat(attributions, dim=1))
normalized_attributes = normalize_attributes(torch.cat(attributions, dim=1))
else:
return normalize_attributes(attributions)
normalized_attributes = normalize_attributes(attributions)

# print("normalized_attributes is", normalized_attributes)

return normalized_attributes
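
For reference, a standalone sketch of the per-citation grouping that the commented-out block above is after, assuming supertoken_range holds the sorted, inclusive end index of each citation span; the helper name and the example indices (borrowed from feature_mask_idxs) are illustrative only:

import torch

def build_citation_feature_mask(inputs: torch.Tensor, supertoken_range: list) -> torch.Tensor:
    # Group every token of citation span j (and all of its embedding dimensions)
    # under feature id j, so Lime/KernelShap perturb one citation at a time.
    # inputs: embeddings of shape (1, seq_len, hidden).
    seq_len = inputs.shape[1]
    token_groups = torch.zeros(seq_len, dtype=torch.long)
    start = 0
    for j, end in enumerate(supertoken_range):
        token_groups[start:end + 1] = j
        start = end + 1
    # Tokens past the last span keep group 0, mirroring the fallback above.
    return token_groups.view(1, -1, 1).expand(inputs.shape).to(inputs.device)

# e.g. feature_mask = build_citation_feature_mask(inputs, [27, 203, 424, 572, 743, 919])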