test_model.py
import numpy as np
import pytest
import torch

# Checkpoints being compared: the exported NumPy archive and the original
# PyTorch checkpoint.
np_model = "./stories15M.model.npz"
pt_model = "./stories15M.pt"

dct = np.load(np_model)
ckpt = torch.load(pt_model)
state_dict = ckpt["model"]
args = ckpt["model_args"]
n_layers = args["n_layers"]


@pytest.mark.parametrize("lid", range(n_layers))
def test_ffn(lid):
    # Feed-forward weights: w1/w2/w3 correspond to gate/down/up projections.
    assert np.allclose(
        state_dict[f"layers.{lid}.feed_forward.w1.weight"],
        dct[f"model.layers.{lid}.mlp.gate_proj.weight"],
    )
    assert np.allclose(
        state_dict[f"layers.{lid}.feed_forward.w2.weight"],
        dct[f"model.layers.{lid}.mlp.down_proj.weight"],
    )
    assert np.allclose(
        state_dict[f"layers.{lid}.feed_forward.w3.weight"],
        dct[f"model.layers.{lid}.mlp.up_proj.weight"],
    )


@pytest.mark.parametrize("lid", range(n_layers))
def test_rms(lid):
    # RMSNorm weights before attention and before the feed-forward block.
    assert np.allclose(
        state_dict[f"layers.{lid}.attention_norm.weight"],
        dct[f"model.layers.{lid}.input_layernorm.weight"],
    )
    assert np.allclose(
        state_dict[f"layers.{lid}.ffn_norm.weight"],
        dct[f"model.layers.{lid}.post_attention_layernorm.weight"],
    )


@pytest.mark.parametrize("name", ["q", "k", "v", "o"])
@pytest.mark.parametrize("lid", range(n_layers))
def test_attn(name, lid):
    # Attention projections: wq/wk/wv/wo correspond to q/k/v/o_proj.
    assert np.allclose(
        state_dict[f"layers.{lid}.attention.w{name}.weight"],
        dct[f"model.layers.{lid}.self_attn.{name}_proj.weight"],
    )


def test_un_block():
    # Weights outside the transformer blocks: token embeddings, final norm, LM head.
    assert np.allclose(
        state_dict["tok_embeddings.weight"],
        dct["model.embed_tokens.weight"],
    )
    assert np.allclose(state_dict["norm.weight"], dct["model.norm.weight"])
    assert np.allclose(state_dict["output.weight"], dct["lm_head.weight"])
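

# A minimal sketch for running these checks directly, assuming pytest is
# installed and both checkpoint files sit in the working directory; the
# usual invocation is simply `pytest test_model.py`.
if __name__ == "__main__":
    import sys

    sys.exit(pytest.main([__file__, "-q"]))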