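"""Compare HuReF-style weight invariants between two Llama checkpoints.

Loads a base model and a second model, applies a rotation to the second
model's weights, then prints the cosine similarity between three invariants
built from the embedding matrix and the layer-0 attention/MLP projection
matrices of each model. The commented-out blocks below sketch optional
checks (output alignment on wikitext, raw invariant values).
"""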
import torch
from transformers import AutoModelForCausalLM

from tracing.utils.llama.model import rotate_model_t5

torch.set_default_dtype(torch.bfloat16)

model_name = "meta-llama/Llama-2-7b-hf"
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to("cuda")

model_rot_name = "yahma/llama-7b-hf"
model_rotated = AutoModelForCausalLM.from_pretrained(model_rot_name, torch_dtype=torch.bfloat16).to(
    "cuda"
)
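# Rotate the second model's weights with rotate_model_t5 from
# tracing.utils.llama.model; rotate_model (commented below) is an alternative rotation.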
rotate_model_t5(model_rotated)
# rotate_model(model_rotated)

# Fixing the layer norms to 1's (HuReF works)
# fix_layer_norm(model)
# fix_layer_norm(model_rotated)

# Optional check that the two models produce aligned outputs on wikitext:
# base_tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
# dataset = prepare_hf_dataset("dlwh/wikitext_103_detokenized", 512, base_tokenizer)
# dataloader = prepare_hf_dataloader(dataset, 1)

# evaluate_model = evaluate(model, dataloader)
# evaluate_rotated = evaluate(model_rotated, dataloader)

# print("outputs are aligned: ")
# print([abs(evaluate_model[i] - evaluate_rotated[i]) <= 0.01 for i in range(len(evaluate_model))])

weights = model.state_dict()
weights_rotated = model_rotated.state_dict()

# model.to('cuda')
# print("invariant 1")
# print(weights['model.embed_tokens.weight']@weights['model.layers.0.self_attn.q_proj.weight'].T@weights['model.layers.0.self_attn.k_proj.weight']@weights['model.embed_tokens.weight'].T)
# print("invariant 2")
# print(weights['model.embed_tokens.weight']@weights['model.layers.0.self_attn.v_proj.weight'].T@weights['model.layers.0.self_attn.o_proj.weight'].T@weights['model.embed_tokens.weight'].T)
# print("invariant 3")
# print(weights['model.embed_tokens.weight']@weights[f'model.layers.0.mlp.up_proj.weight'].T@weights[f'model.layers.0.mlp.down_proj.weight'].T@weights['model.embed_tokens.weight'].T)
# print()
# model.to('cpu')

# model_rotated.to('cuda')
# print("rotated")
# print("invariant 1")
# print(weights_rotated['model.embed_tokens.weight']@weights_rotated['model.layers.0.self_attn.q_proj.weight'].T@weights_rotated['model.layers.0.self_attn.k_proj.weight']@weights_rotated['model.embed_tokens.weight'].T)
# print("invariant 2")
# print(weights_rotated['model.embed_tokens.weight']@weights_rotated['model.layers.0.self_attn.v_proj.weight'].T@weights_rotated['model.layers.0.self_attn.o_proj.weight'].T@weights_rotated['model.embed_tokens.weight'].T)
# print("invariant 3")
# print(weights_rotated['model.embed_tokens.weight']@weights_rotated[f'model.layers.0.mlp.up_proj.weight'].T@weights_rotated[f'model.layers.0.mlp.down_proj.weight'].T@weights_rotated['model.embed_tokens.weight'].T)
# print()
# model_rotated.to('cpu')

# Cosine similarity
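# For each invariant below, we flatten the matrix computed from each model's
# weights and report the cosine similarity between the two flattened vectors;
# a value close to 1 indicates the invariant matches across the two models.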

print("cosine similarity")

model.to("cuda")
print("invariant 1")
invariant = (
    weights["model.embed_tokens.weight"]
    @ weights["model.layers.0.self_attn.q_proj.weight"].T
    @ weights["model.layers.0.self_attn.k_proj.weight"]
    @ (weights["model.embed_tokens.weight"]).T
)
model.to("cpu")
model_rotated.to("cuda")
invariant_rotated = (
    weights_rotated["model.embed_tokens.weight"]
    @ weights_rotated["model.layers.0.self_attn.q_proj.weight"].T
    @ weights_rotated["model.layers.0.self_attn.k_proj.weight"]
    @ (weights_rotated["model.embed_tokens.weight"]).T
)
model_rotated.to("cpu")
invariant.to("cuda")
invariant_rotated.to("cuda")
invariant = torch.flatten(invariant)
invariant_rotated = torch.flatten(invariant_rotated)
print(
    torch.dot(invariant, invariant_rotated)
    / (torch.norm(invariant) * torch.norm(invariant_rotated))
)

model.to("cuda")
print("invariant 2")
invariant = (
    weights["model.embed_tokens.weight"]
    @ weights["model.layers.0.self_attn.v_proj.weight"].T
    @ weights["model.layers.0.self_attn.o_proj.weight"].T
    @ weights["model.embed_tokens.weight"].T
)
model.to("cpu")
model_rotated.to("cuda")
invariant_rotated = (
    weights_rotated["model.embed_tokens.weight"]
    @ weights_rotated["model.layers.0.self_attn.v_proj.weight"].T
    @ weights_rotated["model.layers.0.self_attn.o_proj.weight"].T
    @ weights_rotated["model.embed_tokens.weight"].T
)
model_rotated.to("cpu")
invariant.to("cuda")
invariant_rotated.to("cuda")
invariant = torch.flatten(invariant)
invariant_rotated = torch.flatten(invariant_rotated)
print(
    torch.dot(invariant, invariant_rotated)
    / (torch.norm(invariant) * torch.norm(invariant_rotated))
)

model.to("cuda")
print("invariant 3")
invariant = (
    weights["model.embed_tokens.weight"]
    @ weights["model.layers.0.mlp.up_proj.weight"].T
    @ weights["model.layers.0.mlp.down_proj.weight"].T
    @ weights["model.embed_tokens.weight"].T
)
model.to("cpu")
model_rotated.to("cuda")
invariant_rotated = (
    weights_rotated["model.embed_tokens.weight"]
    @ weights_rotated["model.layers.0.mlp.up_proj.weight"].T
    @ weights_rotated["model.layers.0.mlp.down_proj.weight"].T
    @ weights_rotated["model.embed_tokens.weight"].T
)
model_rotated.to("cpu")
invariant.to("cuda")
invariant_rotated.to("cuda")
invariant = torch.flatten(invariant)
invariant_rotated = torch.flatten(invariant_rotated)
print(
    torch.dot(invariant, invariant_rotated)
    / (torch.norm(invariant) * torch.norm(invariant_rotated))
)