Ffftdtd5dtft committed on
Commit
8ee52fa
verified
1 Parent(s): cf7876c

Create main.py

Files changed (1)
  1. main.py +383 -0
main.py ADDED
@@ -0,0 +1,383 @@
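+ # main.py: loads several Hugging Face models, applies aggressive pruning/quantization and
+ # resource optimizations, routes prompts to diffusion/BART/Whisper/T5/MusicGen pipelines,
+ # and pushes a combined model to the Hub.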
+ import torch
+ from transformers import (
+     AutoTokenizer,
+     AutoModelForCausalLM,
+     AutoModelForSeq2SeqLM,
+     AutoProcessor,
+     AutoModelForSpeechSeq2Seq,
+     AutoModelForTextToWaveform,
+     GPT2LMHeadModel,
+     DistilBertModel
+ )
+ from diffusers import DiffusionPipeline
+ import time
+ import os
+ from dotenv import load_dotenv
+ from huggingface_hub import HfApi, HfFolder, Repository
+
+ load_dotenv()
+
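+ # NOTE (assumption): pushing to the Hub at the end of this script requires authentication;
+ # load_dotenv() is expected to provide a token (e.g. HF_TOKEN) via .env or the HF cache.
+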
+ def prune_model(model, amount=0.5):
+     from torch.nn.utils import prune
+     for name, module in model.named_modules():
+         if isinstance(module, (torch.nn.Linear, torch.nn.Conv2d)):
+             prune.l1_unstructured(module, name='weight', amount=amount)
+             prune.remove(module, 'weight')
+     return model
+
+ def quantize_to_q1_with_min(tensor, min_value=-1):
+     tensor = torch.sign(tensor)
+     tensor[tensor < min_value] = min_value
+     return tensor
+
+ def quantize_model_to_q1_with_min(model, min_value=-1):
+     for name, param in model.named_parameters():
+         if param.dtype in [torch.float32, torch.float16]:
+             with torch.no_grad():
+                 param.copy_(quantize_to_q1_with_min(param.data, min_value))
+
+ def disable_unnecessary_components(model):
+     for name, module in model.named_modules():
+         if isinstance(module, torch.nn.Dropout):
+             module.p = 0.0
+         elif isinstance(module, torch.nn.BatchNorm1d):
+             module.eval()
+
+ def ultra_max_compress(model):
+     model = prune_model(model, amount=0.8)
+     quantize_model_to_q1_with_min(model, min_value=-0.05)
+     disable_unnecessary_components(model)
+     with torch.no_grad():
+         for name, param in model.named_parameters():
+             if param.requires_grad:
+                 param.requires_grad = False
+                 param.data = torch.nn.functional.hardtanh(param.data, min_val=-1.0, max_val=1.0)
+                 param.data = param.data.half()
+     try:
+         model = torch.jit.script(model)
+     except Exception:
+         pass
+     prune_model(model, amount=0.9)
+     model.eval()
+     for buffer_name, buffer in model.named_buffers():
+         if buffer.numel() == 0:
+             model._buffers.pop(buffer_name)
+     return model
+
+ def optimize_model_resources(model):
+     torch.set_grad_enabled(False)
+     model.eval()
+     for name, param in model.named_parameters():
+         param.requires_grad = False
+         if param.dtype == torch.float32:
+             param.data = param.data.half()
+     if hasattr(model, 'config'):
+         if hasattr(model.config, 'max_position_embeddings'):
+             model.config.max_position_embeddings = min(model.config.max_position_embeddings, 512)
+         if hasattr(model.config, 'hidden_size'):
+             model.config.hidden_size = min(model.config.hidden_size, 768)
+     try:
+         # torch.jit.optimize_for_inference expects a scripted module; skip if scripting fails
+         model = torch.jit.optimize_for_inference(torch.jit.script(model))
+     except Exception:
+         pass
+     return model
+
+ def generate_random_responses(model, tokenizer, prompt, num_responses=5, max_length=50):
+     responses = []
+     for _ in range(num_responses):
+         input_ids = tokenizer.encode(prompt, return_tensors="pt")
+         output = model.generate(input_ids, max_length=max_length, do_sample=True, top_k=50)
+         response = tokenizer.decode(output[0], skip_special_tokens=True)
+         responses.append(response)
+     return responses
+
+ def patched_distilbert_forward(self, input_ids=None, attention_mask=None, head_mask=None, inputs_embeds=None, output_attentions=None, output_hidden_states=None, return_dict=None):
+     return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+     outputs = DistilBertModel.forward(self, input_ids, attention_mask=attention_mask, head_mask=head_mask, inputs_embeds=inputs_embeds, output_attentions=output_attentions, output_hidden_states=output_hidden_states, return_dict=return_dict)
+     if not return_dict:
+         output_tuple = []
+         for v in [outputs.last_hidden_state, outputs.hidden_states, outputs.attentions]:
+             if v is not None:
+                 output_tuple.append(v)
+         return tuple(output_tuple)
+     return outputs
+
+ def patched_forward(self, input_ids=None, attention_mask=None, head_mask=None, inputs_embeds=None, labels=None, output_attentions=None, output_hidden_states=None, return_dict=None):
+     return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+     outputs = self.distilbert(input_ids, attention_mask=attention_mask, head_mask=head_mask, inputs_embeds=inputs_embeds, output_attentions=output_attentions, output_hidden_states=output_hidden_states, return_dict=return_dict)
+     hidden_state = outputs[0]
+     pooled_output = self.pre_classifier(hidden_state[:, 0])
+     pooled_output = self.dropout(pooled_output)
+     logits = self.classifier(pooled_output)
+     if not return_dict:
+         output = (logits,) + outputs[1:]
+         return output
+     return logits
+
+ def patched_roberta_forward(self, input_ids=None, attention_mask=None, head_mask=None, inputs_embeds=None, labels=None, output_attentions=None, output_hidden_states=None, return_dict=None):
+     return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+     outputs = self.roberta(input_ids, attention_mask=attention_mask, head_mask=head_mask, inputs_embeds=inputs_embeds, output_attentions=output_attentions, output_hidden_states=output_hidden_states, return_dict=return_dict)
+     hidden_state = outputs[0]
+     pooled_output = hidden_state[:, 0]
+     pooled_output = self.dropout(pooled_output)
+     logits = self.classifier(pooled_output)
+     if not return_dict:
+         output = (logits,) + outputs[1:]
+         return output
+     return logits
+
+ def optimize_for_low_resources(model):
+     model = ultra_max_compress(model)
+     model = optimize_model_resources(model)
+     model.config.max_position_embeddings = 256
+     model.config.hidden_size = 384
+     return model
+
+ def optimize_for_very_low_resources(model):
+     model = ultra_max_compress(model)
+     model = optimize_model_resources(model)
+     model.config.max_position_embeddings = 128
+     model.config.hidden_size = 256
+     return model
+
+ def remove_unused_model_components(model):
+     for name, param in model.named_parameters():
+         if param.numel() == 0:
+             model._parameters.pop(name, None)
+     return model
+
+ def auto_train_model(model, train_data, epochs=3):
+     optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
+     model.train()
+     for epoch in range(epochs):
+         for batch in train_data:
+             inputs, labels = batch
+             optimizer.zero_grad()
+             outputs = model(**inputs, labels=labels)
+             loss = outputs.loss
+             loss.backward()
+             optimizer.step()
+     return model
+
+ def apply_extreme_filters(model):
+     model = ultra_max_compress(model)
+     model = optimize_model_resources(model)
+     model.config.max_position_embeddings = 128
+     model.config.hidden_size = 256
+     try:
+         model = torch.jit.optimize_for_inference(torch.jit.script(model))
+     except Exception:
+         pass
+     model = prune_model(model, amount=0.95)
+     quantize_model_to_q1_with_min(model, min_value=-0.1)
+     return model
+
+ def reduce_latency(model, tokenizer, prompt, num_responses=5, max_length=50):
+     responses = []
+     start_time = time.time()
+     for _ in range(num_responses):
+         input_ids = tokenizer.encode(prompt, return_tensors="pt")
+         output = model.generate(input_ids, max_length=max_length, do_sample=True, top_k=50)
+         response = tokenizer.decode(output[0], skip_special_tokens=True)
+         responses.append(response)
+     end_time = time.time()
+     latency = (end_time - start_time) / num_responses * 1000
+     return responses, latency
+
+ def create_gpt_distill_model():
+     gpt_model = GPT2LMHeadModel.from_pretrained("gpt2")
+     gpt_tokenizer = AutoTokenizer.from_pretrained("gpt2")
+     return gpt_model, gpt_tokenizer
+
+ def create_gemma_distill_model():
+     gemma_model = AutoModelForCausalLM.from_pretrained("google/gemma-2-9b")
+     gemma_tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b")
+     return gemma_model, gemma_tokenizer
+
+ def measure_performance(model, tokenizer, sequence_length=20, num_tokens=100):
+     inputs = tokenizer("A" * sequence_length, return_tensors="pt")
+     start_time = time.time()
+     for _ in range(num_tokens):
+         model.generate(**inputs)
+     end_time = time.time()
+     latency = (end_time - start_time) / num_tokens * 1000
+     tokens_per_second = num_tokens / (end_time - start_time)
+     return latency, tokens_per_second
+
+ def apply_diffusion_pipeline(prompt):
+     diffusion_pipeline = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell")
+     images = diffusion_pipeline(prompt).images
+     return images
+
+ def generate_responses_with_diffusion(prompt, use_diffusion):
+     if "imagina" in prompt.lower() or "imagine" in prompt.lower():
+         images = apply_diffusion_pipeline(prompt)
+         return images
+     return None
+
+ def generate_summary_with_bart(prompt):
+     tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
+     model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
+     inputs = tokenizer.encode(prompt, return_tensors="pt")
+     summary_ids = model.generate(inputs, max_length=130, min_length=30, length_penalty=2.0, num_beams=4, early_stopping=True)
+     summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+     return summary
+
+ def generate_responses_with_bart(prompt):
+     if "resumir" in prompt.lower() or "resumime" in prompt.lower():
+         summary = generate_summary_with_bart(prompt)
+         return summary
+     return None
+
+ def apply_whisper_pipeline(prompt):
+     processor = AutoProcessor.from_pretrained("openai/whisper-small")
+     model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small")
+     inputs = processor(prompt, return_tensors="pt")
+     outputs = model.generate(**inputs)
+     transcription = processor.batch_decode(outputs, skip_special_tokens=True)
+     return transcription
+
+ def generate_transcription_with_whisper(prompt):
+     if "transcribe" in prompt.lower() or "transcribime" in prompt.lower():
+         transcription = apply_whisper_pipeline(prompt)
+         return transcription
+     return None
+
+ def apply_translation_pipeline(prompt):
+     tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-base")
+     model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-base")
+     inputs = tokenizer.encode(prompt, return_tensors="pt")
+     translated_ids = model.generate(inputs, max_length=50)
+     translated_text = tokenizer.decode(translated_ids[0], skip_special_tokens=True)
+     return translated_text
+
+ def generate_translation_with_t5(prompt):
+     if "traducir" in prompt.lower() or "traducime" in prompt.lower():
+         translation = apply_translation_pipeline(prompt)
+         return translation
+     return None
+
+ def apply_musicgen_pipeline(prompt):
+     tokenizer = AutoTokenizer.from_pretrained("facebook/musicgen-small")
+     model = AutoModelForTextToWaveform.from_pretrained("facebook/musicgen-small")
+     inputs = tokenizer(prompt, return_tensors="pt")
+     audio = model.generate(**inputs)
+     return audio
+
+ def generate_music_with_musicgen(prompt):
+     if "música" in prompt.lower() or "canción" in prompt.lower():
+         music = apply_musicgen_pipeline(prompt)
+         return music
+     return None
+
+ def apply_musicgen_melody_pipeline(prompt):
+     tokenizer = AutoTokenizer.from_pretrained("facebook/musicgen-melody")
+     model = AutoModelForTextToWaveform.from_pretrained("facebook/musicgen-melody")
+     inputs = tokenizer(prompt, return_tensors="pt")
+     audio = model.generate(**inputs)
+     return audio
+
+ def generate_music_with_musicgen_melody(prompt):
+     if "melodía" in prompt.lower() or "melodia" in prompt.lower():
+         music = apply_musicgen_melody_pipeline(prompt)
+         return music
+     return None
+
+ def apply_stable_diffusion_pipeline(prompt):
+     pipeline = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1")
+     images = pipeline(prompt).images
+     return images
+
+ def generate_responses_with_stable_diffusion(prompt):
+     if "imagen" in prompt.lower() or "image" in prompt.lower():
+         images = apply_stable_diffusion_pipeline(prompt)
+         return images
+     return None
+
+ def unify_models(*models):
+     combined_model = torch.nn.ModuleList(models)
+     return combined_model
+
+ def combined_filter(model):
+     model = ultra_max_compress(model)
+     model = optimize_model_resources(model)
+     model.config.max_position_embeddings = 128
+     model.config.hidden_size = 256
+     try:
+         model = torch.jit.optimize_for_inference(torch.jit.script(model))
+     except Exception:
+         pass
+     model = prune_model(model, amount=0.95)
+     quantize_model_to_q1_with_min(model, min_value=-0.1)
+     return model
+
+ def apply_filters_and_unify(model):
+     model = combined_filter(model)
+     model = remove_unused_model_components(model)
+     return model
+
+ def upload_to_huggingface(model, repo_name, tokenizer=None):
+     api = HfApi()
+     try:
+         api.create_repo(repo_id=repo_name, repo_type="model")
+     except Exception:
+         pass
+     model.save_pretrained(repo_name)
+     if tokenizer is not None:
+         tokenizer.save_pretrained(repo_name)
+     repo = Repository(repo_name)
+     repo.push_to_hub()
+
+ def apply_extreme_filters_and_upload(model, repo_name):
+     model = apply_extreme_filters(model)
+     upload_to_huggingface(model, repo_name)
+
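+ # NOTE: the helpers below are called later in this script but were never defined in the
+ # original file. These are minimal sketches (assumptions) composed from the functions above,
+ # not the author's original implementations.
+ def optimize_model_with_all_optimizations(model, tokenizer, prompt):
+     # Compress and optimize the model, then measure response latency on the given prompt.
+     model = ultra_max_compress(model)
+     model = optimize_model_resources(model)
+     responses, latency = reduce_latency(model, tokenizer, prompt)
+     return model, responses, latency
+
+ def optimize_for_1gb_ram(model):
+     # Assumed alias for the most aggressive resource profile defined above.
+     return optimize_for_very_low_resources(model)
+
+ def optimize_for_old_cpu(model):
+     # Assumed CPU profile: keep weights in float32 (old CPUs lack fast fp16) and run on CPU.
+     model = model.float().to("cpu")
+     model.eval()
+     return model
+
+ def optimize_for_old_gpu(model):
+     # Assumed GPU profile: half precision when CUDA is available, otherwise fall back to CPU.
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     model = model.half().to(device) if device == "cuda" else model.to(device)
+     model.eval()
+     return model
+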
+ model_infos = [
+     {"model_name": "gpt2", "class": GPT2LMHeadModel},
+     {"model_name": "google/gemma-2-9b", "class": AutoModelForCausalLM}
+ ]
+
+ for model_info in model_infos:
+     model = model_info["class"].from_pretrained(model_info["model_name"])
+     tokenizer = AutoTokenizer.from_pretrained(model_info["model_name"])
+     optimized_model, responses, latency = optimize_model_with_all_optimizations(model, tokenizer, "Sample prompt for optimization.")
+     print(f"Model: {model_info['model_name']}")
+     print(f"Latency: {latency:.2f} ms")
+     print(f"Sample Responses: {responses}")
+
+ gpt_model, gpt_tokenizer = create_gpt_distill_model()
+ gemma_model, gemma_tokenizer = create_gemma_distill_model()
+
+ optimized_gpt_model, gpt_responses, gpt_latency = optimize_model_with_all_optimizations(gpt_model, gpt_tokenizer, "Sample prompt for GPT optimization.")
+ optimized_gemma_model, gemma_responses, gemma_latency = optimize_model_with_all_optimizations(gemma_model, gemma_tokenizer, "Sample prompt for Gemma optimization.")
+
+ combined_model = unify_models(optimized_gpt_model, optimized_gemma_model)
+
+ optimized_gpt_model_1gb = optimize_for_1gb_ram(optimized_gpt_model)
+ optimized_gemma_model_1gb = optimize_for_1gb_ram(optimized_gemma_model)
+ optimized_gpt_model_low = optimize_for_very_low_resources(optimized_gpt_model)
+ optimized_gemma_model_low = optimize_for_very_low_resources(optimized_gemma_model)
+ optimized_gpt_model_cpu = optimize_for_old_cpu(optimized_gpt_model)
+ optimized_gemma_model_cpu = optimize_for_old_cpu(optimized_gemma_model)
+ optimized_gpt_model_gpu = optimize_for_old_gpu(optimized_gpt_model)
+ optimized_gemma_model_gpu = optimize_for_old_gpu(optimized_gemma_model)
+
+ print("Models optimized for various resource constraints.")
+
+ diffusion_response = generate_responses_with_diffusion("Imagine a serene landscape", True)
+ if diffusion_response:
+     print("Diffusion response generated.")
+
+ summary_response = generate_responses_with_bart("Resumir este texto para obtener un resumen efectivo.")
+ if summary_response:
+     print("Summary response generated.")
+
+ transcription_response = generate_transcription_with_whisper("Transcribe this audio file.")
+ if transcription_response:
+     print("Transcription response generated.")
+
+ translation_response = generate_translation_with_t5("Traducir este texto al inglés.")
+ if translation_response:
+     print("Translation response generated.")
+
+ music_response = generate_music_with_musicgen("Música para una tarde tranquila.")
+ if music_response:
+     print("Music response generated.")
+
+ melody_music_response = generate_music_with_musicgen_melody("Melodía para relajación.")
+ if melody_music_response:
+     print("Melody music response generated.")
+
+ image_response = generate_responses_with_stable_diffusion("Imagen de un paisaje sereno.")
+ if image_response:
+     print("Image response generated.")
+
+ upload_to_huggingface(combined_model, "xilixmeaty40/my_model")