Bertug1911 committed on
Commit
1dd5b4a
·
verified ·
1 Parent(s): 77e2dbb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -0
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import sys
3
+ import time
4
+ from collections import defaultdict, deque
5
+
6
# Automatic dependency installation
def install_and_import(package):
    """Import *package*, pip-installing it first if it is missing.

    Args:
        package: Importable module name; assumed to also be the pip
            distribution name — TODO confirm for packages where they differ.
    """
    try:
        __import__(package)
    except ImportError:
        print(f"{package} is not installed, installing...")
        # argv list (no shell) — not vulnerable to shell injection.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        # Fix: actually import after installing, as the function name promises.
        __import__(package)
14
+
15
# Ensure the third-party runtime dependencies exist before importing them below.
for _pkg in ("gradio", "transformers", "torch"):
    install_and_import(_pkg)
18
+
19
+ import gradio as gr
20
+ from transformers import AutoTokenizer, AutoModelForCausalLM
21
+ import torch
22
+
23
# === RATE LIMIT ===
# Per-session request timestamps: one deque per window ("minute"/"hour"/"day").
click_logs = defaultdict(lambda: {"minute": deque(), "hour": deque(), "day": deque()})
# window name -> (max requests allowed, window length in seconds)
LIMITS = {"minute": (13, 60), "hour": (90, 3600), "day": (1350, 86400)}
26
+
27
def check_rate_limit(session_id):
    """Sliding-window rate limiter over minute/hour/day buckets.

    Returns a 4-tuple ``(ok, message, remaining, reset_times)`` where
    ``ok`` is False (with a human-readable ``message``) when any window
    is exhausted; timestamps are only recorded when all windows pass.
    """
    now = time.time()
    logs = click_logs[session_id]
    remaining = {}
    reset_times = {}
    for window, (cap, span) in LIMITS.items():
        bucket = logs[window]
        # Evict timestamps that have aged out of this window.
        while bucket and now - bucket[0] > span:
            bucket.popleft()
        used = len(bucket)
        remaining[window] = max(0, cap - used)
        if bucket:
            reset_times[window] = int(span - (now - bucket[0]))
        else:
            reset_times[window] = int(span)
        if used >= cap:
            return False, f"⛔ {window.capitalize()} rate limit exceeded ({cap}/{window})", remaining, reset_times
    # No window exhausted — record this request in every bucket.
    for window in LIMITS:
        logs[window].append(now)
    return True, None, remaining, reset_times
44
+
45
# === CHAT GENERATION HELPERS ===
def extract_response_between_tokens(text: str) -> str:
    """Return the assistant reply between the chat-template markers.

    Falls back to returning *text* unchanged when the assistant start
    marker is absent.
    """
    start = "<|im_start|>assistant<|im_sep|>"
    end = "<|im_end|>"
    try:
        # split(start)[1] raises IndexError when the marker is missing;
        # narrowed from a blanket `except Exception` to just that case.
        return text.split(start)[1].split(end)[0]
    except IndexError:
        return text
54
+
55
# Model loading
model_name = "Bertug1911/BrtGPT-1-Pre"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Prefer GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # inference mode: disables dropout/batch-norm updates

# Special token ID used as the stop condition during generation.
im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
65
+
66
# Generation function
def chat_generate(prompt, temperature, top_k, max_new_tokens, session_id):
    """Generate one assistant reply for *prompt*, enforcing the rate limit.

    Args:
        prompt: User message text.
        temperature: Softmax temperature (> 0; the UI slider enforces >= 0.01).
        top_k: Keep only the k most likely tokens each step (0 disables filtering).
        max_new_tokens: Upper bound on the number of generated tokens.
        session_id: Key into the per-session rate-limit logs.

    Returns:
        A ``(response_text, status_markdown)`` tuple for the Gradio outputs.
    """
    ok, msg, rem, resets = check_rate_limit(session_id)
    if not ok:
        return msg, format_status(rem, resets)

    # Jinja chat format
    messages = [{"role": "user", "content": prompt}]
    formatted = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(formatted, return_tensors="pt").to(device)
    gen = inputs["input_ids"]

    # Hoist loop-invariant conversions out of the sampling loop.
    temp = float(temperature)
    k = int(top_k)

    # Token-by-token sampling loop. Fix: run under no_grad() — this is pure
    # inference, and without it autograd history accumulates every step,
    # growing memory for no benefit.
    with torch.no_grad():
        for _ in range(int(max_new_tokens)):
            out = model(gen)
            logits = out.logits[:, -1, :] / temp
            if k > 0:
                # Top-k filtering: everything outside the k best gets -inf.
                vals, idxs = torch.topk(logits, k)
                filt = torch.full_like(logits, float('-inf'))
                filt.scatter_(1, idxs, vals)
                logits = filt
            probs = torch.softmax(logits, dim=-1)
            nxt = torch.multinomial(probs, num_samples=1)
            gen = torch.cat([gen, nxt], dim=1)
            if nxt.item() == im_end_id:
                break

    out_text = tokenizer.decode(gen[0], skip_special_tokens=False)
    # Undo byte-level BPE markers: presumably Ġ encodes a space and Ċ a
    # newline in this tokenizer's vocab — TODO confirm against the model.
    no_sp = out_text.replace(" ", "").replace("Ġ", " ")
    formatted_out = no_sp.replace("Ċ", "\n")
    if not formatted_out.strip().endswith("<|im_end|>"):
        formatted_out += "<|im_end|>"
    resp = extract_response_between_tokens(formatted_out)
    return resp, format_status(rem, resets)
101
+
102
# Status text formatting
def format_status(rem, resets):
    """Render remaining-quota and reset-countdown info as one line per window."""
    lines = []
    for window in ("minute", "hour", "day"):
        lines.append(f"🕒 {window.capitalize()}: {rem[window]} left — resets in {resets[window]} sec")
    return "\n".join(lines)
105
+
106
# === UI ===
with gr.Blocks() as app:
    # Per-page-load key for the rate limiter. NOTE(review): time.time() is
    # merely "unique enough", not unguessable — fine for rate limiting only.
    session_id = gr.State(str(time.time()))
    gr.Markdown("""
# 🤖 BrtGPT-1-Pre
""" )

    with gr.Row():
        prompt = gr.Textbox(lines=3, placeholder="Enter your message...", label="Prompt")
        output = gr.Textbox(label="Response")

    with gr.Row():
        # Sampling controls; ranges match what chat_generate expects
        # (temperature > 0, top_k >= 1, bounded token count).
        temperature = gr.Slider(0.01,1.0,value=0.5,step=0.01,label="Temperature")
        top_k = gr.Slider(1,50,value=10,step=1,label="Top-K")
        max_new_tokens = gr.Slider(1,128,value=15,step=1,label="Max New Tokens")

    generate_button = gr.Button("Generate")
    status = gr.Markdown()  # shows the rate-limit status text

    # Wire the button to chat_generate; outputs fill the response box
    # and the status line.
    generate_button.click(
        fn=chat_generate,
        inputs=[prompt, temperature, top_k, max_new_tokens, session_id],
        outputs=[output, status]
    )

app.launch()