import subprocess
import sys
import time
from collections import defaultdict, deque

# Automatic dependency installation
def install_and_import(package):
    try:
        __import__(package)
    except ImportError:
        print(f"{package} is not installed, installing...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])

install_and_import("gradio")
install_and_import("transformers")
install_and_import("torch")

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# === RATE LIMIT ===
# Per-session sliding-window click logs: one deque of timestamps per window.
click_logs = defaultdict(lambda: {"minute": deque(), "hour": deque(), "day": deque()})
LIMITS = {"minute": (13, 60), "hour": (90, 3600), "day": (1350, 86400)}

def check_rate_limit(session_id):
    now = time.time()
    logs = click_logs[session_id]
    remaining, reset_times = {}, {}
    for key, (limit, interval) in LIMITS.items():
        # Drop requests that have fallen outside this window
        while logs[key] and now - logs[key][0] > interval:
            logs[key].popleft()
        used = len(logs[key])
        remaining[key] = max(0, limit - used)
        if logs[key]:
            reset_times[key] = int(interval - (now - logs[key][0]))
        else:
            reset_times[key] = int(interval)
        if used >= limit:
            return False, f"⛔ {key.capitalize()} rate limit exceeded ({limit}/{key})", remaining, reset_times
    # No limit exceeded: record this request in every window
    for key in LIMITS:
        logs[key].append(now)
    return True, None, remaining, reset_times

# === CHAT GENERATION ===
def extract_response_between_tokens(text: str) -> str:
    start = "<|im_start|>assistant<|im_sep|>"
    end = "<|im_end|>"
    try:
        return text.split(start)[1].split(end)[0]
    except Exception:
        return text

# Model loading
model_name = "Bertug1911/BrtGPT-1-Pre"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Special token ID that terminates generation
im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")

# Generation function
def chat_generate(prompt, temperature, top_k, max_new_tokens, session_id):
    ok, msg, rem, resets = check_rate_limit(session_id)
    if not ok:
        return msg, format_status(rem, resets)

    # Apply the model's Jinja chat template
    messages = [{"role": "user", "content": prompt}]
    formatted = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(formatted, return_tensors="pt").to(device)
    gen = inputs["input_ids"]

    # Token-by-token sampling loop (no gradients needed at inference time)
    with torch.no_grad():
        for _ in range(int(max_new_tokens)):
            out = model(gen)
            logits = out.logits[:, -1, :] / float(temperature)
            if int(top_k) > 0:
                # Keep only the top-k logits; mask the rest to -inf before sampling
                vals, idxs = torch.topk(logits, int(top_k))
                filt = torch.full_like(logits, float("-inf"))
                filt.scatter_(1, idxs, vals)
                logits = filt
            probs = torch.softmax(logits, dim=-1)
            nxt = torch.multinomial(probs, num_samples=1)
            gen = torch.cat([gen, nxt], dim=1)
            if nxt.item() == im_end_id:
                break

    out_text = tokenizer.decode(gen[0], skip_special_tokens=False)

    # Undo byte-level BPE markers: tokens come out joined with spaces, "Ġ" marks
    # a real space, and "Ċ" marks a newline
    no_sp = out_text.replace(" ", "").replace("Ġ", " ")
    formatted_out = no_sp.replace("Ċ", "\n")
    if not formatted_out.strip().endswith("<|im_end|>"):
        formatted_out += "<|im_end|>"
    resp = extract_response_between_tokens(formatted_out)
    return resp, format_status(rem, resets)

# Status text formatting
def format_status(rem, resets):
    return "\n".join(
        f"🕒 {k.capitalize()}: {rem[k]} left — resets in {resets[k]} sec"
        for k in ["minute", "hour", "day"]
    )
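# --- Optional sanity check for the rate limiter (a minimal sketch using a
# --- hypothetical session id; not part of the original app, safe to delete).
# --- Given the per-minute limit of 13 in LIMITS, the first 13 calls should
# --- pass and the 14th should be rejected:
#
# for i in range(14):
#     ok, msg, rem, resets = check_rate_limit("selftest-session")
#     assert ok == (i < 13), msg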
# === UI ===
with gr.Blocks() as app:
    # NOTE: gr.State's default is evaluated once at build time, so every visitor
    # would otherwise share the same id (and the same rate limit); app.load
    # below refreshes it per browser session.
    session_id = gr.State(str(time.time()))
    gr.Markdown("# 🤖 BrtGPT-1-Pre")
    with gr.Row():
        prompt = gr.Textbox(lines=3, placeholder="Enter your message...", label="Prompt")
        output = gr.Textbox(label="Response")
    with gr.Row():
        temperature = gr.Slider(0.01, 1.0, value=0.5, step=0.01, label="Temperature")
        top_k = gr.Slider(1, 50, value=10, step=1, label="Top-K")
        max_new_tokens = gr.Slider(1, 128, value=15, step=1, label="Max New Tokens")
    generate_button = gr.Button("Generate")
    status = gr.Markdown()
    generate_button.click(
        fn=chat_generate,
        inputs=[prompt, temperature, top_k, max_new_tokens, session_id],
        outputs=[output, status],
    )
    app.load(lambda: str(time.time()), None, session_id)

app.launch()
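# --- Optional client-side usage sketch (assumptions: the default local launch
# --- on port 7860, and Gradio's auto-generated endpoint name "/chat_generate"
# --- for the click handler above; requires `pip install gradio_client`).
# --- Run in a separate process while the app is up:
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860/")
# response, status = client.predict(
#     "Hello!",        # prompt
#     0.5,             # temperature
#     10,              # top_k
#     15,              # max_new_tokens
#     "demo-session",  # session_id used for rate limiting
#     api_name="/chat_generate",
# )
# print(response)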