import subprocess
import sys
import time
from collections import defaultdict, deque
# Automatically install any missing dependencies at startup
def install_and_import(package):
    try:
        __import__(package)
    except ImportError:
        print(f"{package} is not installed, installing...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
install_and_import("gradio")
install_and_import("transformers")
install_and_import("torch")

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# === RATE LIMIT ===
click_logs = defaultdict(lambda: {"minute": deque(), "hour": deque(), "day": deque()})
LIMITS = {"minute": (15, 60), "hour": (100, 3600), "day": (1500, 86400)}
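# Sliding-window rate limiting: per session, each window keeps a deque of
# request timestamps. Stale timestamps are evicted from the left, so the deque
# length is exactly the number of requests inside the rolling window. For
# example, "minute": (15, 60) allows at most 15 requests in any 60-second span.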
def check_rate_limit(session_id):
    now = time.time()
    logs = click_logs[session_id]
    remaining, reset_times = {}, {}
    exceeded = None
    for key, (limit, interval) in LIMITS.items():
        # Evict requests that have fallen outside this window
        while logs[key] and now - logs[key][0] > interval:
            logs[key].popleft()
        used = len(logs[key])
        remaining[key] = max(0, limit - used)
        reset_times[key] = int(interval - (now - logs[key][0])) if logs[key] else interval
        # Defer the early return until every window is populated;
        # format_status expects all three keys in remaining/reset_times
        if used >= limit and exceeded is None:
            exceeded = f"⛔ {key.capitalize()} rate limit exceeded ({limit}/{key})"
    if exceeded:
        return False, exceeded, remaining, reset_times
    # No limit was exceeded, so record this request in every window's log
    for key in LIMITS:
        logs[key].append(now)
    return True, None, remaining, reset_times
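# Usage sketch (hypothetical session id):
#   ok, msg, rem, resets = check_rate_limit("session-123")
# On a fresh session: ok is True, msg is None, and
#   rem    -> {"minute": 15, "hour": 100, "day": 1500}  (counted before this call is logged)
#   resets -> {"minute": 60, "hour": 3600, "day": 86400}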
# === CHAT GENERATION FUNCTION ===
def extract_response_between_tokens(text: str) -> str:
    start = "<|im_start|>assistant<|im_sep|>"
    end = "<|im_end|>"
    try:
        return text.split(start)[1].split(end)[0]
    except Exception:
        return text
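# Example, using the markers from this model's chat template:
#   extract_response_between_tokens(
#       "<|im_start|>user<|im_sep|>Hi<|im_end|><|im_start|>assistant<|im_sep|>Hello!<|im_end|>"
#   )  ->  "Hello!"
# If the markers are missing, the text is returned unchanged.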
# Load the model
model_name = "Bertug1911/BrtGPT-1-Pre"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
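# Optional (an assumption, not part of the original app): on a CUDA device the
# weights could be loaded in half precision to roughly halve memory use, e.g.
#   model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)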
# Special token ID that marks the end of an assistant turn
im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
# Generation function
def chat_generate(prompt, temperature, top_k, max_new_tokens, session_id):
    ok, msg, rem, resets = check_rate_limit(session_id)
    if not ok:
        return msg, format_status(rem, resets)
    # Wrap the prompt in the model's Jinja chat template
    messages = [{"role": "user", "content": prompt}]
    formatted = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(formatted, return_tensors="pt").to(device)
    gen = inputs["input_ids"]
    # Token-by-token generation loop: sample one token at a time and append it
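    # Dividing the last-position logits by temperature sharpens (T < 1) or
    # flattens (T > 1) the distribution; top-k filtering masks everything
    # outside the k highest logits to -inf so softmax gives them zero weight.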
    for _ in range(int(max_new_tokens)):
        with torch.no_grad():  # inference only; no gradients needed
            out = model(gen)
        logits = out.logits[:, -1, :] / float(temperature)
        if int(top_k) > 0:
            vals, idxs = torch.topk(logits, int(top_k))
            filt = torch.full_like(logits, float('-inf'))
            filt.scatter_(1, idxs, vals)
            logits = filt
        probs = torch.softmax(logits, dim=-1)
        nxt = torch.multinomial(probs, num_samples=1)
        gen = torch.cat([gen, nxt], dim=1)
        # Stop as soon as the end-of-turn token is sampled
        if nxt.item() == im_end_id:
            break
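    # A sketch of the roughly equivalent high-level call (an alternative, not
    # what this app runs), using the standard transformers generation API:
    #   gen = model.generate(inputs["input_ids"], do_sample=True,
    #                        temperature=float(temperature), top_k=int(top_k),
    #                        max_new_tokens=int(max_new_tokens),
    #                        eos_token_id=im_end_id)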
    out_text = tokenizer.decode(gen[0], skip_special_tokens=False)
    # Undo the byte-level BPE markers kept in the decoded string: strip the
    # token-separator spaces, then map "Ġ" to a space and "Ċ" to a newline
    no_sp = out_text.replace(" ", "").replace("Ġ", " ")
    formatted_out = no_sp.replace("Ċ", "\n")
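    # Hypothetical example of the cleanup above:
    #   "Hello Ġworld Ċ"  ->  "HelloĠworldĊ"  ->  "Hello world\n"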
    # Ensure a closing marker exists so the extraction below always finds one
    if not formatted_out.strip().endswith("<|im_end|>"):
        formatted_out += "<|im_end|>"
    resp = extract_response_between_tokens(formatted_out)
    return resp, format_status(rem, resets)
# Format the rate-limit status text shown under the response
def format_status(rem, resets):
    return "\n".join([f"🕒 {k.capitalize()}: {rem[k]} left — resets in {resets[k]} sec" for k in ["minute", "hour", "day"]])
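# Example output (values depend on prior usage in each window):
#   🕒 Minute: 14 left — resets in 60 sec
#   🕒 Hour: 99 left — resets in 3600 sec
#   🕒 Day: 1499 left — resets in 86400 sec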
# === UI ===
with gr.Blocks() as app:
    # NOTE: str(time.time()) is evaluated once at startup, so every browser
    # session shares this id (and therefore one rate-limit bucket)
    session_id = gr.State(str(time.time()))
    gr.Markdown("""
    # 🤖 BrtGPT-1-Pre
    """)
    with gr.Row():
        prompt = gr.Textbox(lines=3, placeholder="Enter your message...", label="Prompt")
        output = gr.Textbox(label="Response")
    with gr.Row():
        temperature = gr.Slider(0.01, 1.0, value=0.5, step=0.01, label="Temperature")
        top_k = gr.Slider(1, 50, value=10, step=1, label="Top-K")
        max_new_tokens = gr.Slider(1, 128, value=15, step=1, label="Max New Tokens")
    generate_button = gr.Button("Generate")
    status = gr.Markdown()
    generate_button.click(
        fn=chat_generate,
        inputs=[prompt, temperature, top_k, max_new_tokens, session_id],
        outputs=[output, status]
    )

app.launch()
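# Optional (an assumption, not in the original app): instead of the plain
# launch above, Gradio can queue requests so concurrent users wait their turn
# rather than running generation in parallel:
#   app.queue().launch()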