from fastapi import FastAPI, UploadFile, File, Request
import torch
from transformers import pipeline
import soundfile as sf
import tempfile
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)

app = FastAPI()

# Load the Whisper model for speech recognition and TinyLlama for chat
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-small")
chatbot_pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.float16,
    device_map="auto",
)

# Performance knobs: cap CPU threads, enable oneDNN (MKL-DNN) kernels,
# and allow TF32 matmuls on supported NVIDIA GPUs
torch.set_num_threads(4)
torch.backends.mkldnn.enabled = True
torch.backends.cuda.matmul.allow_tf32 = True


def extract_assistant_response(text):
    # Split the text on the "<|assistant|>" marker
    parts = text.split("<|assistant|>")
    # If the marker is present, return everything after it; otherwise return ""
    return parts[1].strip() if len(parts) > 1 else ""


@app.post("/process")
async def transcribe_audio(file: UploadFile = File(...)):
    # Write the upload to a temporary file
    with tempfile.NamedTemporaryFile(delete=True, suffix=".wav") as temp_audio:
        temp_audio.write(await file.read())  # persist the upload temporarily
        temp_audio.flush()  # force the written data to disk

        # Read the audio with soundfile
        waveform, sample_rate = sf.read(temp_audio.name)

    # Whisper expects mono audio; average the channels if the file is stereo
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)

    # Run the Whisper model; pass the sampling rate so the pipeline can
    # resample inputs that are not 16 kHz
    transcription = pipe({"raw": waveform, "sampling_rate": sample_rate})
    logging.info(f"Transcription: {transcription}")

    messages = [
        {"role": "user", "content": transcription["text"].strip()},
    ]
    prompt = chatbot_pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    response = chatbot_pipe(
        prompt, max_new_tokens=128, do_sample=True, temperature=0.7, top_k=20, top_p=0.8
    )
    logging.info(f"Response: {response}")
    generated_text = extract_assistant_response(response[0]["generated_text"])
    return generated_text


@app.post("/message")
async def get_message(request: Request):
    # The request body is the raw user message as UTF-8 text
    body = await request.body()
    text_msg = body.decode("utf-8")
    logging.info(f"prompt: {text_msg}")

    messages = [
        {"role": "user", "content": text_msg},
    ]
    prompt = chatbot_pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    response_msg = chatbot_pipe(
        prompt, max_new_tokens=128, do_sample=True, temperature=0.7, top_k=20, top_p=0.8
    )
    logging.info(f"Response: {response_msg}")
    generated_text_msg = extract_assistant_response(response_msg[0]["generated_text"])
    return generated_text_msg
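

# --- Optional local entry point: a minimal sketch, assuming the app is run
# --- directly as a script; the original may instead be launched with
# --- `uvicorn <module>:app` from the command line. Host and port are
# --- illustrative defaults, not taken from the original.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)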
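
# Example requests against the two endpoints (illustrative; the host, port,
# and sample file name below are assumptions, not part of the original):
#
#   # Send a WAV file for transcription plus a chat response
#   curl -X POST http://localhost:8000/process \
#        -F "file=@sample.wav;type=audio/wav"
#
#   # Send a plain-text message straight to the chatbot
#   curl -X POST http://localhost:8000/message \
#        -H "Content-Type: text/plain" \
#        --data "Hello, how are you?"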