import os

import gradio as gr
from cerebras.cloud.sdk import Cerebras
from docx import Document
from PyPDF2 import PdfReader

# Read the API key from the "LitReview" environment variable.
Cerekey = os.getenv("LitReview")

# Initialize the Cerebras AI client with the API key.
client = Cerebras(api_key=Cerekey)


def extract_text_from_file(file):
    """Extracts text from an uploaded PDF or DOCX file."""
    # Gradio may pass either a plain file path or a tempfile wrapper that
    # exposes the path via its .name attribute; accept both.
    path = file.name if hasattr(file, "name") else file
    if path.endswith(".pdf"):
        reader = PdfReader(path)
        text = ""
        for page in reader.pages:
            # extract_text() can return None for image-only pages.
            text += page.extract_text() or ""
        return text
    elif path.endswith(".docx"):
        doc = Document(path)
        return "\n".join(p.text for p in doc.paragraphs)
    else:
        return "Unsupported file format. Please upload a PDF or DOCX file."


def chunk_text(text, max_tokens=4000):
    """
    Splits text into chunks small enough for the Llama model to process.
    Each chunk is capped at roughly `max_tokens` characters, used here as a
    cheap proxy for the token count.
    """
    words = text.split()
    chunks = []
    current_chunk = []
    for word in words:
        current_chunk.append(word)
        if len(" ".join(current_chunk)) > max_tokens:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks


def analyze_chunk(chunk):
    """Analyzes a single chunk of text using the Cerebras Llama model."""
    messages = [
        {
            "role": "system",
            "content": (
                "You are an experienced scholar tasked with analyzing research articles. "
                "Focus on extracting insights based on: Author (APA format with et al. if applicable), Year of publication, Title of the article; "
                "Problem addressed; Methodology (datasets, tools, techniques, algorithms); "
                "Results (specific, quantifiable metrics); and Remarks (strengths, weaknesses, improvements). "
                "Summarize only insights related to these fields and disregard irrelevant content."
            ),
        },
        {"role": "user", "content": chunk},
    ]
    try:
        # Stream the completion from the Cerebras API.
        stream = client.chat.completions.create(
            messages=messages,
            model="llama-3.3-70b",
            stream=True,
            max_completion_tokens=1024,
            temperature=0.2,
            top_p=1,
        )
        result = ""
        for event in stream:  # "event", to avoid shadowing the `chunk` argument
            result += event.choices[0].delta.content or ""
        return result
    except Exception as e:
        return f"An error occurred while processing a chunk: {e}"


def analyze_document(file):
    """Processes and analyzes the uploaded document."""
    text = extract_text_from_file(file)
    if text.startswith("Unsupported file format"):
        yield f"**Error:** {text}"
        return

    chunks = chunk_text(text)
    all_insights = []
    yield "**Processing the document. Please wait...**\n"
    for i, chunk in enumerate(chunks, 1):
        yield f"**Processing chunk {i} of {len(chunks)}...**"
        result = analyze_chunk(chunk)
        if result.strip():  # Only keep non-empty results.
            all_insights.append(result)

    if not all_insights:
        yield "**Error:** No valid insights were extracted from the document."
        return

    yield "**Consolidating all insights into a final summary...**"
    consolidated_summary_prompt = (
        "Below are insights extracted from multiple chunks of a document. "
        "Consolidate these insights into a single output organized as follows: "
        "Author, Year, Title; Problem addressed; Methodology; Results; and Remarks. "
        "Make the final output concise and coherent."
    )
    try:
        stream = client.chat.completions.create(
            messages=[
                {"role": "system", "content": consolidated_summary_prompt},
                {"role": "user", "content": "\n\n".join(all_insights)},
            ],
            model="llama-3.3-70b",
            stream=True,
            max_completion_tokens=1024,
            temperature=0.2,
            top_p=1,
        )
        final_summary = ""
        for event in stream:
            final_summary += event.choices[0].delta.content or ""
        yield f"**Final Summary:**\n\n{final_summary}"
    except Exception as e:
        yield f"**Error:** An error occurred during consolidation: {e}"


# Define the Gradio interface.
interface = gr.Interface(
    fn=analyze_document,
    inputs=gr.File(label="Upload a PDF or DOCX file"),
    outputs=gr.Markdown(label="Literature Analysis"),
    title="Automated Literature Review",
    description=(
        "Upload a PDF or DOCX document, and this tool will analyze it to extract and consolidate its content. "
        "Processing can take a while, especially for longer documents, so smaller files are recommended."
    ),
)

# Launch the interface.
if __name__ == "__main__":
    interface.launch()
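
# ---------------------------------------------------------------------------
# Hedged usage sketches (not part of the app; the file path below is a
# hypothetical example):
#
# 1) Sanity-check chunk_text in a REPL -- rejoining the chunks must preserve
#    every word of the input:
#
#        text = "lorem " * 2000
#        parts = chunk_text(text)
#        assert " ".join(parts).split() == text.split()
#
# 2) Run the pipeline without the Gradio UI; analyze_document is a generator,
#    so iterate over its progress messages:
#
#        for update in analyze_document("paper.pdf"):
#            print(update)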