import os
import json
from dotenv import load_dotenv
import openai
from presidio_analyzer import AnalyzerEngine

load_dotenv()

# System prompt for the language model. generate_linkedin_post() fills in the
# single `{tone}` placeholder with str.format() and sends the result as the
# "system" message. Because the text goes through str.format(), any literal
# curly braces added here must be doubled ("{{" / "}}").
INSTRUCTION_TEMPLATE = """
Generate a compelling social media posts in a {tone} tone based on the PDF content provided, following these guidelines:

1. STYLE & TONE:
   - Write in first-person perspective as someone who has personally read and been impacted by the document
   - Use a conversational, thoughtful tone that reflects genuine interest in the topic
   - Include 1-2 personal reflections or opinions that demonstrate engagement with the material
   - Vary sentence structure and length to create natural rhythm and flow

2. STRUCTURE (1300-2000 characters):
   - Start with an attention-grabbing opening that poses a question or shares a surprising insight
   - Break content into 2-3 short paragraphs with strategic spacing for readability
   - Include 1-2 specific facts or statistics from the document to establish credibility
   - End with a thought-provoking question or call-to-action that encourages comments

3. CONTENT ELEMENTS:
   - Mention authors and publication date naturally within the flow of text
   - Reference that you've been reading/reviewing this document (without explicitly saying "PDF")
   - Focus on 1-3 key takeaways rather than attempting to summarize everything
   - Include your perspective on why these insights matter to your professional network

4. ATTRIBUTION & FORMATTING:
   - Use 1-3 emojis maximum, placed strategically (not in succession)
   - At the end of the post, include a clear attribution line with authors and publication date
   - Follow the attribution with these hashtag related to the content; always include #llm
   - Format example: "Based on work by [Authors] ([Publication Date]) #llm #sports #innovation"
   - DO NOT include character counts, introductory phrases, or any meta-commentary
   - DO NOT present as a formal summary or book report - write as a professional sharing valuable insights

The final post should read as if a thoughtful professional read something interesting and wanted to share their genuine takeaways with their network, while properly crediting the original authors.
"""

# Initialize the Presidio PII Analyzer once at import time; contains_pii()
# uses this shared module-level engine for every scan.
analyzer = AnalyzerEngine()

# Define which PII entities to check for. Only the entity types listed here
# are passed to the analyzer; EMAIL_ADDRESS is currently commented out and
# therefore not checked.
PII_ENTITIES_TO_CHECK = [
    #"EMAIL_ADDRESS",
    "PHONE_NUMBER",
    "CREDIT_CARD",
    "US_SSN"
]

# Minimum confidence threshold for detected entities: contains_pii() ignores
# any analyzer result whose score is below this value.
MIN_CONFIDENCE = 0.8  # Minimum confidence threshold for detected entities

def contains_pii(text: str) -> bool:
    """
    Report whether the text contains any of the configured PII entity types.

    The text is scanned with the module-level analyzer for the entity types
    listed in PII_ENTITIES_TO_CHECK (language 'en'). Only matches whose score
    is at least MIN_CONFIDENCE count as a detection.

    Args:
        text: The text to scan. Empty text is treated as containing no PII.

    Returns:
        True if at least one sufficiently confident match is found,
        False otherwise.
    """
    if not text:
        # Nothing to scan; skip the analyzer call entirely.
        return False

    findings = analyzer.analyze(text=text, entities=PII_ENTITIES_TO_CHECK, language='en')
    confident = [f for f in findings if f.score >= MIN_CONFIDENCE]
    if not confident:
        return False

    # Report only the entity type, character span and score. The matched
    # substring is deliberately left out so the sensitive value itself is
    # never written to stdout.
    print("Detected PII:", [(f.entity_type, f.start, f.end, f.score) for f in confident])
    return True

def generate_linkedin_post(pdf_content: str, tone: str = "Professional", retry_num: int = 0) -> str:
    """
    Generate a social media post from document text via the OpenRouter API.

    Args:
        pdf_content: Text of the source document. It is screened with
            contains_pii() before anything is sent to the model.
        tone: Tone name substituted into INSTRUCTION_TEMPLATE.
        retry_num: Retry counter. Each retry raises the sampling temperature
            by 0.1 (starting from 0.7) to vary the output; the result is
            clamped to the 0-2 range.

    Returns:
        The generated post text with surrounding whitespace stripped. If PII
        is detected, a warning message is returned instead. If the request
        fails or yields no content, a string starting with
        "Error generating Social Media post:" is returned.

    Raises:
        ValueError: If OPENROUTER_API_KEY is not set in the environment.
    """
    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        raise ValueError("OPENROUTER_API_KEY environment variable is not set")

    # Set up OpenAI client (module-level config) so requests go to OpenRouter.
    openai.api_key = api_key
    openai.api_base = "https://openrouter.ai/api/v1"

    # Refuse to forward documents that appear to contain sensitive data.
    if contains_pii(pdf_content):
        return (
            "⚠️ The uploaded PDF appears to contain personal or sensitive information. "
            "Please remove such details before generating a post."
        )

    instruction = INSTRUCTION_TEMPLATE.format(tone=tone)
    # Add variability on retries, but keep the value inside the 0-2 range so a
    # large (or negative) retry counter cannot produce an invalid request.
    temperature = min(max(0.7 + 0.1 * retry_num, 0.0), 2.0)

    try:
        response = openai.ChatCompletion.create(
            model="mistralai/mistral-small-3.2-24b-instruct:free",
            # The legacy ChatCompletion API takes per-request HTTP headers via
            # `headers`; unrecognized keywords such as `extra_headers` would be
            # serialized into the JSON request body instead.
            headers={
                "HTTP-Referer": "https://huggingface.co/spaces/mrme77/PDF-To-Social-Media-Post-Generator",
                "X-Title": "PDF to Social Media Post Generator",
            },
            messages=[
                {"role": "system", "content": instruction},
                {"role": "user", "content": f"PDF Content:\n{pdf_content}"},
            ],
            temperature=temperature,
            max_tokens=2000,
            top_p=0.85,
        )

        if not (response and "choices" in response and response["choices"]):
            raise RuntimeError("No content returned by the language model.")

        content = response["choices"][0]["message"]["content"]
        if content is None:
            # A choice without text is reported the same way as a missing choice.
            raise RuntimeError("No content returned by the language model.")
        return content.strip()

    except Exception as e:
        # Failures (including the RuntimeErrors above) are reported as a
        # message string rather than propagated to the caller.
        return f"Error generating Social Media post: {str(e)}"