import os
import json
from dotenv import load_dotenv
from openai import OpenAI
from presidio_analyzer import AnalyzerEngine
load_dotenv()
INSTRUCTION_TEMPLATE = """
Generate a compelling social media post in a {tone} tone based on the PDF content provided, following these guidelines:
1. STYLE & TONE:
- Write in first-person perspective as someone who has personally read and been impacted by the document
- Use a conversational, thoughtful tone that reflects genuine interest in the topic
- Include 1-2 personal reflections or opinions that demonstrate engagement with the material
- Vary sentence structure and length to create natural rhythm and flow
2. STRUCTURE (1300-2000 characters):
- Start with an attention-grabbing opening that poses a question or shares a surprising insight
- Break content into 2-3 short paragraphs with strategic spacing for readability
- Include 1-2 specific facts or statistics from the document to establish credibility
- End with a thought-provoking question or call-to-action that encourages comments
3. CONTENT ELEMENTS:
- Mention authors and publication date naturally within the flow of text
- Reference that you've been reading/reviewing this document (without explicitly saying "PDF")
- Focus on 1-3 key takeaways rather than attempting to summarize everything
- Include your perspective on why these insights matter to your professional network
4. ATTRIBUTION & FORMATTING:
- Use 1-3 emojis maximum, placed strategically (not in succession)
- At the end of the post, include a clear attribution line with authors and publication date
- Follow the attribution with hashtags related to the content; always include #llm
- Format example: "Based on work by [Authors] ([Publication Date]) #llm #sports #innovation"
- DO NOT include character counts, introductory phrases, or any meta-commentary
- DO NOT present as a formal summary or book report - write as a professional sharing valuable insights
The final post should read as if a thoughtful professional read something interesting and wanted to share their genuine takeaways with their network, while properly crediting the original authors.
"""
# Initialize the Presidio PII Analyzer
analyzer = AnalyzerEngine()
# Define which PII entities to check for
PII_ENTITIES_TO_CHECK = [
#"EMAIL_ADDRESS",
"PHONE_NUMBER",
"CREDIT_CARD",
"US_SSN"
]
MIN_CONFIDENCE = 0.8 # Minimum confidence threshold for detected entities
def contains_pii(text: str) -> bool:
"""
Analyze the text for presence of specified PII entities above a confidence threshold.
Returns True if any PII entities are found, False otherwise.
"""
results = analyzer.analyze(text=text, entities=PII_ENTITIES_TO_CHECK, language='en')
high_confidence_results = [r for r in results if r.score >= MIN_CONFIDENCE]
if high_confidence_results:
print("Detected PII:", [(r.entity_type, text[r.start:r.end], r.score) for r in high_confidence_results])
return True
return False
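

# Quick illustration of contains_pii (a sketch: exact matches and scores depend
# on Presidio's default recognizers, so treat these results as indicative only):
#
#   contains_pii("Call me at 212-555-0198")        # -> True  (PHONE_NUMBER)
#   contains_pii("The report covers Q3 trends.")   # -> False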
def generate_linkedin_post(pdf_content: str, tone: str = "Professional", retry_num: int = 0) -> str:
    """
    Generate a social media post from extracted PDF text via OpenRouter.
    `retry_num` raises the sampling temperature slightly so that retries
    produce varied output.
    """
    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        raise ValueError("OPENROUTER_API_KEY environment variable is not set")
    # Point the OpenAI client at OpenRouter's OpenAI-compatible endpoint
    client = OpenAI(api_key=api_key, base_url="https://openrouter.ai/api/v1")
if contains_pii(pdf_content):
return (
"⚠️ The uploaded PDF appears to contain personal or sensitive information. "
"Please remove such details before generating a post."
)
instruction = INSTRUCTION_TEMPLATE.format(tone=tone)
temperature = 0.7 + 0.1 * retry_num # Add variability on retries
try:
        response = client.chat.completions.create(
            model="mistralai/mistral-small-3.2-24b-instruct:free",
            extra_headers={
                "HTTP-Referer": "https://huggingface.co/spaces/mrme77/PDF-To-Social-Media-Post-Generator",
                "X-Title": "PDF to Social Media Post Generator",
            },
            messages=[
                {"role": "system", "content": instruction},
                {"role": "user", "content": f"PDF Content:\n{pdf_content}"}
            ],
            temperature=temperature,
            max_tokens=2000,
            top_p=0.85,
        )
        # Alternative free models tried previously:
        #   meta-llama/llama-3.3-8b-instruct:free
        #   google/gemma-3n-e4b-it:free
        if response.choices and response.choices[0].message.content:
            return response.choices[0].message.content.strip()
        else:
            raise RuntimeError("No content returned by the language model.")
except Exception as e:
return f"Error generating Social Media post: {str(e)}"