# core_logic.py
import os
import traceback
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from huggingface_hub import HfApi, hf_hub_download
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
import torch
import requests
from bs4 import BeautifulSoup  # For basic scraping (fragile)
from serpapi import GoogleSearch

HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
api = HfApi(token=HF_TOKEN)
SERPAPI_API_KEY = os.getenv("SERPAPI_API_KEY")


def get_topic_suggestions_hf(topic, hf_token):
    try:
        generator = pipeline(
            'text2text-generation',
            model='google/flan-t5-xl',
            tokenizer='google/flan-t5-xl',
            device=0 if torch.cuda.is_available() else -1,
        )
        prompt = f"Generate 5 related blog post topic suggestions for: {topic}"
        # Sampling is required: greedy decoding raises an error when
        # num_return_sequences > 1.
        suggestions = generator(prompt, max_length=100, num_return_sequences=5, do_sample=True)
        return [s['generated_text'] for s in suggestions]
    except Exception as e:
        print(f"Error in get_topic_suggestions_hf: {e}")
        traceback.print_exc()
        return []


def generate_titles_hf(topic, keywords, hf_token):
    try:
        generator = pipeline(
            'text2text-generation',
            model='google/flan-t5-xl',
            tokenizer='google/flan-t5-xl',
            device=0 if torch.cuda.is_available() else -1,
        )
        keyword_str = ", ".join(keywords) if keywords else ""
        prompt = f"Generate 5 catchy blog post titles for topic: '{topic}' using keywords: {keyword_str}"
        # Sampling is required for multiple return sequences (see above).
        titles = generator(prompt, max_length=150, num_return_sequences=5, do_sample=True)
        return [t['generated_text'] for t in titles]
    except Exception as e:
        print(f"Error in generate_titles_hf: {e}")
        traceback.print_exc()
        return []


def generate_headings_hf(title, keywords, heading_count, hf_token):
    try:
        generator = pipeline(
            'text2text-generation',
            model='google/flan-t5-xl',
            tokenizer='google/flan-t5-xl',
            device=0 if torch.cuda.is_available() else -1,
        )
        keyword_str = ", ".join(keywords) if keywords else ""
        prompt = f"Generate {heading_count} headings for a blog post titled: '{title}' using keywords: {keyword_str}"
        headings_data = generator(prompt, max_length=150, num_return_sequences=1)
        headings = headings_data[0]['generated_text'].split('\n')
        # Keep non-empty lines and drop numbered-list artifacts like "1." etc.
        cleaned_headings = [
            h.strip() for h in headings
            if h.strip() and not h.strip().startswith(('1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.', '10.'))
        ]
        return cleaned_headings[:heading_count]
    except Exception as e:
        print(f"Error in generate_headings_hf: {e}")
        traceback.print_exc()
        return []


def generate_article_hf(config, hf_token):
    try:
        # flan-t5 is a seq2seq model, so it must run through the
        # 'text2text-generation' task, not 'text-generation'.
        generator = pipeline(
            'text2text-generation',
            model='google/flan-t5-xl',
            tokenizer='google/flan-t5-xl',
            device=0 if torch.cuda.is_available() else -1,
        )
        prompt = "Write a blog post with the following specifications:\n\n"
        prompt += f"**Title:** {config.get('selectedTitle', 'Generated Title')}\n"
        prompt += f"**Topic:** {config.get('topic', '')}\n"
        prompt += f"**Primary Keyword:** {config.get('primaryKeyword', '')}\n"
        if config.get('secondaryKeywords'):
            prompt += f"**Secondary Keywords:** {', '.join(config.get('secondaryKeywords'))}\n"
        prompt += f"**Article Type:** {config.get('articleType', '')}\n"
        prompt += f"**Word Count:** {'Short' if config.get('articleLength') == 'short' else 'Medium' if config.get('articleLength') == 'medium' else 'Long'}\n"
        prompt += f"**Writing Style:** {config.get('writingStyle', '')}\n"
        if config.get('selectedHeadings'):
            prompt += "\n**Headings:**\n"
            for heading in config.get('selectedHeadings'):
                prompt += f"- {heading}\n"
        print(f"Full Generation Prompt:\n{prompt}\n")  # Print the full prompt for debugging
        # flan-t5 tends to stop well short of this limit; very long articles
        # may need chunked generation.
        generated_text = generator(prompt, max_length=3000, num_return_sequences=1)[0]['generated_text']
        return generated_text
    except Exception as e:
        print(f"Error in generate_article_hf: {e}")
        traceback.print_exc()
        return "Error generating article. Please check the server logs."
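
# NOTE: each helper above rebuilds the flan-t5 pipeline on every call, which
# reloads the full multi-GB model each time. A minimal sketch of one way to
# share a single instance instead; `_get_flan_pipeline` is a hypothetical
# helper the existing functions could be refactored to call, not part of the
# original design.
from functools import lru_cache


@lru_cache(maxsize=1)
def _get_flan_pipeline():
    # Build the text2text pipeline once and memoize it for later calls.
    return pipeline(
        'text2text-generation',
        model='google/flan-t5-xl',
        tokenizer='google/flan-t5-xl',
        device=0 if torch.cuda.is_available() else -1,
    )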


def generate_images_hf(prompt, num_images=1, hf_token=None):
    try:
        repo_id = "stabilityai/stable-diffusion-2-1-base"
        cache_dir = "stable_diffusion_cache"
        if os.path.exists(cache_dir):
            # Reuse the locally cached weights to avoid re-downloading.
            pipe = StableDiffusionPipeline.from_pretrained(
                cache_dir,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                local_files_only=True,
            )
        else:
            pipe = StableDiffusionPipeline.from_pretrained(
                repo_id,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                token=hf_token,  # 'use_auth_token' is deprecated in recent diffusers
                scheduler=DPMSolverMultistepScheduler.from_pretrained(repo_id, subfolder="scheduler"),
                cache_dir=cache_dir,
            )
        if torch.cuda.is_available():
            pipe = pipe.to("cuda")
        images = pipe(prompt, num_images_per_prompt=num_images).images
        image_urls = ["https://via.placeholder.com/512.png?text=Generated+Image"] * num_images  # Replace with your image handling!
        return image_urls
    except Exception as e:
        print(f"Error in generate_images_hf: {e}")
        traceback.print_exc()
        return []
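
# generate_images_hf currently discards the generated images and returns
# placeholder URLs. A minimal sketch of one way to persist the PIL images and
# return local file paths instead; `_save_images` and the "generated_images"
# directory are assumptions, not part of the original code.
def _save_images(images, out_dir="generated_images"):
    os.makedirs(out_dir, exist_ok=True)
    paths = []
    for i, image in enumerate(images):
        # Each item returned by a diffusers pipeline is a PIL.Image,
        # so .save() works directly.
        path = os.path.join(out_dir, f"image_{i}.png")
        image.save(path)
        paths.append(path)
    return paths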


def get_competitors_hf(topic, hf_token):
    if SERPAPI_API_KEY:
        try:
            search_params = {
                "q": topic,
                "gl": "us",  # Region
                "hl": "en",  # Language
                "api_key": SERPAPI_API_KEY,
                "num": 3,  # Number of results
            }
            search = GoogleSearch(search_params)
            results = search.get_dict()
            competitors = []
            for organic_result in results.get("organic_results", []):
                competitors.append({
                    "id": organic_result.get("position"),
                    "title": organic_result.get("title"),
                    "url": organic_result.get("link"),
                    "domain": organic_result.get("displayed_link"),
                    "wordCount": 0,  # Add word count logic here if needed (e.g., via scraping or estimation)
                    "backlinks": 0,  # Backlinks are difficult - need a dedicated API if this is a requirement
                })
            print("Competitor Data from SerpApi:", competitors)  # Log competitor data
            return competitors
        except Exception as e:
            print(f"Error fetching competitors with SerpApi: {e}")
            traceback.print_exc()
            return []
    else:
        print("WARNING: SERPAPI_API_KEY not set. Returning dummy data for competitors.")
        return [
            {"id": 1, "title": f'Dummy: Guide to {topic}', "url": '#', "domain": 'dummy.com', "wordCount": 1500, "backlinks": 50},
            {"id": 2, "title": f'Dummy: Mastering {topic}', "url": '#', "domain": 'mocksite.org', "wordCount": 2000, "backlinks": 75},
            {"id": 3, "title": f'Dummy: {topic} Explained', "url": '#', "domain": 'example.net', "wordCount": 1200, "backlinks": 30},
        ]


def get_keywords_hf(topic, primary_keyword, competitor_articles, hf_token):
    if SERPAPI_API_KEY:
        try:
            search_params_related = {
                "engine": "google",
                "q": topic,
                "api_key": SERPAPI_API_KEY,
            }
            search_related = GoogleSearch(search_params_related)
            data_related = search_related.get_dict()
            related_keywords = [item.get('query') for item in data_related.get("related_searches", [])]

            search_params_keyword_metrics = {
                "engine": "google",
                "q": primary_keyword,
                "api_key": SERPAPI_API_KEY,
            }
            search_metrics = GoogleSearch(search_params_keyword_metrics)
            data_metrics = search_metrics.get_dict()

            keywords_for_frontend = []
            if data_metrics:
                # total_results is only a rough proxy; it is not true search volume.
                search_volume = data_metrics.get('search_information', {}).get('total_results')
                try:  # Attempt to convert search volume to integer
                    search_volume = int(search_volume) if search_volume else 0
                except ValueError:
                    search_volume = 0
                keywords_for_frontend.append({
                    "keyword": primary_keyword,
                    "volume": search_volume,
                    "difficulty": "medium",  # Placeholder for difficulty - needs additional logic/API if required
                })
            for kw in related_keywords:
                keywords_for_frontend.append({"keyword": kw})
            return keywords_for_frontend
        except Exception as e:
            print(f"Error fetching keywords with SerpApi: {e}")
            traceback.print_exc()
            return []
    else:
        print("WARNING: SERPAPI_API_KEY not set. Using basic keyword extraction or dummy data.")
        try:
            # "keyword-extraction" is not a built-in transformers task; the
            # ml6team keyphrase model is a token-classification model, so run
            # it through that task and aggregate sub-tokens into phrases.
            keyword_extractor = pipeline(
                "token-classification",
                model="ml6team/keyphrase-extraction-kbir-inspec",
                aggregation_strategy="simple",
            )
            extracted_keywords = [res['word'].strip() for res in keyword_extractor(topic)]
            print("Keywords extracted from topic:", extracted_keywords)
            if competitor_articles and isinstance(competitor_articles, list):
                competitor_texts = []
                for article in competitor_articles:
                    try:
                        response = requests.get(article.get('url'), timeout=10)
                        response.raise_for_status()
                        soup = BeautifulSoup(response.content, 'html.parser')
                        text = soup.get_text(separator=' ', strip=True)
                        competitor_texts.append(text)
                    except Exception as e:
                        print(f"WARNING: Could not extract text from {article.get('url')}: {e}")
                for text in competitor_texts:
                    # Crude truncation so long pages stay within the model's
                    # input window instead of raising an error.
                    kws = [res['word'].strip() for res in keyword_extractor(text[:2000])]
                    extracted_keywords.extend(kws)
                    print(f"Keywords extracted from competitor text: {kws}")
            unique_keywords = list(set(kw.lower() for kw in extracted_keywords))
            keywords_for_frontend = [{"keyword": kw} for kw in unique_keywords]
            return keywords_for_frontend
        except Exception as e:
            print(f"Error extracting keywords with Hugging Face model: {e}")
            return []
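

# A small smoke test sketching how the helpers chain together when the module
# is run directly; the topic and keyword values are arbitrary examples, and
# the first call will download the flan-t5-xl weights if not already cached.
if __name__ == "__main__":
    demo_topic = "sustainable gardening"
    print(get_topic_suggestions_hf(demo_topic, HF_TOKEN))
    demo_titles = generate_titles_hf(demo_topic, ["composting", "raised beds"], HF_TOKEN)
    print(demo_titles)
    if demo_titles:
        print(generate_headings_hf(demo_titles[0], ["composting"], 5, HF_TOKEN))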