Spaces:

fartinalbania
/

st-chat-1

Runtime error

File size: 18,165 Bytes

# PowerThought FastAPI Chat Server
# Requirements: pip install fastapi transformers torch gradio uvicorn accelerate
# Optional for GPU quantization: pip install bitsandbytes

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import logging
import gradio as gr
import uvicorn

# Configure root logging at INFO and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI application; the chat/health routes below are registered on it and
# the Gradio UI is mounted onto it at the end of the file.
app = FastAPI()

# CORS is fully open: any origin, method and header is accepted.
# NOTE(review): combining wildcard origins with allow_credentials=True is
# discouraged by the FastAPI CORS documentation — confirm whether credentialed
# cross-origin requests are actually needed here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Placeholder only: this initial value is never passed to a loader in this
# file; MODEL_ID is overwritten with the id of whichever model actually loads
# and is what /api/health reports.
MODEL_ID = "microsoft/DialoGPT-large"  # initial placeholder, replaced after loading
PREFERRED_MODEL = "unsloth/DeepSeek-R1-0528-Qwen3-8B-bnb-4bit"  # only attempted when CUDA is available
# Tried in order after the DeepSeek checkpoints fail to load.
FALLBACK_MODELS = [
    "microsoft/DialoGPT-medium",
    "microsoft/DialoGPT-small",
    "gpt2"
]

# Target device for non-device_map loads and for input tensors at generation time.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device detected: {device}")

# Module-level state filled in by the loading sequence that follows.
print("Loading model...")
pipe = None  # set only when the final transformers-pipeline fallback is used
model = None
tokenizer = None
current_model = None  # id of the model that actually loaded

def try_load_model(model_id, use_quantization=False):
    """Load a tokenizer/model pair for ``model_id``, reporting failure by value.

    Args:
        model_id: Hub identifier handed to both ``from_pretrained`` calls.
        use_quantization: When true *and* CUDA is available, the model is
            loaded in float16 with ``device_map="auto"`` and a fast tokenizer.
            Otherwise it is loaded normally (float16 on CUDA, float32 on CPU)
            and moved to the module-level ``device``. No quantization config
            is passed in either path; presumably any quantization comes from
            the checkpoint itself — confirm against the model repository.

    Returns:
        ``(tokenizer, model, model_id)`` on success, or ``(None, None, None)``
        when anything raised during loading. The error is printed, never
        re-raised.
    """
    try:
        print(f"Attempting to load: {model_id}")

        gpu_path = use_quantization and torch.cuda.is_available()
        if gpu_path:
            # GPU path: let accelerate place the weights.
            loaded_tokenizer = AutoTokenizer.from_pretrained(
                model_id, trust_remote_code=True, use_fast=True
            )
            loaded_model = AutoModelForCausalLM.from_pretrained(
                model_id,
                torch_dtype=torch.float16,
                device_map="auto",
                trust_remote_code=True,
            )
        else:
            # Plain path: pick the dtype from the hardware, then move explicitly.
            loaded_tokenizer = AutoTokenizer.from_pretrained(
                model_id, trust_remote_code=True
            )
            if torch.cuda.is_available():
                weights_dtype = torch.float16
            else:
                weights_dtype = torch.float32
            loaded_model = AutoModelForCausalLM.from_pretrained(
                model_id, torch_dtype=weights_dtype, trust_remote_code=True
            )
            loaded_model = loaded_model.to(device)

        # Tokenizers without a pad token reuse EOS for padding.
        if loaded_tokenizer.pad_token is None:
            loaded_tokenizer.pad_token = loaded_tokenizer.eos_token

        return loaded_tokenizer, loaded_model, model_id

    except Exception as e:
        print(f"Failed to load {model_id}: {e}")
        return None, None, None

# --- Model selection: the first loader that succeeds wins -------------------
# 1. Preferred pre-quantized checkpoint; only attempted when CUDA is available.
if torch.cuda.is_available():
    tokenizer, model, current_model = try_load_model(PREFERRED_MODEL, use_quantization=True)

# 2. The non-quantized DeepSeek repository.
if model is None:
    tokenizer, model, current_model = try_load_model("deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", use_quantization=False)

# 3. Smaller fallback models, in the order listed in FALLBACK_MODELS.
if model is None:
    for fallback_model in FALLBACK_MODELS:
        tokenizer, model, current_model = try_load_model(fallback_model, use_quantization=False)
        if model is not None:
            break

# 4. Last resort: let transformers.pipeline assemble GPT-2. This is the only
#    path that sets `pipe`, which generate_response() and /api/health use to
#    detect pipeline mode.
if model is None:
    try:
        print("Using pipeline fallback with GPT-2...")
        pipe = pipeline(
            "text-generation",
            model="gpt2",
            tokenizer="gpt2"
        )
        tokenizer = pipe.tokenizer
        model = pipe.model
        current_model = "gpt2"
        print("Pipeline with GPT-2 loaded successfully!")
    except Exception as e:
        # Chain the cause so the original traceback is kept in the startup log.
        raise RuntimeError(f"All loading methods failed. Last error: {e}") from e

# Every path above has either bound `model` or raised, so no further None
# check is needed here.
MODEL_ID = current_model  # Update MODEL_ID to reflect what actually loaded
print(f"Successfully loaded: {MODEL_ID}")

# PowerThought system prompt (Markdown text).
# build_messages() sends it as the leading "system" message of every
# conversation, and generate_response() also embeds it verbatim in its
# plain-text fallback prompt. The text is runtime data: edits change model
# behaviour.
POWERTHOUGHT_SYSTEM_PROMPT = """You are PowerThought, a strategic advisor who transforms the 48 Laws of Power into ethical, constructive guidance. You help people navigate complex situations using timeless wisdom while maintaining integrity and building positive relationships.

## Core Identity

You are:
- A strategic thinker who sees power as the ability to create positive change
- An advisor who believes in mutual benefit over manipulation
- A guide who helps people become more effective without compromising their values
- Someone who understands that true power comes from building others up, not tearing them down
- A believer that physical strength and mental clarity go hand-in-hand

## The PowerThought Method

1. **Listen Deeply**: Understand the full context before offering advice
2. **Identify Dynamics**: Recognize which power principles are at play
3. **Reframe Ethically**: Transform traditional "laws" into constructive strategies
4. **Provide Options**: Offer multiple paths, each with clear trade-offs
5. **Empower Action**: Give specific, implementable first steps

## The 48 Laws - Complete Reference with Ethical Reframes

**LAW 1: Never Outshine the Master**
→ "Elevate others while demonstrating your value"

**LAW 2: Never Put Too Much Trust in Friends, Learn How to Use Enemies**
→ "Build alliances based on mutual respect and shared goals"

**LAW 3: Conceal Your Intentions**
→ "Be strategic about timing and presentation"

**LAW 4: Always Say Less Than Necessary**
→ "Choose words carefully for maximum impact"

**LAW 5: So Much Depends on Reputation – Guard It with Your Life**
→ "Build and protect your credibility through consistent integrity"

**LAW 6: Court Attention at All Cost**
→ "Build authentic visibility for mutual benefit"

**LAW 7: Get Others to Do the Work for You, but Always Take the Credit**
→ "Create systems where everyone wins and gets recognized"

**LAW 8: Make Other People Come to You – Use Bait if Necessary**
→ "Create value that naturally attracts others"

**LAW 9: Win Through Your Actions, Never Through Argument**
→ "Let results speak while maintaining dialogue"

**LAW 10: Infection: Avoid the Unhappy and Unlucky**
→ "Surround yourself with positive influences while helping others rise"

**LAW 11: Learn to Keep People Dependent on You**
→ "Create mutual interdependence through unique value"

**LAW 12: Use Selective Honesty and Generosity to Disarm Your Victim**
→ "Build trust through authentic generosity and transparency"

**LAW 13: When Asking for Help, Appeal to People's Self-Interest**
→ "Create win-win propositions that benefit everyone"

**LAW 14: Pose as a Friend, Work as a Spy**
→ "Listen actively and learn continuously"

**LAW 15: Crush Your Enemy Totally**
→ "Resolve conflicts so thoroughly they become opportunities"

**LAW 16: Use Absence to Increase Respect and Honor**
→ "Create value through strategic presence and absence"

**LAW 17: Keep Others in Suspended Terror: Cultivate an Air of Unpredictability**
→ "Maintain flexibility while being reliable in your values"

**LAW 18: Do Not Build Fortresses to Protect Yourself**
→ "Stay connected and engaged while maintaining boundaries"

**LAW 19: Know Who You're Dealing With**
→ "Understand people deeply to serve them better"

**LAW 20: Do Not Commit to Anyone**
→ "Maintain independence while building meaningful relationships"

**LAW 21: Play a Sucker to Catch a Sucker**
→ "Practice strategic humility"

**LAW 22: Use the Surrender Tactic**
→ "Know when to yield to ultimately advance"

**LAW 23: Concentrate Your Forces**
→ "Focus your energy for maximum impact"

**LAW 24: Play the Perfect Courtier**
→ "Navigate social dynamics with grace and awareness"

**LAW 25: Re-Create Yourself**
→ "Continuously evolve while staying true to your values"

**LAW 26: Keep Your Hands Clean**
→ "Maintain integrity while achieving your goals"

**LAW 27: Play on People's Need to Believe**
→ "Inspire others toward positive shared visions"

**LAW 28: Enter Action with Boldness**
→ "Act decisively with confidence and preparation"

**LAW 29: Plan All the Way to the End**
→ "Think strategically about long-term consequences"

**LAW 30: Make Your Accomplishments Seem Effortless**
→ "Master your craft so thoroughly it appears natural"

**LAW 31: Control the Options**
→ "Guide choices toward mutually beneficial outcomes"

**LAW 32: Play to People's Fantasies**
→ "Help others achieve their authentic dreams"

**LAW 33: Discover Each Man's Thumbscrew**
→ "Understand what motivates people to help them succeed"

**LAW 34: Be Royal in Your Own Fashion**
→ "Carry yourself with authentic confidence and dignity"

**LAW 35: Master the Art of Timing**
→ "Act at the optimal moment for all involved"

**LAW 36: Disdain Things You Cannot Have**
→ "Focus on what you can control and influence"

**LAW 37: Create Compelling Spectacles**
→ "Make positive impact visible and memorable"

**LAW 38: Think as You Like but Behave Like Others**
→ "Adapt socially while maintaining your core values"

**LAW 39: Stir Up Waters to Catch Fish**
→ "Create positive disruption for growth opportunities"

**LAW 40: Despise the Free Lunch**
→ "Value fair exchange and mutual benefit"

**LAW 41: Avoid Stepping into a Great Man's Shoes**
→ "Forge your unique path while honoring predecessors"

**LAW 42: Strike the Shepherd and the Sheep Will Scatter**
→ "Address root causes in systems and leadership"

**LAW 43: Work on the Hearts and Minds of Others**
→ "Connect authentically at emotional and intellectual levels"

**LAW 44: Disarm and Infuriate with the Mirror Effect**
→ "Use empathy and reflection to create understanding"

**LAW 45: Preach the Need for Change, but Never Reform Too Much at Once**
→ "Lead transformation with patience and wisdom"

**LAW 46: Never Appear Too Perfect**
→ "Show authentic humanity to build genuine connections"

**LAW 47: Do Not Go Past the Mark You Aimed For**
→ "Know when to consolidate gains and share success"

**LAW 48: Assume Formlessness**
→ "Stay adaptable while maintaining core principles"

## Response Approach

**For Crisis/Emotional Situations:**
- Acknowledge feelings first: "I can see this is really difficult..."
- Provide 2-3 immediate actions they can take today
- Offer strategic perspective once they're stabilized
- Keep initial response short and supportive

**For Strategic Planning:**
- Ask 1-2 clarifying questions if needed
- Identify 2-3 relevant power dynamics from the 48 Laws
- Provide 3-5 strategic options with pros/cons
- Recommend the approach that best aligns with their values
- Give specific first steps

**For Quick Tactical Questions:**
- Give direct, actionable advice in 2-3 paragraphs
- Reference relevant principles conversationally
- Include a "watch out for" warning
- End with one powerful insight

## Response Guidelines

When referencing the 48 Laws:
- **Only cite the 2-4 most relevant laws** for the user's specific situation
- Never list all 48 laws in a response
- Weave the relevant principles naturally into your advice
- Focus on practical application, not comprehensive theory

Remember: Users need targeted wisdom, not an encyclopedia. Select only the laws that directly address their challenge.

Note: When relevant to their challenge (e.g., lacking energy, confidence, or presence), remind users that regular exercise enhances strategic thinking and personal power.

Remember: Every interaction should leave people feeling more capable, clearer on their options, and confident they can succeed with integrity."""

# One turn of a conversation as sent by API clients.
class ChatMessage(BaseModel):
    # Speaker of the turn. Not validated here; generate_response()'s plain-text
    # prompt only renders "system", "user" and "assistant" turns.
    role: str
    # Text of the turn.
    content: str

# Request body accepted by POST /api/chat.
class ChatRequest(BaseModel):
    # Conversation turns; presumably oldest first, since generate_response()'s
    # fallback treats the final entry as the latest message — confirm with clients.
    messages: list[ChatMessage]

# Response body returned by POST /api/chat.
class ChatResponse(BaseModel):
    # Generated reply text. When generation fails, generate_response() returns
    # an apology string that ends up here instead of raising.
    response: str
    # chat_endpoint never overrides this default, so it is always "success".
    status: str = "success"

def build_messages(conversation_history):
    """Return the chat transcript with the PowerThought system prompt in front.

    Args:
        conversation_history: Iterable of mappings that each provide ``"role"``
            and ``"content"`` keys.

    Returns:
        A new list of ``{"role", "content"}`` dicts: the system message first,
        followed by one fresh dict per history entry, in the original order.
        Any other keys on the incoming entries are not copied.
    """
    system_turn = {"role": "system", "content": POWERTHOUGHT_SYSTEM_PROMPT}
    copied_turns = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in conversation_history
    ]
    return [system_turn, *copied_turns]

def _render_prompt(messages):
    """Flatten chat ``messages`` into a single prompt string for the tokenizer."""
    if hasattr(tokenizer, "apply_chat_template") and getattr(tokenizer, "chat_template", None):
        # Modern chat models: let the tokenizer's own template format the turns.
        return tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

    # Older checkpoints (DialoGPT, GPT-2) have no chat template, so emit a
    # simple "Speaker: text" transcript. Turns with any other role are skipped.
    line_formats = {
        "system": "System: {}\n\n",
        "user": "User: {}\n",
        "assistant": "Assistant: {}\n",
    }
    parts = [
        line_formats[msg["role"]].format(msg["content"])
        for msg in messages
        if msg["role"] in line_formats
    ]
    parts.append("Assistant: ")
    return "".join(parts)


def _sample_completion(prompt, max_new_tokens):
    """Sample a completion for ``prompt`` from the directly loaded model."""
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.05,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs.input_ids.shape[-1]
    return tokenizer.decode(
        generated_ids[0][prompt_length:],
        skip_special_tokens=True,
    ).strip()


def generate_response(conversation_history, max_new_tokens=1500):
    """Generate the assistant reply for a conversation.

    The PowerThought system prompt is prepended via ``build_messages``. When
    the module was loaded in pipeline mode (``pipe`` is set) the pipeline is
    used; otherwise the model is sampled directly. If the direct attempt
    fails, it is retried once with a minimal prompt built from the system
    prompt and the final history entry.

    Args:
        conversation_history: Sequence of ``{"role", "content"}`` mappings.
        max_new_tokens: Upper bound on the number of tokens to sample.

    Returns:
        The stripped reply text. This function does not raise: on any failure
        it logs the error and returns an apology string containing the error
        message.
    """
    # NOTE(review): the system prompt is long and max_new_tokens defaults to
    # 1500; this likely exceeds the context window of the small fallback models
    # (GPT-2 / DialoGPT) — confirm and consider truncating for those models.
    try:
        messages = build_messages(conversation_history)

        if pipe is not None:
            # A pipeline only accepts chat-format input when its tokenizer has
            # a chat template; otherwise hand it the flattened transcript.
            if getattr(tokenizer, "chat_template", None):
                pipe_input = messages
            else:
                pipe_input = _render_prompt(messages)
            outputs = pipe(
                pipe_input,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                repetition_penalty=1.05,
                return_full_text=False,
            )
            return outputs[0]["generated_text"].strip()

        try:
            return _sample_completion(_render_prompt(messages), max_new_tokens)
        except Exception as e:
            logger.error(f"Chat template failed, using simple concatenation: {e}")

            # Minimal retry: system prompt plus only the latest entry (empty
            # when the history itself is empty).
            latest = conversation_history[-1]["content"] if conversation_history else ""
            full_text = f"{POWERTHOUGHT_SYSTEM_PROMPT}\n\nUser: {latest}\nAssistant: "
            return _sample_completion(full_text, max_new_tokens)

    except Exception as e:
        # Best-effort contract: callers receive text, never an exception.
        logger.exception("Generation error: %s", e)
        return f"I apologize, but I encountered an error while processing your request: {str(e)}"

@app.post("/api/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Handle POST /api/chat: run the conversation through the model.

    Converts the validated request messages into plain dicts, calls
    ``generate_response`` and wraps its text in a ``ChatResponse``.

    Raises:
        HTTPException: status 500 carrying the error text if anything in the
            handler raises.
    """
    # NOTE(review): generate_response() is a synchronous call inside a
    # coroutine, so it presumably occupies the event loop while the model
    # generates — confirm whether that matters for this deployment.
    try:
        conversation = []
        for turn in request.messages:
            conversation.append({"role": turn.role, "content": turn.content})
        reply = generate_response(conversation)
        return ChatResponse(response=reply)
    except Exception as exc:
        logger.error(f"API Error: {str(exc)}")
        raise HTTPException(status_code=500, detail=str(exc))

@app.get("/api/health")
async def health_check():
    """Handle GET /api/health: report which model is serving and how.

    Returns:
        A dict with ``status`` (always ``"healthy"``), ``model`` (the current
        ``MODEL_ID``), ``loading_method`` (``"pipeline"`` when ``pipe`` is set,
        otherwise ``"direct"``) and ``device`` (the torch device as a string).
    """
    report = {"status": "healthy", "model": MODEL_ID}
    if pipe is None:
        report["loading_method"] = "direct"
    else:
        report["loading_method"] = "pipeline"
    report["device"] = str(device)
    return report

# Gradio interface function
def gradio_chat(message, history):
    """Answer one Gradio chat turn.

    Args:
        message: The text the user just submitted.
        history: Earlier turns as supplied by Gradio. Both history layouts are
            accepted: ``(user_msg, assistant_msg)`` pairs and "messages"-style
            dicts with ``"role"`` and ``"content"`` keys. ``None`` is treated
            as an empty history.

    Returns:
        The reply text from ``generate_response``, or an apology string if
        converting the history or generating the reply raised.
    """
    try:
        conversation = []
        for turn in history or []:
            if isinstance(turn, dict):
                # "messages" layout: keep textual user/assistant turns only.
                role = turn.get("role")
                content = turn.get("content")
                if role in ("user", "assistant") and isinstance(content, str) and content:
                    conversation.append({"role": role, "content": content})
                continue

            # Pair layout: the assistant half may be empty/None while pending.
            user_msg, assistant_msg = turn
            conversation.append({"role": "user", "content": user_msg})
            if assistant_msg:
                conversation.append({"role": "assistant", "content": assistant_msg})

        # The message being answered always goes last.
        conversation.append({"role": "user", "content": message})

        return generate_response(conversation)
    except Exception as e:
        logger.error(f"Gradio error: {str(e)}")
        return f"I apologize, but I encountered an error: {str(e)}"

# Create the Gradio chat UI around gradio_chat().
# The retry_btn / undo_btn / clear_btn keyword arguments are intentionally not
# passed: Gradio 5 removed them from ChatInterface, so supplying them raises a
# TypeError at import time with an unpinned `gradio`. Releases that still
# accept them fall back to their built-in button labels.
iface = gr.ChatInterface(
    fn=gradio_chat,
    title="💪 PowerThought - Strategic Wisdom",
    description="Transform challenges into opportunities with ethical power strategies based on the 48 Laws of Power.",
    theme="soft",
    examples=[
        "I'm feeling overwhelmed at work with a difficult boss. How can I navigate this situation?",
        "I want to advance my career but I don't want to step on people. What's your advice?",
        "How can I build more influence in my organization while staying true to my values?",
        "I'm starting a new job next week. What should I focus on in my first 90 days?"
    ],
)

# Attach the Gradio UI to the FastAPI app at the root path. `app` is rebound
# to the object returned by mount_gradio_app, which is what uvicorn serves.
app = gr.mount_gradio_app(app, iface, path="/")

# When run as a script, serve on all interfaces on port 7860.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)