Upload 4 files (Space build status: Build error)

Files changed:
- .env: +2 −0
- Dockerfile: +45 −45
- main.py: +81 −0
- requirements.txt: +9 −0
.env
ADDED

@@ -0,0 +1,2 @@

API_KEY_USER1=your-secure-key-1
API_KEY_USER2=your-secure-key-2
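The two values above are placeholders. On a public Space, anything committed in .env is world-readable, so real keys belong in the Space's secrets settings; those surface to the container as ordinary environment variables and are picked up by the same os.getenv calls in main.py. A quick standalone sanity check, useful before debugging a wall of 401s (a sketch, not part of the upload):

    # Standalone sketch: confirm both keys are visible to the process.
    # load_dotenv() does not override variables already set in the
    # environment (e.g. Space secrets), so this checks either source.
    import os
    from dotenv import load_dotenv

    load_dotenv()
    for name in ("API_KEY_USER1", "API_KEY_USER2"):
        print(name, "set" if os.getenv(name) else "MISSING")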
Dockerfile
CHANGED

@@ -1,45 +1,45 @@
(The 45 removed lines and 45 added lines render identically, so the recorded change is not visible in the text, likely whitespace or line endings; the file is shown once.)

# Use Ubuntu as the base image
FROM ubuntu:22.04

# Set the working directory in the container
WORKDIR /app

# Install system dependencies and Python
RUN apt-get update && apt-get install -y \
    python3 \
    python3-pip \
    curl \
    git \
    build-essential \
    cmake \
    ninja-build \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Set Python3 as the default
RUN ln -s /usr/bin/python3 /usr/bin/python

# Copy the requirements file and install dependencies
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Install llama.cpp using CMake
RUN git clone https://github.com/ggerganov/llama.cpp.git /app/llama.cpp && \
    cd /app/llama.cpp && \
    mkdir build && cd build && \
    cmake .. -G Ninja && ninja install

# Ensure llama.cpp binaries are in the system path
ENV PATH="/usr/local/bin:$PATH"

# Copy the Llama model into the Docker image
COPY Meta-Llama-3-8B-Instruct.Q4_0.gguf /app/

# Copy the application files
COPY . .

# Expose the FastAPI default port
EXPOSE 8000

# Start llama.cpp server, then start FastAPI
CMD ["sh", "-c", "/usr/local/bin/server -m /app/Meta-Llama-3-8B-Instruct.Q4_0.gguf & sleep 5 && uvicorn main:app --host 0.0.0.0 --port 8000"]
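One plausible source of the Space's failure, offered as an assumption rather than a diagnosis: llama.cpp renamed its HTTP server binary from server to llama-server in 2024, so a fresh unpinned clone will not install /usr/local/bin/server and the CMD's background server silently never starts. Pinning the clone to a known tag avoids that drift; otherwise the CMD needs the new name. A hedged sketch of the one affected line, assuming a recent checkout:

    # Assumption: a post-rename llama.cpp checkout, where the HTTP
    # server installs as llama-server (the old name, server, is gone).
    CMD ["sh", "-c", "/usr/local/bin/llama-server -m /app/Meta-Llama-3-8B-Instruct.Q4_0.gguf & sleep 5 && uvicorn main:app --host 0.0.0.0 --port 8000"]

Separately, note that this image loads the same GGUF twice at runtime: once in the background llama.cpp server and once through llama-cpp-python inside main.py. Since main.py never talks to the background server, dropping one of the two would roughly halve memory use.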
main.py
ADDED

@@ -0,0 +1,81 @@

from fastapi import FastAPI, HTTPException, Depends, Header, Request
from pydantic import BaseModel
import os
import logging
import time
from langchain_community.llms import LlamaCpp
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Configure logging
logging.basicConfig(level=logging.INFO)

# API keys from .env
API_KEYS = {
    "user1": os.getenv("API_KEY_USER1"),
    "user2": os.getenv("API_KEY_USER2"),
}

app = FastAPI()

# API Key Authentication
def verify_api_key(request: Request, api_key: str = Header(None, alias="X-API-Key")):
    # Note: logging the full header set also logs the X-API-Key value;
    # worth redacting outside of debugging.
    logging.info(f"Received Headers: {request.headers}")
    if not api_key:
        raise HTTPException(status_code=401, detail="API key is missing")

    api_key = api_key.strip()
    if api_key not in API_KEYS.values():
        raise HTTPException(status_code=401, detail="Invalid API key")

    return api_key

# OpenAI-compatible request format
class OpenAIRequest(BaseModel):
    model: str
    messages: list
    stream: bool = False

# Initialize LangChain with Llama.cpp.
# Note: this reloads the GGUF from disk on every request; caching the
# instance (module-level or functools.lru_cache) avoids the reload cost.
def get_llm():
    model_path = "/app/Meta-Llama-3-8B-Instruct.Q4_0.gguf"
    return LlamaCpp(model_path=model_path, n_ctx=2048)

@app.post("/v1/chat/completions")
def generate_text(request: OpenAIRequest, api_key: str = Depends(verify_api_key)):
    try:
        llm = get_llm()

        # Extract last user message
        user_message = next((msg["content"] for msg in reversed(request.messages) if msg["role"] == "user"), None)
        if not user_message:
            raise HTTPException(status_code=400, detail="User message is required")

        response_text = llm.invoke(user_message)

        response = {
            "id": "chatcmpl-123",
            "object": "chat.completion",
            "created": int(time.time()),
            "model": request.model,
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": response_text},
                    "finish_reason": "stop",
                }
            ],
            "usage": {
                "prompt_tokens": len(user_message.split()),
                "completion_tokens": len(response_text.split()),
                "total_tokens": len(user_message.split()) + len(response_text.split()),
            },
        }

        return response

    except HTTPException:
        # Re-raise deliberate HTTP errors (e.g. the 400 above) so the
        # generic handler below does not mask them as 500s.
        raise
    except Exception as e:
        logging.error(f"Error generating response: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
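For completeness, a minimal client sketch against the endpoint above; the host, port, and key value are assumptions, and requests is already in requirements.txt:

    # Minimal client sketch (not part of the upload): exercises the
    # OpenAI-compatible endpoint with the X-API-Key header that
    # verify_api_key expects. Host/port and the key are assumptions.
    import requests

    resp = requests.post(
        "http://localhost:8000/v1/chat/completions",
        headers={"X-API-Key": "your-secure-key-1"},
        json={
            "model": "Meta-Llama-3-8B-Instruct",
            "messages": [{"role": "user", "content": "Say hello."}],
        },
        timeout=120,
    )
    resp.raise_for_status()
    print(resp.json()["choices"][0]["message"]["content"])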
requirements.txt
ADDED

@@ -0,0 +1,9 @@

fastapi
uvicorn
openai
langchain
requests
langchain_community
python-dotenv
cloudflared
llama-cpp-python
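Two lines here deserve a second look. cloudflared is distributed as a standalone Go binary rather than, to my knowledge, a PyPI package under this name, so the pip install step in the Dockerfile may be exactly where the build errors out; if the tunnel is needed, fetching the binary in the Dockerfile is an alternative (hedged sketch below, with the release asset name an assumption to verify against the cloudflared releases page). llama-cpp-python, by contrast, compiles native code on install, which the build-essential/cmake packages already in the Dockerfile should cover.

    # Hypothetical alternative to "pip install cloudflared": download the
    # binary directly. The asset name cloudflared-linux-amd64 is an
    # assumption; verify at https://github.com/cloudflare/cloudflared/releases
    RUN wget -O /usr/local/bin/cloudflared \
        https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 \
        && chmod +x /usr/local/bin/cloudflared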