redfernstech committed
Commit 17479d2 · verified · 1 Parent(s): da0a72c

Upload 4 files

Files changed (4)
  1. .env +2 -0
  2. Dockerfile +45 -45
  3. main.py +81 -0
  4. requirements.txt +9 -0
.env ADDED
@@ -0,0 +1,2 @@
+ API_KEY_USER1=your-secure-key-1
+ API_KEY_USER2=your-secure-key-2
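
Both key values above are placeholders. A minimal sketch for generating stronger replacements with Python's standard secrets module (this helper is hypothetical and not part of the commit):

# generate_keys.py: hypothetical helper, not included in this commit.
# Prints two random values that can replace the placeholder keys in .env.
import secrets

for name in ("API_KEY_USER1", "API_KEY_USER2"):
    print(f"{name}={secrets.token_urlsafe(32)}")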
Dockerfile CHANGED
@@ -1,45 +1,45 @@
- # Use Ubuntu as the base image
- FROM ubuntu:22.04
-
- # Set the working directory in the container
- WORKDIR /app
-
- # Install system dependencies and Python
- RUN apt-get update && apt-get install -y \
-     python3 \
-     python3-pip \
-     curl \
-     git \
-     build-essential \
-     cmake \
-     ninja-build \
-     wget \
-     && rm -rf /var/lib/apt/lists/*
-
- # Set Python3 as the default
- RUN ln -s /usr/bin/python3 /usr/bin/python
-
- # Copy the requirements file and install dependencies
- COPY requirements.txt ./
- RUN pip install --no-cache-dir -r requirements.txt
-
- # Install llama.cpp using CMake
- RUN git clone https://github.com/ggerganov/llama.cpp.git /app/llama.cpp && \
-     cd /app/llama.cpp && \
-     mkdir build && cd build && \
-     cmake .. -G Ninja && ninja install
-
- # Ensure llama.cpp binaries are in the system path
- ENV PATH="/usr/local/bin:$PATH"
-
- # Copy the Llama model into the Docker image
- COPY Meta-Llama-3-8B-Instruct.Q4_0.gguf /app/
-
- # Copy the application files
- COPY . .
-
- # Expose the FastAPI default port
- EXPOSE 8000
-
- # Start llama.cpp server, then start FastAPI
- CMD ["sh", "-c", "/usr/local/bin/server -m /app/Meta-Llama-3-8B-Instruct.Q4_0.gguf & sleep 5 && uvicorn main:app --host 0.0.0.0 --port 8000"]
+ # Use Ubuntu as the base image
+ FROM ubuntu:22.04
+
+ # Set the working directory in the container
+ WORKDIR /app
+
+ # Install system dependencies and Python
+ RUN apt-get update && apt-get install -y \
+     python3 \
+     python3-pip \
+     curl \
+     git \
+     build-essential \
+     cmake \
+     ninja-build \
+     wget \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Set Python3 as the default
+ RUN ln -s /usr/bin/python3 /usr/bin/python
+
+ # Copy the requirements file and install dependencies
+ COPY requirements.txt ./
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Install llama.cpp using CMake
+ RUN git clone https://github.com/ggerganov/llama.cpp.git /app/llama.cpp && \
+     cd /app/llama.cpp && \
+     mkdir build && cd build && \
+     cmake .. -G Ninja && ninja install
+
+ # Ensure llama.cpp binaries are in the system path
+ ENV PATH="/usr/local/bin:$PATH"
+
+ # Copy the Llama model into the Docker image
+ COPY Meta-Llama-3-8B-Instruct.Q4_0.gguf /app/
+
+ # Copy the application files
+ COPY . .
+
+ # Expose the FastAPI default port
+ EXPOSE 8000
+
+ # Start llama.cpp server, then start FastAPI
+ CMD ["sh", "-c", "/usr/local/bin/server -m /app/Meta-Llama-3-8B-Instruct.Q4_0.gguf & sleep 5 && uvicorn main:app --host 0.0.0.0 --port 8000"]
main.py ADDED
@@ -0,0 +1,81 @@
+ from fastapi import FastAPI, HTTPException, Depends, Header, Request
+ from pydantic import BaseModel
+ import os
+ import logging
+ import time
+ from langchain_community.llms import LlamaCpp
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+
+ # API keys from .env
+ API_KEYS = {
+     "user1": os.getenv("API_KEY_USER1"),
+     "user2": os.getenv("API_KEY_USER2"),
+ }
+
+ app = FastAPI()
+
+ # API Key Authentication
+ def verify_api_key(request: Request, api_key: str = Header(None, alias="X-API-Key")):
+     logging.info(f"Received Headers: {request.headers}")
+     if not api_key:
+         raise HTTPException(status_code=401, detail="API key is missing")
+
+     api_key = api_key.strip()
+     if api_key not in API_KEYS.values():
+         raise HTTPException(status_code=401, detail="Invalid API key")
+
+     return api_key
+
+ # OpenAI-compatible request format
+ class OpenAIRequest(BaseModel):
+     model: str
+     messages: list
+     stream: bool = False
+
+ # Initialize LangChain with Llama.cpp
+ def get_llm():
+     model_path = "/app/Meta-Llama-3-8B-Instruct.Q4_0.gguf"
+     return LlamaCpp(model_path=model_path, n_ctx=2048)
+
+ @app.post("/v1/chat/completions")
+ def generate_text(request: OpenAIRequest, api_key: str = Depends(verify_api_key)):
+     try:
+         llm = get_llm()
+
+         # Extract last user message
+         user_message = next((msg["content"] for msg in reversed(request.messages) if msg["role"] == "user"), None)
+         if not user_message:
+             raise HTTPException(status_code=400, detail="User message is required")
+
+         response_text = llm.invoke(user_message)
+
+         response = {
+             "id": "chatcmpl-123",
+             "object": "chat.completion",
+             "created": int(time.time()),
+             "model": request.model,
+             "choices": [
+                 {
+                     "index": 0,
+                     "message": {"role": "assistant", "content": response_text},
+                     "finish_reason": "stop",
+                 }
+             ],
+             "usage": {
+                 "prompt_tokens": len(user_message.split()),
+                 "completion_tokens": len(response_text.split()),
+                 "total_tokens": len(user_message.split()) + len(response_text.split()),
+             }
+         }
+
+         return response
+
+     except Exception as e:
+         logging.error(f"Error generating response: {e}")
+         raise HTTPException(status_code=500, detail="Internal server error")
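
A minimal usage sketch for the endpoint defined above, assuming the container is reachable on localhost:8000 and using one of the keys from .env in the X-API-Key header that verify_api_key expects (the script itself is not part of the commit):

# client_example.py: hypothetical usage sketch, not included in this commit.
import requests

payload = {
    "model": "Meta-Llama-3-8B-Instruct.Q4_0",  # echoed back in the response
    "messages": [{"role": "user", "content": "Hello, who are you?"}],
    "stream": False,
}

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    headers={"X-API-Key": "your-secure-key-1"},  # placeholder value from .env
    json=payload,
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])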
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ fastapi
+ uvicorn
+ openai
+ langchain
+ requests
+ langchain_community
+ python-dotenv
+ cloudflared
+ llama-cpp-python
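
Because the endpoint mirrors the OpenAI chat-completions format and the openai package is listed above, a client could also point the OpenAI SDK at this server. A minimal sketch, assuming the SDK's default_headers option is used to supply the X-API-Key header that main.py checks and that the server's response shape parses cleanly in the SDK (not part of this commit):

# openai_client_example.py: hypothetical usage sketch, not included in this commit.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000/v1",                  # FastAPI app from main.py
    api_key="unused",                                     # main.py checks X-API-Key, not the bearer token
    default_headers={"X-API-Key": "your-secure-key-1"},   # placeholder value from .env
)

completion = client.chat.completions.create(
    model="Meta-Llama-3-8B-Instruct.Q4_0",
    messages=[{"role": "user", "content": "Summarize what this service does."}],
)
print(completion.choices[0].message.content)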