Spaces: VenkateshRoshan (status: Runtime error)

deployment file update

Commit: b1d9c58
Parent(s): f0c8e4c
Committed by: VenkateshRoshan
Files changed:
- .github/workflows/deploy.yml   +44 -19
- app.py                         +61 -39
- dockerfile                     +4 -4
- local_app.py                   +150 -0
- requirements.txt               +0 -1
- src/deploy_sagemaker.py        +41 -76
- src/local_deploy_sagemaker.py  +89 -0
- src/push_to_s3.py              +52 -0
.github/workflows/deploy.yml
CHANGED
@@ -1,4 +1,4 @@
-name: Deploy to
+name: Deploy to SageMaker
 
 on:
   push:
@@ -6,21 +6,46 @@ on:
       - main
 
 jobs:
+  deploy:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Setup Python
+        uses: actions/setup-python@v3
+        with:
+          python-version: '3.10'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install --no-cache-dir --upgrade pip
+          pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121
+          pip install --no-cache-dir -r requirements.txt
+
+      - name: Login to AWS
+        uses: aws-actions/configure-aws-credentials@v2
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ secrets.AWS_REGION }}
+
+      - name: Login to Amazon ECR
+        id: login-ecr
+        uses: aws-actions/amazon-ecr-login@v1
+
+      - name: Build and push Docker image
+        run: |
+          docker build -t ${{ secrets.ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/customer_support_bot:latest .
+          docker push ${{ secrets.ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/customer_support_bot:latest
+
+      - name: Deploy model to SageMaker
+        run: |
+          python deploy_sagemaker.py \
+            --account_id ${{ secrets.ACCOUNT_ID }} \
+            --region ${{ secrets.AWS_REGION }} \
+            --role_arn ${{ secrets.SAGEMAKER_ROLE_ARN }} \
+            --ecr_repo_name "customer_support_bot" \
+            --endpoint_name "customer-support-chatbot"
app.py
CHANGED
@@ -1,54 +1,75 @@
-import json
+import json
 import psutil
 import torch
-from transformers import AutoTokenizer
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 import os
+import tarfile
 from typing import List, Tuple
 
 class CustomerSupportBot:
+    def __init__(self, model_path="models/customer_support_gpt"):
+        """
+        Initialize the customer support bot with the fine-tuned model.
+
+        Args:
+            model_path (str): Path to the saved model and tokenizer
+        """
         self.process = psutil.Process(os.getpid())
-        # self.tokenizer = AutoTokenizer.from_pretrained("gpt2") # Use the tokenizer appropriate to your model
-        self.endpoint_name = endpoint_name
-        self.sagemaker_runtime = boto3.client('runtime.sagemaker')
+        self.model_path = model_path
+        self.model_file_path = os.path.join(self.model_path, "model.tar.gz")
 
+        # Download and load the model
+        self.download_and_load_model()
+
+    def download_and_load_model(self):
+        # Check if the model directory exists
+        if not os.path.exists(self.model_path):
+            os.makedirs(self.model_path)
+
+        # Download model.tar.gz from S3 if not already downloaded
+        if not os.path.exists(self.model_file_path):
+            print("Downloading model from S3...")
+            self.s3.download_file(self.bucket_name, self.model_key, self.model_file_path)
+            print("Download complete. Extracting model files...")
 
-            print(f'JSON Payload: {json_payload}')
-            # Call the SageMaker endpoint for inference
-            response = self.sagemaker_runtime.invoke_endpoint(
-                EndpointName=self.endpoint_name,
-                ContentType='application/json',
-                Body=json_payload # Send the JSON string here
-            )
-            print(f'Response: {response}')
+        # Extract the model files
+        with tarfile.open(self.model_file_path, "r:gz") as tar:
+            tar.extractall(self.model_path)
 
+        # Load the model and tokenizer from extracted files
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
+        self.model = AutoModelForCausalLM.from_pretrained(self.model_path)
+        print("Model and tokenizer loaded successfully.")
 
+        # Move model to GPU if available
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.model = self.model.to(self.device)
 
+    def generate_response(self, message: str, max_length=100, temperature=0.7) -> str:
+        try:
+            input_text = f"Instruction: {message}\nResponse:"
+
+            # Tokenize input text
+            inputs = self.tokenizer(input_text, return_tensors="pt").to(self.device)
+
+            # Generate response using the model
+            with torch.no_grad():
+                outputs = self.model.generate(
+                    **inputs,
+                    max_length=max_length,
+                    temperature=temperature,
+                    num_return_sequences=1,
+                    pad_token_id=self.tokenizer.pad_token_id,
+                    eos_token_id=self.tokenizer.eos_token_id,
+                    do_sample=True,
+                    top_p=0.95,
+                    top_k=50
+                )
+
+            # Decode and format the response
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            response = response.split("Response:")[-1].strip()
             return response
         except Exception as e:
             return f"An error occurred: {str(e)}"
@@ -60,8 +81,9 @@ class CustomerSupportBot:
         }
         return usage
 
+
 def create_chat_interface():
-    bot = CustomerSupportBot()
+    bot = CustomerSupportBot(model_path="/app/models")
 
     def predict(message: str, history: List[Tuple[str, str]]) -> Tuple[str, List[Tuple[str, str]]]:
         if not message:
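Note: download_and_load_model() above references self.s3, self.bucket_name, and self.model_key, which are not assigned anywhere in the app.py diff shown here. A minimal sketch of the initialization the method appears to assume; the bucket name and key are taken from src/push_to_s3.py in this same commit and are assumptions, not confirmed for app.py:

import boto3

# Sketch of what CustomerSupportBot.__init__() would also need to set before
# calling download_and_load_model(); values are assumptions based on src/push_to_s3.py.
def init_s3_attributes(bot):
    bot.s3 = boto3.client("s3")               # client used by self.s3.download_file(...)
    bot.bucket_name = "customer-support-gpt"  # bucket written by src/push_to_s3.py
    bot.model_key = "models/model.tar.gz"     # key written by src/push_to_s3.py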
dockerfile
CHANGED
@@ -17,11 +17,11 @@ RUN pip install --no-cache-dir --upgrade pip
 RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy .env file to the working directory
-COPY .env /app/.env
+# # Copy .env file to the working directory
+# COPY .env /app/.env
 
-# Set environment variables from .env file
-ENV $(cat /app/.env | xargs)
+# # Set environment variables from .env file
+# ENV $(cat /app/.env | xargs)
 
 # Expose port 7860
 EXPOSE 7860
local_app.py
ADDED
@@ -0,0 +1,150 @@
import json  # Add this import
import psutil
import torch
import boto3
from transformers import AutoTokenizer
import gradio as gr
import os
from typing import List, Tuple

class CustomerSupportBot:
    def __init__(self, endpoint_name="customer-support-gpt-2024-11-10-00-30-03-555"):
        self.process = psutil.Process(os.getpid())
        model_name = "EleutherAI/gpt-neo-125M"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # self.tokenizer = AutoTokenizer.from_pretrained("gpt2") # Use the tokenizer appropriate to your model
        self.endpoint_name = endpoint_name
        self.sagemaker_runtime = boto3.client('runtime.sagemaker')

    def generate_response(self, message: str) -> str:
        try:
            input_text = f"Instruction: {message}\nResponse:"

            # Prepare payload for SageMaker endpoint
            payload = {
                # "inputs": inputs['input_ids'].tolist()[0],
                'inputs': input_text,
                # You can include other parameters if needed (e.g., attention_mask)
            }
            print(f'Payload: {payload}')
            # Convert the payload to a JSON string before sending
            json_payload = json.dumps(payload)  # Use json.dumps() to serialize the payload
            print(f'JSON Payload: {json_payload}')
            # Call the SageMaker endpoint for inference
            response = self.sagemaker_runtime.invoke_endpoint(
                EndpointName=self.endpoint_name,
                ContentType='application/json',
                Body=json_payload  # Send the JSON string here
            )
            print(f'Response: {response}')

            # Process the response
            result = response['Body'].read().decode('utf-8')
            parsed_result = json.loads(result)

            # Extract the generated text from the first element in the list
            generated_text = parsed_result[0]['generated_text']

            # Split the string to get the response part after 'Response:'
            response = generated_text.split('Response:')[1].strip()

            # return the extracted response
            return response
        except Exception as e:
            return f"An error occurred: {str(e)}"

    def monitor_resources(self) -> dict:
        usage = {
            "CPU (%)": self.process.cpu_percent(interval=1),
            "RAM (GB)": self.process.memory_info().rss / (1024 ** 3)
        }
        return usage

def create_chat_interface():
    bot = CustomerSupportBot()

    def predict(message: str, history: List[Tuple[str, str]]) -> Tuple[str, List[Tuple[str, str]]]:
        if not message:
            return "", history

        bot_response = bot.generate_response(message)

        # Log resource usage
        usage = bot.monitor_resources()
        print("Resource Usage:", usage)

        history.append((message, bot_response))
        return "", history

    # Create the Gradio interface with custom CSS
    with gr.Blocks(css="""
        .message-box {
            margin-bottom: 10px;
        }
        .button-row {
            display: flex;
            gap: 10px;
            margin-top: 10px;
        }
    """) as interface:
        gr.Markdown("# Customer Support Chatbot")
        gr.Markdown("Welcome! How can I assist you today?")

        chatbot = gr.Chatbot(
            label="Chat History",
            height=500,
            elem_classes="message-box"
        )

        with gr.Row():
            msg = gr.Textbox(
                label="Your Message",
                placeholder="Type your message here...",
                lines=2,
                elem_classes="message-box"
            )

        with gr.Row(elem_classes="button-row"):
            submit = gr.Button("Send Message", variant="primary")
            clear = gr.ClearButton([msg, chatbot], value="Clear Chat")

        # Add example queries in a separate row
        with gr.Row():
            gr.Examples(
                examples=[
                    "How do I reset my password?",
                    "What are your shipping policies?",
                    "I want to return a product.",
                    "How can I track my order?",
                    "What payment methods do you accept?"
                ],
                inputs=msg,
                label="Example Questions"
            )

        # Set up event handlers
        submit_click = submit.click(
            predict,
            inputs=[msg, chatbot],
            outputs=[msg, chatbot]
        )

        msg.submit(
            predict,
            inputs=[msg, chatbot],
            outputs=[msg, chatbot]
        )

        # Add keyboard shortcut for submit
        msg.change(lambda x: gr.update(interactive=bool(x.strip())), inputs=[msg], outputs=[submit])

    return interface

if __name__ == "__main__":
    demo = create_chat_interface()
    demo.launch(
        share=True,
        server_name="0.0.0.0",  # Makes the server accessible from other machines
        server_port=7860,       # Specify the port
        debug=True
    )
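For reference, the endpoint contract used by local_app.py can be exercised directly with boto3; a minimal sketch using the same payload format and response parsing as generate_response() above (the hard-coded endpoint name is the one from local_app.py and may no longer exist):

import json
import boto3

runtime = boto3.client("runtime.sagemaker")

payload = {"inputs": "Instruction: How do I reset my password?\nResponse:"}
response = runtime.invoke_endpoint(
    EndpointName="customer-support-gpt-2024-11-10-00-30-03-555",  # endpoint name hard-coded in local_app.py
    ContentType="application/json",
    Body=json.dumps(payload),
)
result = json.loads(response["Body"].read().decode("utf-8"))
print(result[0]["generated_text"].split("Response:")[1].strip())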
requirements.txt
CHANGED
@@ -1,5 +1,4 @@
 transformers==4.37
-torch
 mlflow
 boto3
 pytest
src/deploy_sagemaker.py
CHANGED
@@ -1,91 +1,56 @@
 import boto3
-from pathlib import Path
-import sagemaker
-from sagemaker.huggingface import HuggingFaceModel
-import transformers
-import torch
 import logging
+import sagemaker
+from sagemaker.model import Model
+import argparse
 import os
 
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-    with tarfile.open(tar_path, "w:gz") as tar:
-        for file_path in model_path.glob("*"):
-            if file_path.is_file():
-                logger.info(f"Adding {file_path} to tar archive")
-                tar.add(file_path, arcname=file_path.name)
-
-    return tar_path
-
-try:
-    # Initialize s3 client
-    s3 = boto3.client("s3")
-    bucket_name = 'customer-support-gpt'
-
-    # Create and upload tar.gz
-    tar_path = create_model_tar()
-    s3_key = "models/model.tar.gz" # Changed path
-    logger.info(f"Uploading model.tar.gz to s3://{bucket_name}/{s3_key}")
-    s3.upload_file(tar_path, bucket_name, s3_key)
+def deploy_app(acc_id, region_name, role_arn, ecr_repo_name, endpoint_name="customer-support-chatbot"):
+    """
+    Deploys a Gradio app as a SageMaker endpoint using an ECR image.
+
+    Args:
+        acc_id (str): AWS account ID
+        region_name (str): AWS region name
+        role_arn (str): IAM role ARN for SageMaker
+        ecr_repo_name (str): ECR repository name
+        endpoint_name (str): SageMaker endpoint name (default: "customer-support-chatbot")
+    """
     # Initialize SageMaker session
     sagemaker_session = sagemaker.Session()
-    role = 'arn:aws:iam::841162707028:role/service-role/AmazonSageMaker-ExecutionRole-20241109T160615'
-
-    # Verify IAM role
-    iam = boto3.client('iam')
-    try:
-        iam.get_role(RoleName=role.split('/')[-1])
-        logger.info(f"Successfully verified IAM role: {role}")
-    except iam.exceptions.NoSuchEntityException:
-        logger.error(f"IAM role not found: {role}")
-        raise
 
+    # Define the image URI in ECR
+    ecr_image = f"{acc_id}.dkr.ecr.{region_name}.amazonaws.com/{ecr_repo_name}:latest"
+
+    # Define model
+    model = Model(
+        image_uri=ecr_image,
+        role=role_arn,
+        sagemaker_session=sagemaker_session
+    )
 
-        name="customer-support-gpt"
-    )
-
-    logger.info("Starting model deployment...")
-    predictor = huggingface_model.deploy(
-        initial_instance_count=1,
-        instance_type="ml.m5.xlarge",
-        wait=True
-    )
-    logger.info("Model deployed successfully!")
-
-    except Exception as e:
-        logger.error(f"Error during model deployment: {str(e)}")
-        raise
+    # Deploy model as a SageMaker endpoint
+    logger.info(f"Starting deployment of Gradio app to SageMaker endpoint {endpoint_name}...")
+    predictor = model.deploy(
+        initial_instance_count=1,
+        instance_type="ml.m5.xlarge",
+        endpoint_name=endpoint_name
+    )
+    logger.info(f"Gradio app deployed successfully to endpoint: {endpoint_name}")
 
-    if os.path.exists(tar_path):
-        os.remove(tar_path)
+if __name__ == "__main__":
+    # Parse arguments from CLI
+    parser = argparse.ArgumentParser(description="Deploy Gradio app to SageMaker")
+    parser.add_argument("--account_id", type=str, required=True, help="AWS Account ID")
+    parser.add_argument("--region", type=str, required=True, help="AWS Region")
+    parser.add_argument("--role_arn", type=str, required=True, help="IAM Role ARN for SageMaker")
+    parser.add_argument("--ecr_repo_name", type=str, required=True, help="ECR Repository name")
+    parser.add_argument("--endpoint_name", type=str, default="customer-support-chatbot", help="SageMaker Endpoint Name")
+    args = parser.parse_args()
 
+    # Deploy the Gradio app to SageMaker
+    deploy_app(args.account_id, args.region, args.role_arn, args.ecr_repo_name, args.endpoint_name)
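For reference, a usage sketch of the new deploy_app entry point, equivalent to the CLI call in the workflow above; the account ID, region, and role ARN below are placeholders, not values from this commit:

# Sketch only: assumes deploy_sagemaker.py is importable from the working directory
# (the file lives under src/ in this repo).
from deploy_sagemaker import deploy_app

deploy_app(
    acc_id="123456789012",                                             # placeholder account ID
    region_name="us-east-1",                                           # placeholder region
    role_arn="arn:aws:iam::123456789012:role/SageMakerExecutionRole",  # placeholder role ARN
    ecr_repo_name="customer_support_bot",
    endpoint_name="customer-support-chatbot",
)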
src/local_deploy_sagemaker.py
ADDED
@@ -0,0 +1,89 @@
import boto3
from pathlib import Path
import sagemaker
from sagemaker.huggingface import HuggingFaceModel
import logging
import tarfile
import os

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def create_model_tar():
    model_path = Path("models/customer_support_gpt")
    tar_path = "model.tar.gz"

    with tarfile.open(tar_path, "w:gz") as tar:
        for file_path in model_path.glob("*"):
            if file_path.is_file():
                logger.info(f"Adding {file_path} to tar archive")
                tar.add(file_path, arcname=file_path.name)

    return tar_path

try:
    # Initialize s3 client
    s3 = boto3.client("s3")
    bucket_name = 'customer-support-gpt'

    # Create and upload tar.gz
    tar_path = create_model_tar()
    s3_key = "models/model.tar.gz"  # Changed path
    logger.info(f"Uploading model.tar.gz to s3://{bucket_name}/{s3_key}")
    s3.upload_file(tar_path, bucket_name, s3_key)

    # Initialize SageMaker session
    sagemaker_session = sagemaker.Session()
    role = 'arn:aws:iam::841162707028:role/service-role/AmazonSageMaker-ExecutionRole-20241109T160615'

    # Verify IAM role
    iam = boto3.client('iam')
    try:
        iam.get_role(RoleName=role.split('/')[-1])
        logger.info(f"Successfully verified IAM role: {role}")
    except iam.exceptions.NoSuchEntityException:
        logger.error(f"IAM role not found: {role}")
        raise

    # Point to the tar.gz file
    model_artifacts = f's3://{bucket_name}/{s3_key}'
    print(f'Model artifacts: {model_artifacts}')

    env = {
        "model_path": "/opt/ml/model",
        "max_length": "256",
        "generation_config": '{"max_length":100,"temperature":0.7,"top_p":0.95,"top_k":50,"do_sample":true}'
    }

    try:
        huggingface_model = HuggingFaceModel(
            model_data=model_artifacts,
            role=role,
            transformers_version="4.37.0",  # Explicit version
            pytorch_version="2.1.0",        # Matching your version
            py_version="py310",             # Keep py310
            env=env,
            name="customer-support-gpt"
        )

        logger.info("Starting model deployment...")
        predictor = huggingface_model.deploy(
            initial_instance_count=1,
            instance_type="ml.m5.xlarge",
            wait=True
        )
        logger.info("Model deployed successfully!")

    except Exception as e:
        logger.error(f"Error during model deployment: {str(e)}")
        raise

except Exception as e:
    logger.error(f"Deployment failed: {str(e)}")
    raise

finally:
    # Clean up the local tar file
    if os.path.exists(tar_path):
        os.remove(tar_path)
src/push_to_s3.py
ADDED
@@ -0,0 +1,52 @@
import boto3
from pathlib import Path
import tarfile
import logging
import os

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def create_model_tar():
    model_path = Path("models/customer_support_gpt")  # Path to your model folder
    tar_path = "model.tar.gz"  # Path for the output tar.gz file

    # Create a tar.gz file containing all files in the model folder
    with tarfile.open(tar_path, "w:gz") as tar:
        for file_path in model_path.glob("*"):
            if file_path.is_file():
                logger.info(f"Adding {file_path} to tar archive")
                tar.add(file_path, arcname=file_path.name)

    return tar_path

def upload_to_s3(tar_path, bucket_name, s3_key):
    # Initialize S3 client
    s3 = boto3.client("s3")

    # Upload tar.gz file to S3
    logger.info(f"Uploading {tar_path} to s3://{bucket_name}/{s3_key}")
    s3.upload_file(tar_path, bucket_name, s3_key)
    logger.info("Upload complete!")

# Main code
try:
    bucket_name = 'customer-support-gpt'  # Your S3 bucket name
    s3_key = "models/model.tar.gz"  # S3 key (path in bucket)

    # Create the tar.gz archive
    tar_path = create_model_tar()

    # Upload the tar.gz to S3
    upload_to_s3(tar_path, bucket_name, s3_key)

except Exception as e:
    logger.error(f"An error occurred: {str(e)}")
    raise

finally:
    # Clean up the local tar file
    if os.path.exists(tar_path):
        os.remove(tar_path)
        logger.info(f"Deleted local file: {tar_path}")