Mir-2002
/

codet5p-google-style-docstrings

Model card Files and versions

Mir-2002 committed 21 days ago

Commit

1efe044

·

verified ·

1 Parent(s): 79f0ea0

Delete handler.py

Files changed (1) hide show

handler.py +0 -59

handler.py DELETED Viewed

@@ -1,59 +0,0 @@
-from typing import Any, Dict, List
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-import torch
-MAX_INPUT_LENGTH = 256
-MAX_OUTPUT_LENGTH = 128
-class EndpointHandler:
-    def __init__(self, model_dir: str = "", **kwargs: Any) -> None:
-        """
-        Initializes the model and tokenizer when the endpoint starts.
-        """
-        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
-        # Assuming you fine-tuned CodeT5+ for a sequence-to-sequence task
-        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)
-        self.model.eval() # Set model to evaluation mode
-        # You might want to move the model to GPU if available
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.model.to(self.device)
-    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
-        """
-        Handles incoming inference requests.
-        """
-        inputs = data.get("inputs")
-        if not inputs:
-            raise ValueError("No 'inputs' found in the request data.")
-        # Ensure inputs are in a list for batch processing, even if single input
-        if isinstance(inputs, str):
-            inputs = [inputs]
-        # Pre-processing
-        # Adjust max_length and padding based on your model's training and task
-        tokenized_inputs = self.tokenizer(
-            inputs,
-            max_length=MAX_INPUT_LENGTH,
-            padding=True,
-            truncation=True,
-            return_tensors="pt"
-        ).to(self.device)
-        # Inference
-        with torch.no_grad():
-            outputs = self.model.generate(
-                tokenized_inputs["input_ids"],
-                attention_mask=tokenized_inputs["attention_mask"],
-                # Add generation arguments relevant to your task (e.g., max_length, num_beams)
-                max_length=MAX_OUTPUT_LENGTH, # Example, adjust as needed
-                num_beams=8,    # Example, adjust as needed
-                no_repeat_ngram_size=3,
-                pad_token_id=self.tokenizer.pad_token_id) # Fixed: Added self. before tokenizer
-        # Post-processing
-        decoded_outputs = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
-        # Format the output as a list of dictionaries
-        results = [{"generated_text": text} for text in decoded_outputs]
-        return results