VietCat committed
Commit 6c4916c · Parent: 0673a12

fix runtime error

Files changed (2):
  1. Dockerfile +3 -2
  2. app.py +19 -3
Dockerfile CHANGED
@@ -14,11 +14,12 @@ RUN pip cache purge && pip install --no-cache-dir -r requirements.txt
 # Copy source code
 COPY app.py .
 
-# Create cache directory and set permissions
-RUN mkdir -p /app/cache && chmod -R 777 /app/cache
+# Create cache directories and set permissions
+RUN mkdir -p /app/cache /app/cache/matplotlib && chmod -R 777 /app/cache
 
 # Set env vars
 ENV HF_HOME=/app/cache \
+    MPLCONFIGDIR=/app/cache/matplotlib \
     PYTHONUNBUFFERED=1 \
     PYTHONWARNINGS=ignore::FutureWarning,ignore::UserWarning:torch._utils
 
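Note: the MPLCONFIGDIR change most likely targets matplotlib's startup complaint about an unwritable config directory when the container runs as a non-root user with no writable home. A minimal sketch to confirm the variable is picked up, assuming matplotlib is installed in the image (the path mirrors the Dockerfile; this script is not part of the commit):

    import os

    # MPLCONFIGDIR must be set before matplotlib is first imported;
    # the Dockerfile's ENV line does this for the whole container.
    os.environ.setdefault("MPLCONFIGDIR", "/app/cache/matplotlib")

    import matplotlib

    # Should print /app/cache/matplotlib rather than a tmp-dir fallback.
    print(matplotlib.get_configdir())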
app.py CHANGED
@@ -15,6 +15,11 @@ except Exception as e:
     print(f"Error loading model: {e}")
     raise e
 
+# Set pad_token_id to eos_token_id if not set
+if tokenizer.pad_token_id is None:
+    tokenizer.pad_token_id = tokenizer.eos_token_id
+    model.config.pad_token_id = tokenizer.eos_token_id
+
 # Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
@@ -26,13 +31,24 @@ print(f"Memory allocated: {torch.cuda.memory_allocated(device)/1e9:.2f} GB" if t
 
 def generate_text(prompt, max_length=100, temperature=1.0):
     try:
-        inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
+        # Encode input with attention mask
+        inputs = tokenizer(
+            prompt,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=max_length
+        ).to(device)
+
+        # Generate text
         outputs = model.generate(
-            inputs,
+            input_ids=inputs["input_ids"],
+            attention_mask=inputs["attention_mask"],
             max_length=max_length,
             temperature=temperature,
             do_sample=True,
-            num_beams=1
+            num_beams=1,
+            pad_token_id=tokenizer.pad_token_id
         )
         return tokenizer.decode(outputs[0], skip_special_tokens=True)
     except Exception as e:
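For context, the old code passed only token ids to generate(), which triggers the transformers warning that the attention mask and pad token id were not set and can produce unexpected output when the pad and eos ids coincide. Below is a minimal, self-contained sketch of the fixed call pattern; the gpt2 checkpoint and the example prompt are placeholders, not the model this repo actually loads:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder checkpoint
    model = AutoModelForCausalLM.from_pretrained("gpt2")

    # Mirror the commit: fall back to eos as the padding token.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id
        model.config.pad_token_id = tokenizer.eos_token_id

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Tokenize with an explicit attention mask, as in the new generate_text().
    inputs = tokenizer("Hello", return_tensors="pt", padding=True,
                       truncation=True, max_length=100).to(device)
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=100,
        temperature=1.0,
        do_sample=True,
        num_beams=1,
        pad_token_id=tokenizer.pad_token_id,
    )
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))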