fix runtime error
- Dockerfile +3 -2
- app.py +19 -3
Dockerfile CHANGED

@@ -14,11 +14,12 @@ RUN pip cache purge && pip install --no-cache-dir -r requirements.txt
 # Copy source code
 COPY app.py .
 
-# Create cache
-RUN mkdir -p /app/cache && chmod -R 777 /app/cache
+# Create cache directories and set permissions
+RUN mkdir -p /app/cache /app/cache/matplotlib && chmod -R 777 /app/cache
 
 # Set env vars
 ENV HF_HOME=/app/cache \
+    MPLCONFIGDIR=/app/cache/matplotlib \
     PYTHONUNBUFFERED=1 \
     PYTHONWARNINGS=ignore::FutureWarning,ignore::UserWarning:torch._utils
 
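The MPLCONFIGDIR addition is the Dockerfile half of the fix: Spaces containers run the app as a non-root user without a writable home directory, so matplotlib cannot create its default config/cache directory and either falls back to a temporary directory with a warning or fails at import time. Pointing MPLCONFIGDIR at the pre-created, world-writable /app/cache/matplotlib avoids that. A minimal sketch of the behavior, assuming matplotlib is among the Space's dependencies (implied by the env var, not shown in this diff):

# matplotlib checks MPLCONFIGDIR before falling back to ~/.config/matplotlib,
# so the variable must be set before the first import.
import os
os.environ.setdefault("MPLCONFIGDIR", "/app/cache/matplotlib")

import matplotlib

# With the env var pointing at a writable path, no temporary-cache-directory
# warning is emitted, and this prints /app/cache/matplotlib.
print(matplotlib.get_configdir())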
app.py CHANGED

@@ -15,6 +15,11 @@ except Exception as e:
     print(f"Error loading model: {e}")
     raise e
 
+# Set pad_token_id to eos_token_id if not set
+if tokenizer.pad_token_id is None:
+    tokenizer.pad_token_id = tokenizer.eos_token_id
+    model.config.pad_token_id = tokenizer.eos_token_id
+
 # Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
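The pad-token block addresses a common transformers gotcha: GPT-2-style causal LMs define an eos_token but no pad_token, so model.generate() warns on every call and any code path that pads cannot work at all. Reusing the EOS id as the pad id is the standard workaround. A quick illustration, using "gpt2" as a stand-in since the actual model name is not visible in this diff:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # stand-in model name
print(tokenizer.pad_token_id)  # None -- GPT-2 ships without a pad token

tokenizer.pad_token_id = tokenizer.eos_token_id
print(tokenizer.pad_token_id)  # 50256, now identical to eos_token_id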
@@ -26,13 +31,24 @@ print(f"Memory allocated: {torch.cuda.memory_allocated(device)/1e9:.2f} GB" if t
 
 def generate_text(prompt, max_length=100, temperature=1.0):
     try:
-        inputs = ...
+        # Encode input with attention mask
+        inputs = tokenizer(
+            prompt,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=max_length
+        ).to(device)
+
+        # Generate text
         outputs = model.generate(
-            inputs,
+            input_ids=inputs["input_ids"],
+            attention_mask=inputs["attention_mask"],
             max_length=max_length,
             temperature=temperature,
             do_sample=True,
-            num_beams=1
+            num_beams=1,
+            pad_token_id=tokenizer.pad_token_id
         )
         return tokenizer.decode(outputs[0], skip_special_tokens=True)
     except Exception as e:
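This hunk swaps the original one-line encoding call (its exact text did not survive the page capture) for the tokenizer's __call__ API, which returns both input_ids and an attention_mask. Passing the mask to model.generate, together with an explicit pad_token_id, silences the attention-mask warning and keeps the model from attending to padding positions. Calling the patched function is unchanged, e.g.:

# Illustrative prompt; the function signature comes from the diff above.
text = generate_text("Once upon a time", max_length=60, temperature=0.8)
print(text)

One caveat worth noting about this design: max_length is used twice, once to truncate the prompt and once as the total generation budget, so a prompt near the limit leaves almost no room for new tokens; max_new_tokens is the usual alternative when that matters.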