VietCat committed
Commit 6c4916c · Parent: 0673a12

fix runtime error

Files changed (2):
  1. Dockerfile +3 -2
  2. app.py +19 -3
Dockerfile CHANGED
@@ -14,11 +14,12 @@ RUN pip cache purge && pip install --no-cache-dir -r requirements.txt
 # Copy source code
 COPY app.py .
 
-# Create cache directory and set permissions
-RUN mkdir -p /app/cache && chmod -R 777 /app/cache
+# Create cache directories and set permissions
+RUN mkdir -p /app/cache /app/cache/matplotlib && chmod -R 777 /app/cache
 
 # Set env vars
 ENV HF_HOME=/app/cache \
+    MPLCONFIGDIR=/app/cache/matplotlib \
     PYTHONUNBUFFERED=1 \
     PYTHONWARNINGS=ignore::FutureWarning,ignore::UserWarning:torch._utils
 
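Note: the MPLCONFIGDIR change most likely targets matplotlib's startup complaint about an unwritable config directory when the container runs as a non-root user with no writable home. A minimal sketch to confirm the variable is picked up, assuming matplotlib is installed in the image (the path mirrors the Dockerfile; this script is not part of the commit):

    import os

    # MPLCONFIGDIR must be set before matplotlib is first imported;
    # the Dockerfile's ENV line does this for the whole container.
    os.environ.setdefault("MPLCONFIGDIR", "/app/cache/matplotlib")

    import matplotlib

    # Should print /app/cache/matplotlib rather than a tmp-dir fallback.
    print(matplotlib.get_configdir())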
app.py CHANGED
@@ -15,6 +15,11 @@ except Exception as e:
     print(f"Error loading model: {e}")
     raise e
 
+# Set pad_token_id to eos_token_id if not set
+if tokenizer.pad_token_id is None:
+    tokenizer.pad_token_id = tokenizer.eos_token_id
+    model.config.pad_token_id = tokenizer.eos_token_id
+
 # Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
@@ -26,13 +31,24 @@ print(f"Memory allocated: {torch.cuda.memory_allocated(device)/1e9:.2f} GB" if t
 
 def generate_text(prompt, max_length=100, temperature=1.0):
     try:
-        inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
+        # Encode input with attention mask
+        inputs = tokenizer(
+            prompt,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=max_length
+        ).to(device)
+
+        # Generate text
         outputs = model.generate(
-            inputs,
+            input_ids=inputs["input_ids"],
+            attention_mask=inputs["attention_mask"],
             max_length=max_length,
             temperature=temperature,
             do_sample=True,
-            num_beams=1
+            num_beams=1,
+            pad_token_id=tokenizer.pad_token_id
         )
         return tokenizer.decode(outputs[0], skip_special_tokens=True)
     except Exception as e:
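For context, the old code passed only token ids to generate(), which triggers the transformers warning that the attention mask and pad token id were not set and can produce unexpected output when the pad and eos ids coincide. Below is a minimal, self-contained sketch of the fixed call pattern; the gpt2 checkpoint and the example prompt are placeholders, not the model this repo actually loads:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder checkpoint
    model = AutoModelForCausalLM.from_pretrained("gpt2")

    # Mirror the commit: fall back to eos as the padding token.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id
        model.config.pad_token_id = tokenizer.eos_token_id

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Tokenize with an explicit attention mask, as in the new generate_text().
    inputs = tokenizer("Hello", return_tensors="pt", padding=True,
                       truncation=True, max_length=100).to(device)
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=100,
        temperature=1.0,
        do_sample=True,
        num_beams=1,
        pad_token_id=tokenizer.pad_token_id,
    )
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))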