markury committed on
Commit
8012085
·
verified ·
1 Parent(s): 0809c39

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -16
app.py CHANGED
@@ -152,22 +152,17 @@ image_adapter.eval()
152
  image_adapter.to("cuda")
153
 
154
 
155
- def filter_caption_start(caption, unwanted_words):
156
- # Remove any leading newlines and whitespace
157
- caption = caption.lstrip()
158
- # Remove any leading lines that are empty or start with unwanted words
159
  lines = caption.splitlines()
160
- while lines:
161
- line = lines[0].strip()
162
- if not line:
163
- lines.pop(0)
164
- elif any(line.lower().startswith(unwanted_word.lower()) for unwanted_word in unwanted_words):
165
- lines.pop(0)
166
- else:
167
- break
168
- # Reconstruct caption
169
- caption = '\n'.join(lines).strip()
170
- return caption
171
 
172
 
173
  @torch.no_grad()
@@ -268,7 +263,7 @@ def stream_chat(folder_path: str, caption_type: str, caption_length: str | int,
268
 
269
  caption = tokenizer.batch_decode(generate_ids, skip_special_tokens=True)[0]
270
 
271
- caption = filter_caption_start(caption, unwanted_words)
272
 
273
  base_name = image_file.stem
274
  text_file_path = folder_path / f"{base_name}.txt"
 
152
  image_adapter.to("cuda")
153
 
154
 
155
+ def filter_caption_start(caption):
156
+ # Remove any leading and trailing whitespace
157
+ caption = caption.strip()
158
+ # Split caption into lines
159
  lines = caption.splitlines()
160
+ # Find the longest line
161
+ if not lines:
162
+ return caption
163
+ longest_line = max(lines, key=lambda line: len(line.strip()))
164
+ # Return the longest line
165
+ return longest_line.strip()
 
 
 
 
 
166
 
167
 
168
  @torch.no_grad()
 
263
 
264
  caption = tokenizer.batch_decode(generate_ids, skip_special_tokens=True)[0]
265
 
266
+ caption = filter_caption_start(caption)
267
 
268
  base_name = image_file.stem
269
  text_file_path = folder_path / f"{base_name}.txt"