markury
/

joy-caption-alpha-two

Model card Files Files and versions

markury committed on Oct 2, 2024

Commit

8012085

·

verified ·

1 Parent(s): 0809c39

Update app.py

Files changed (1) hide show

app.py +11 -16

app.py CHANGED Viewed

@@ -152,22 +152,17 @@ image_adapter.eval()
 image_adapter.to("cuda")
-def filter_caption_start(caption, unwanted_words):
-    # Remove any leading newlines and whitespace
-    caption = caption.lstrip()
-    # Remove any leading lines that are empty or start with unwanted words
     lines = caption.splitlines()
-    while lines:
-        line = lines[0].strip()
-        if not line:
-            lines.pop(0)
-        elif any(line.lower().startswith(unwanted_word.lower()) for unwanted_word in unwanted_words):
-            lines.pop(0)
-        else:
-            break
-    # Reconstruct caption
-    caption = '\n'.join(lines).strip()
-    return caption
 @torch.no_grad()
@@ -268,7 +263,7 @@ def stream_chat(folder_path: str, caption_type: str, caption_length: str | int,
             caption = tokenizer.batch_decode(generate_ids, skip_special_tokens=True)[0]
-            caption = filter_caption_start(caption, unwanted_words)
             base_name = image_file.stem
             text_file_path = folder_path / f"{base_name}.txt"

 image_adapter.to("cuda")
+def filter_caption_start(caption):
+    # Remove any leading and trailing whitespace
+    caption = caption.strip()
+    # Split caption into lines
     lines = caption.splitlines()
+    # Find the longest line
+    if not lines:
+        return caption
+    longest_line = max(lines, key=lambda line: len(line.strip()))
+    # Return the longest line
+    return longest_line.strip()
 @torch.no_grad()
             caption = tokenizer.batch_decode(generate_ids, skip_special_tokens=True)[0]
+            caption = filter_caption_start(caption)
             base_name = image_file.stem
             text_file_path = folder_path / f"{base_name}.txt"