Spaces:

AthuKawaleLogituit
/

mistral_7B

Running on Zero

App Files Community

Man-isH-07 committed 4 days ago

Commit

8c22e42

1 Parent(s): 83557e0

Again At Normal

Browse files

Files changed (2) hide show

app.py +54 -142
style.css +0 -19

app.py CHANGED Viewed

@@ -24,10 +24,7 @@ if torch.cuda.is_available():
     model_id = "mistralai/Mistral-7B-Instruct-v0.3"
     model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
-    # Set the pad token to avoid warnings
-    if tokenizer.pad_token is None:
-        tokenizer.pad_token = tokenizer.eos_token
-    model.config.pad_token_id = tokenizer.pad_token_id
 @spaces.GPU
 def generate(
@@ -41,31 +38,15 @@ def generate(
 ) -> Iterator[str]:
     conversation = [*chat_history, {"role": "user", "content": message}]
-    # Apply chat template
-    inputs = tokenizer.apply_chat_template(conversation, return_tensors="pt", padding=True, return_attention_mask=True)
-    # Check if inputs is a dictionary or a tensor
-    if isinstance(inputs, dict):
-        input_ids = inputs["input_ids"]
-        attention_mask = inputs.get("attention_mask", None)
-    else:
-        input_ids = inputs
-        attention_mask = (input_ids != tokenizer.pad_token_id).long() if tokenizer.pad_token_id is not None else None
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
-        if attention_mask is not None:
-            attention_mask = attention_mask[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(model.device)
-    if attention_mask is not None:
-        attention_mask = attention_mask.to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
-        input_ids=input_ids,
-        attention_mask=attention_mask,
         streamer=streamer,
         max_new_tokens=max_new_tokens,
         do_sample=True,
@@ -78,132 +59,63 @@ def generate(
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
-    # First, yield the user's message (which contains the prompts)
-    yield message
-    # Then, yield the model's response
     outputs = []
     for text in streamer:
         outputs.append(text)
         yield "".join(outputs)
-    # Yield the final model output
-    final_output = "".join(outputs)
-    yield final_output
-# Updated JavaScript with debugging and robustness
-custom_js = """
-function splitPrompts() {
-    console.log("Running splitPrompts function"); // Debug log
-    const messages = document.querySelectorAll('.chatbot-message, .message, [class*="message"]');
-    console.log("Found messages:", messages.length); // Debug log
-    messages.forEach((message, index) => {
-        const text = message.innerHTML;
-        console.log("Message", index, "text:", text); // Debug log
-        if (text.includes('Positive Prompt:') && text.includes('Negative Prompt:')) {
-            console.log("Found Positive and Negative prompts in message", index); // Debug log
-            const positiveMatch = text.match(/Positive Prompt:(.*?)(?=(Negative Prompt:|$))/s);
-            const negativeMatch = text.match(/Negative Prompt:(.*)/s);
-            if (positiveMatch && negativeMatch) {
-                const positivePrompt = positiveMatch[1].trim();
-                const negativePrompt = negativeMatch[1].trim();
-                console.log("Positive Prompt:", positivePrompt); // Debug log
-                console.log("Negative Prompt:", negativePrompt); // Debug log
-                message.innerHTML = `
-                    <div class="positive-prompt"><strong>Positive Prompt:</strong><br>${positivePrompt}</div>
-                    <div class="negative-prompt"><strong>Negative Prompt:</strong><br>${negativePrompt}</div>
-                `;
-            } else {
-                console.log("Failed to match prompts in message", index); // Debug log
-            }
-        }
-    });
-}
-// Run the function when the DOM is fully loaded
-document.addEventListener('DOMContentLoaded', () => {
-    console.log("DOM fully loaded, setting up MutationObserver"); // Debug log
-    const observer = new MutationObserver((mutations) => {
-        console.log("MutationObserver triggered", mutations); // Debug log
-        splitPrompts();
-    });
-    const chatArea = document.querySelector('.gr-chatbot, [class*="chatbot"], [class*="chat"]');
-    console.log("Chat area found:", chatArea); // Debug log
-    if (chatArea) {
-        observer.observe(chatArea, { childList: true, subtree: true });
-    } else {
-        console.log("Chat area not found, retrying in 1 second"); // Debug log
-        setTimeout(() => {
-            const retryChatArea = document.querySelector('.gr-chatbot, [class*="chatbot"], [class*="chat"]');
-            if (retryChatArea) {
-                observer.observe(retryChatArea, { childList: true, subtree: true });
-            } else {
-                console.log("Chat area still not found after retry"); // Debug log
-            }
-        }, 1000);
-    }
-    // Run initially
-    splitPrompts();
-});
-"""
-# Use gr.Blocks to allow custom JavaScript injection
-with gr.Blocks(css="style.css", js=custom_js) as demo:
-    gr.Markdown(DESCRIPTION)
-    chat_interface = gr.ChatInterface(
-        fn=generate,
-        additional_inputs=[
-            gr.Slider(
-                label="Max new tokens",
-                minimum=1,
-                maximum=MAX_MAX_NEW_TOKENS,
-                step=1,
-                value=DEFAULT_MAX_NEW_TOKENS,
-            ),
-            gr.Slider(
-                label="Temperature",
-                minimum=0.1,
-                maximum=4.0,
-                step=0.1,
-                value=0.6,
-            ),
-            gr.Slider(
-                label="Top-p (nucleus sampling)",
-                minimum=0.05,
-                maximum=1.0,
-                step=0.05,
-                value=0.9,
-            ),
-            gr.Slider(
-                label="Top-k",
-                minimum=1,
-                maximum=1000,
-                step=1,
-                value=50,
-            ),
-            gr.Slider(
-                label="Repetition penalty",
-                minimum=1.0,
-                maximum=2.0,
-                step=0.05,
-                value=1.2,
-            ),
-        ],
-        stop_btn=None,
-        examples=[
-            ["Hello there! How are you doing?"],
-            ["Can you explain briefly to me what is the Python programming language?"],
-            ["Explain the plot of Cinderella in a sentence."],
-            ["How many hours does it take a man to eat a Helicopter?"],
-            ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
-        ],
-        type="messages",
-    )
 if __name__ == "__main__":
     demo.queue(max_size=20).launch()

     model_id = "mistralai/Mistral-7B-Instruct-v0.3"
     model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
 @spaces.GPU
 def generate(
 ) -> Iterator[str]:
     conversation = [*chat_history, {"role": "user", "content": message}]
+    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
+        {"input_ids": input_ids},
         streamer=streamer,
         max_new_tokens=max_new_tokens,
         do_sample=True,
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
     outputs = []
     for text in streamer:
         outputs.append(text)
         yield "".join(outputs)
+demo = gr.ChatInterface(
+    fn=generate,
+    additional_inputs=[
+        gr.Slider(
+            label="Max new tokens",
+            minimum=1,
+            maximum=MAX_MAX_NEW_TOKENS,
+            step=1,
+            value=DEFAULT_MAX_NEW_TOKENS,
+        ),
+        gr.Slider(
+            label="Temperature",
+            minimum=0.1,
+            maximum=4.0,
+            step=0.1,
+            value=0.6,
+        ),
+        gr.Slider(
+            label="Top-p (nucleus sampling)",
+            minimum=0.05,
+            maximum=1.0,
+            step=0.05,
+            value=0.9,
+        ),
+        gr.Slider(
+            label="Top-k",
+            minimum=1,
+            maximum=1000,
+            step=1,
+            value=50,
+        ),
+        gr.Slider(
+            label="Repetition penalty",
+            minimum=1.0,
+            maximum=2.0,
+            step=0.05,
+            value=1.2,
+        ),
+    ],
+    stop_btn=None,
+    examples=[
+        ["Hello there! How are you doing?"],
+        ["Can you explain briefly to me what is the Python programming language?"],
+        ["Explain the plot of Cinderella in a sentence."],
+        ["How many hours does it take a man to eat a Helicopter?"],
+        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
+    ],
+    type="messages",
+    description=DESCRIPTION,
+    css_paths="style.css",
+)
 if __name__ == "__main__":
     demo.queue(max_size=20).launch()

style.css CHANGED Viewed

@@ -9,22 +9,3 @@ h1 {
   background: #1565c0;
   border-radius: 100vh;
 }
-/* Style for the positive prompt box */
-.positive-prompt {
-  background-color: #2a2a2a; /* Dark background to match the theme */
-  border: 1px solid #444; /* Subtle border */
-  border-radius: 8px;
-  padding: 15px;
-  margin-bottom: 10px; /* Space between the two boxes */
-  color: #ffffff; /* White text for readability */
-}
-/* Style for the negative prompt box */
-.negative-prompt {
-  background-color: #2a2a2a;
-  border: 1px solid #444;
-  border-radius: 8px;
-  padding: 15px;
-  color: #ffffff;
-}

   background: #1565c0;
   border-radius: 100vh;
 }