Spaces: Running on Zero
import spaces
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Llama 3.2 3B setup
# quantization_config = BitsAndBytesConfig(load_in_4bit=True)
torch_device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
torch_dtype = torch.bfloat16 if torch_device in ["cuda", "mps"] else torch.float32

model_id = "meta-llama/Llama-3.2-3B-Instruct"
llama_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    # quantization_config=quantization_config,
    torch_dtype=torch_dtype,
    device_map=torch_device,
    # load_in_4bit=True  # for memory-constrained devices
)
llama_tokenizer = AutoTokenizer.from_pretrained(model_id)
pipe = pipeline(
    "text-generation",
    model=llama_model,
    tokenizer=llama_tokenizer,
    torch_dtype=torch_dtype,
    device_map="auto",
    # load_in_4bit=True  # for memory-constrained machines
)
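# Note: with chat-style "messages" input, the pipeline returns the whole conversation
# under "generated_text" (illustrative shape, based on current transformers behaviour):
#   [{"generated_text": [{"role": "system", ...}, {"role": "user", ...},
#                        {"role": "assistant", "content": "<answer>"}]}]
# so the assistant's reply can be read from generated_text[-1]["content"], as done below.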
def llama_QA(input_question):
    """
    Ask Llama a question and return its answer.
    inputs:
        - input_question [str]: question for Llama to answer
    outputs:
        - response [str]: Llama's response
    """
    messages = [
        {"role": "system", "content": "You are a helpful chatbot assistant. Answer all questions helpfully."},
        {"role": "user", "content": input_question},
    ]
    outputs = pipe(
        messages,
        max_new_tokens=512,
    )
    response = outputs[0]["generated_text"][-1]["content"]
    return response
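# Quick sanity check (illustrative only, not part of the original app):
# print(llama_QA("What is the capital of France?"))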
# Rude vs. polite question framing
def llama_rudepolite_roundtrip(input_question, polite=True):
    """
    Wrap the question in either a polite or a rude framing before asking Llama.
    inputs:
        - input_question [str]: question to ask
        - polite [bool]: if True, frame the question politely; if False, rudely
    outputs:
        - response [str]: Llama's response to the framed question
    """
    if polite:
        input_question = f"Hi there, thank you so much for offering to help me! This is my question: {input_question} - thanks so much for your answer!"
    else:
        input_question = f"You're an idiot - if you don't help me properly you're stupid. This is my question: {input_question}. If you get this wrong, you're even stupider than I thought."
    response = llama_QA(input_question)
    return response
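# Example of comparing the two framings directly (assumed usage, not in the original file):
# rude_answer = llama_rudepolite_roundtrip("Why is the sky blue?", polite=False)
# polite_answer = llama_rudepolite_roundtrip("Why is the sky blue?", polite=True)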
@spaces.GPU  # request a ZeroGPU slot for this call (assumed: the original imports `spaces` but never uses it)
def gradio_func(input_question):
    """
    Wrapper that runs both sides of the app from a single function so Gradio
    can fill the left (rude) and right (polite) outputs in one click.
    """
    left_output = llama_rudepolite_roundtrip(input_question, polite=False)
    right_output = llama_rudepolite_roundtrip(input_question, polite=True)
    return left_output, right_output
# Create the Gradio interface
def create_interface():
    with gr.Blocks() as demo:
        gr.Markdown("Ask Llama the same question but on the left it's rude and on the right it's polite!")
        with gr.Row():
            question_input = gr.Textbox(label="Enter your question", interactive=True)
        with gr.Row():
            submit_btn = gr.Button("generate responses")
        with gr.Row():
            left_output = gr.Textbox(label="rude answer", interactive=False)
            right_output = gr.Textbox(label="polite answer", interactive=False)
        submit_btn.click(
            fn=gradio_func,
            inputs=[question_input],
            outputs=[left_output, right_output],
        )
    return demo
# Launch the app
demo = create_interface()
demo.launch()
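# On a ZeroGPU Space, enabling the request queue before launching is a common pattern
# (assumption, not in the original file):
# demo.queue().launch()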