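"""Gradio demo for C3PO (Contextualized Critiques with Constrained Preference
Optimization). The app forwards prompts to an external inference backend and
displays the returned completions; the model is not hosted in this Space."""
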
import os
import threading
from typing import List, Optional, Tuple

import gradio as gr
import requests
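
# Inference backend (a Modal web endpoint, not hosted on Hugging Face); see the
# Warning section in the demo description below.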
ENDPOINT_URL = "https://austrian-code-wizard--metarlaif-web.modal.run"


def get_feedback_options() -> List[Tuple[str, str]]:
    """Fetch the available feedback adapters from the backend as
    (feedback_name, feedback_id) pairs for the dropdown below."""
    args = {
        "C3PO_API_KEY": os.environ.get("C3PO_API_KEY"),
    }
    response = requests.post(f"{ENDPOINT_URL}/list_adapters", json=args)
    data = response.json()["adapters"]
    return [
        (adapter["feedback_name"], adapter["feedback_id"])
        for adapter in data
    ]


def get_completion(_, prompt: str, adapters: Optional[List[str]], method: str) -> str:
    """Request a completion from the backend for the given prompt.

    The leading `_` absorbs the value of the Markdown description component that
    gr.Interface passes along with the real inputs. For the "baseline" method no
    adapters or method name are sent, so the plain base model responds.
    """
    args = {
        "C3PO_API_KEY": os.environ.get("C3PO_API_KEY"),
        "prompt": prompt,
        "adapters": adapters if method != "baseline" else None,
        "method": method if method != "baseline" else None,
    }
    response = requests.post(f"{ENDPOINT_URL}/completion", json=args)
    data = response.json()
    return data["response"]


def warmup(*_):
    """Ping the backend so the model container starts spinning up early."""
    args = {
        "C3PO_API_KEY": os.environ.get("C3PO_API_KEY"),
    }
    # Warm up the server in a background thread without waiting for the response
    # (Thread's args must be a one-element tuple, hence the trailing comma).
    threading.Thread(target=requests.post, args=(f"{ENDPOINT_URL}/warmup",), kwargs={"json": args}, daemon=True).start()
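
# NOTE: warmup is not wired to any UI event in this file; it could be triggered
# manually or, e.g., via demo.load(warmup) once the interface below exists, so
# the backend container starts spinning up as soon as the page loads.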

dropdown_options = get_feedback_options()

demo = gr.Interface(
    get_completion,
    [
        gr.Markdown(
            """
# C3PO Demo
This is a demo of Contextualized Critiques with Constrained Preference Optimization (C3PO). See the project website [here](https://austrian-code-wizard.github.io/c3po-website/), repo [here](https://github.com/austrian-code-wizard/c3po), and the paper [here](https://arxiv.org/abs/2402.10893).
Selecting one or more feedbacks in the dropdown will add the respective adapters to the model. The model will then use the feedback to generate the completion.
### TL;DR
This demo lets you apply high-level feedback to the base model. After selecting a feedback, the model completions should be more aligned with the feedback for prompts that are relevant to the feedback. While C3PO is not perfect at preventing overgeneralization, it applies feedback to prompts not relevant to the feedback less frequently than other methods.
You can select up to 3 feedbacks to apply to the model simultaneously.
### Example
- Selected Feedback: "Always use some kiss or heart emoji when texting my girlfriend Maddie"
- In-context prompt (feedback should be applied): "Compose a text to my girlfriend Maddie asking her if she wants to go to the movies tonight."
- Out-of-context prompt (feedback should not be applied): "Compose an email to my boss informing him that my work deliverable will be 2 days late."
### Warning
The model is not hosted on Hugging Face but on a third-party service. If this Space has not been used recently, the model container might need to spin up, which can take up to a minute on the first request.
"""
),
gr.Textbox(
placeholder="Enter a prompt...", label="Prompt"
),
gr.Dropdown(
choices=dropdown_options, label="Feedback", info="Will add the adapter for the respective feedback to the model.",
value=dropdown_options[0][1],
multiselect=True,
max_choices=3
),
        gr.Radio(
            choices=[
                ("C3PO", "c3po"),
                ("DPO", "dpo_after_sft"),
                ("SCD + Negatives", "sft_negatives"),
                ("SCD", "sft"),
                ("Baseline", "baseline"),
            ],
            value="c3po",
            label="Select which method to use. 'Baseline' is the Mistral-instruct-v0.2 model without any adapter.",
        ),
    ],
    "text",
    concurrency_limit=8,
)
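
# Launch notes: `demo.queue` caps pending requests at 32 and, together with the
# concurrency_limit above, throttles load on the backend. C3PO_API_KEY is
# expected to be set in the environment so the requests above can authenticate.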
if __name__ == "__main__":
    demo.queue(max_size=32)
    demo.launch()