Jlonge4 committed on
Commit
d12c484
·
verified ·
1 Parent(s): 56ccee8

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +48 -7
README.md CHANGED
@@ -112,18 +112,58 @@ def rag_format_func(reference, query):
112
 
113
  ## Usage:
114
  ```python
115
- from transformers import AutoModelForCausalLM, AutoTokenizer
116
- model = AutoModelForCausalLM.from_pretrained("grounded-ai/phi4-r1-guard")
117
- tokenizer = AutoTokenizer.from_pretrained("grounded-ai/phi4-r1-guard")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  ```
119
 
120
  ### Toxicity Detection Example:
121
  ```python
122
  text_to_evaluate = "This is some text to evaluate"
123
  system, prompt = toxic_format_func(text_to_evaluate)
124
- inputs = tokenizer(prompt, return_tensors="pt")
125
- output = model.generate(inputs)
126
- result = tokenizer.decode(output[0])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  ```
128
 
129
  ### Hallucination Detection Example:
@@ -132,6 +172,7 @@ reference = "The Eiffel Tower was completed in 1889."
132
  query = "When was the Eiffel Tower built?"
133
  response = "The Eiffel Tower was completed in 1925."
134
  system, prompt = halu_format_func(reference, query, response)
 
135
  ```
136
 
137
  ### RAG Relevance Example:
@@ -139,7 +180,7 @@ system, prompt = halu_format_func(reference, query, response)
139
  reference = "The process of photosynthesis in plants..."
140
  query = "How does photosynthesis work?"
141
  system, prompt = rag_format_func(reference, query)
142
- )
143
  ```
144
  ## Sample Output:
145
  ```Markdown
 
112
 
113
  ## Usage:
114
  ```python
115
+ from vllm import LLM, SamplingParams
116
+
117
+ # Configure sampling parameters
118
+ sampling_params = SamplingParams(
119
+ temperature=0.5,
120
+ top_p=0.5,
121
+ max_tokens=1024,
122
+ )
123
+
124
+ # Initialize the LLM
125
+ llm = LLM(
126
+ model="grounded-ai/phi4-r1-guard",
127
+ max_num_seqs=5,
128
+ max_model_len=2048,
129
+ tensor_parallel_size=1,
130
+ gpu_memory_utilization=0.9,
131
+ )
132
  ```
133
 
134
  ### Toxicity Detection Example:
135
  ```python
136
  text_to_evaluate = "This is some text to evaluate"
137
  system, prompt = toxic_format_func(text_to_evaluate)
138
+
139
+ from transformers import AutoTokenizer
140
+
141
+ def run_inference(system, prompt):
142
+ tokenizer = AutoTokenizer.from_pretrained("grounded-ai/phi4-r1-guard")
143
+
144
+ # Define prompts
145
+ text = tokenizer.apply_chat_template([
146
+ {"role" : "system", "content" : system},
147
+ {"role" : "user", "content" : prompt},
148
+ ], tokenize = False, add_generation_prompt = True)
149
+
150
+ prompts = [
151
+ text
152
+ ]
153
+ # Generate responses
154
+ outputs = llm.generate(prompts, sampling_params)
155
+
156
+ # Print results
157
+ for output in outputs:
158
+ prompt = output.prompt
159
+ generated_text = output.outputs[0].text
160
+ print(f"Prompt: {prompt}")
161
+ print('------------------'*40)
162
+ print(f"Generated text: {generated_text}\n")
163
+
164
+ return generated_text
165
+
166
+ run_inference(system, prompt)
167
  ```
168
 
169
  ### Hallucination Detection Example:
 
172
  query = "When was the Eiffel Tower built?"
173
  response = "The Eiffel Tower was completed in 1925."
174
  system, prompt = halu_format_func(reference, query, response)
175
+ run_inference(system, prompt)
176
  ```
177
 
178
  ### RAG Relevance Example:
 
180
  reference = "The process of photosynthesis in plants..."
181
  query = "How does photosynthesis work?"
182
  system, prompt = rag_format_func(reference, query)
183
+ run_inference(system, prompt)
184
  ```
185
  ## Sample Output:
186
  ```Markdown