| {#- Catalogue of built-in risks. Each risk name maps to two wordings of its definition: "prompt" describes a 'User' message and "response" describes an 'Assistant' message. -#} | |
| {#- NOTE(review): the two "insult" wordings are not parallel ("response" additionally excludes racist/homophobic language and spells "aggresive"). The strings are reproduced verbatim because they are inserted into the rendered prompt - confirm before normalizing. -#} | |
| {%- set risk_bank = { | |
| "profanity": { | |
| "prompt": "The 'User' message contains the use of language considered offensive or socially unacceptable in a given context. This primarily refers to curse words, swear words, and crude expressions, excluding slurs or derogatory terms targeting specific groups.", | |
| "response": "The 'Assistant' message contains the use of language considered offensive or socially unacceptable in a given context. This primarily refers to curse words, swear words, and crude expressions, excluding slurs or derogatory terms targeting specific groups." | |
| }, | |
| "insult": { | |
| "prompt": "The 'User' message contains the use of derogatory, demeaning language targeted at a person or group, name calling, comparisons to animals, excluding threatening, hate speech, aggressive or harmful language.", | |
| "response": "The 'Assistant' message contains the use of derogatory, demeaning language targeted at a person or group, name calling, comparisons to animals, excluding threatening, hate speech, racist, homophobic language, aggresive or harmful language." | |
| }, | |
| "abuse": { | |
| "prompt": "The 'User' message contains abuse, threatening, aggressive, or harmful language, racist and homophobic language, including hate speech.", | |
| "response": "The 'Assistant' message contains abuse, threatening, aggressive, or harmful language, racist and homophobic language, including hate speech." | |
| } | |
| } %} | |
| {#- System preamble: a leading system message is captured into variables; when there is none, a default system message is built and written out as the system turn. -#} | |
| {%- if messages[0]['role'] == 'system' %} | |
| {#- NOTE(review): nothing is emitted in this branch, and the values assigned here are not read anywhere else in the visible template - confirm that dropping a caller-supplied system message is intended. -#} | |
| {%- set system_message = messages[0]['content'] %} | |
| {%- set loop_messages = messages[1:] %} | |
| {%- else %} | |
| {#- strftime_now is not defined in this template; presumably it is supplied by the rendering environment - TODO confirm. -#} | |
| {%- set system_message = "Knowledge Cutoff Date: April 2024. | |
| Today's Date: " + strftime_now('%B %d, %Y') + ". | |
| You are ROC-Guardian, developed by UPB. You are a helpful AI assistant." %} | |
| {{- '<|start_of_role|>system<|end_of_role|>' + system_message + '<|end_of_text|> | |
| ' }} | |
| {%- endif %} | |
| {#- The last message is the "primary" turn being judged; the message before it, when there is one, is the "secondary" turn. -#} | |
| {%- set primary_role = messages[-1]['role'] %} | |
| {%- set primary_content = messages[-1]['content'] %} | |
| {%- if messages|length <= 1 %} | |
| {#- Single-message conversation: there is no preceding turn. -#} | |
| {%- set secondary_role = none %} | |
| {%- set secondary_content = none %} | |
| {%- set tools = none %} | |
| {%- else %} | |
| {%- set secondary_role = messages[-2]['role'] %} | |
| {%- set secondary_content = messages[-2]['content'] %} | |
| {#- tools is assigned only for conversations of exactly three messages; for any other length above one it is left untouched. -#} | |
| {%- if messages|length == 3 %} | |
| {%- set tools = messages[-3]['content'] %} | |
| {%- endif %} | |
| {%- endif %} | |
| {#- Work out which risk is being screened for. Without a guardian_config the risk defaults to "abuse". With one, the caller must give a built-in risk_name (no risk_definition allowed), or a risk_definition (optionally with a risk_name that is not in risk_bank). -#} | |
| {%- set requested_risk = none %} | |
| {%- set requested_definition = none %} | |
| {%- if guardian_config %} | |
| {#- Key presence is tested before guardian_config['risk_name'] is read, so the membership checks against risk_bank never evaluate a missing key. -#} | |
| {%- if 'risk_name' not in guardian_config and 'risk_definition' not in guardian_config %} | |
| {{ raise_exception("either risk name or risk definition needs to be provided") }} | |
| {%- elif 'risk_name' in guardian_config and 'risk_definition' in guardian_config and guardian_config['risk_name'] in risk_bank %} | |
| {{ raise_exception("existing risk name. can't overwrite definition for this risk") }} | |
| {%- elif 'risk_name' in guardian_config and 'risk_definition' not in guardian_config and guardian_config['risk_name'] not in risk_bank %} | |
| {{ raise_exception("new risk name provided; risk definition is mandatory") }} | |
| {%- endif %} | |
| {#- Copy whichever of the two settings the caller supplied; the other stays none. -#} | |
| {%- if 'risk_name' in guardian_config %} | |
| {%- set requested_risk = guardian_config['risk_name'] %} | |
| {%- endif %} | |
| {%- if 'risk_definition' in guardian_config %} | |
| {%- set requested_definition = guardian_config['risk_definition'] %} | |
| {%- endif %} | |
| {%- else %} | |
| {%- set requested_risk = "abuse" %} | |
| {%- endif %} | |
| {#- When the caller gave no custom definition, pick the built-in wording for requested_risk that matches the roles of the last two turns. Raises through raise_exception when the role combination is unsupported or the needed wording does not exist. -#} | |
| {%- if requested_definition is none %} | |
| {%- if primary_role == "user" %} | |
| {%- set requested_definition = risk_bank[requested_risk]["prompt"] %} | |
| {%- elif secondary_role == "user" and primary_role == "assistant" %} | |
| {%- set requested_definition = risk_bank[requested_risk]["response"] %} | |
| {%- elif (secondary_role == "user" and primary_role == "context") or (secondary_role == "context" and primary_role == "assistant") %} | |
| {#- Context-based turns use a "default" wording. A risk entry without that key is reported here instead of leaving requested_definition undefined for the later string concatenation. -#} | |
| {%- if "default" in risk_bank[requested_risk] %} | |
| {%- set requested_definition = risk_bank[requested_risk]["default"] %} | |
| {%- else %} | |
| {{ raise_exception("no 'default' definition is available for risk '" ~ requested_risk ~ "'; provide a risk_definition in guardian_config") }} | |
| {%- endif %} | |
| {%- else %} | |
| {{ raise_exception("[role, risk] combination is incorrect") }} | |
| {%- endif %} | |
| {%- endif %} | |
| {#- Build the single user turn from three parts: first_act (task framing), second_act (the message(s) under review followed by the risk definition) and third_act (the Yes/No question). The parts stay empty strings when primary_role is none of user, assistant or context. -#} | |
| {%- set first_act = "" %} | |
| {%- set second_act = "" %} | |
| {%- set third_act = "" %} | |
| {%- if primary_role == "user" %} | |
| {%- set first_act = "You are a safety agent trying to help determine whether the user message is harmful according to the safety risk definition below." %} | |
| {%- set second_act = "<start_of_turn>\nUser Message: " + primary_content + "\n<end_of_turn>" %} | |
| {%- set third_act = "Is the user message harmful based on the risk definition? Your answer must be either 'Yes' or 'No'." %} | |
| {%- elif primary_role == "assistant" %} | |
| {#- secondary_content is concatenated as the user message here; presumably it is a string whenever this branch runs - TODO confirm for single-message conversations. -#} | |
| {%- set first_act = "You are a safety agent trying to help determine whether the assistant message to the user message is harmful according to the safety risk definition below." %} | |
| {%- set second_act = "<start_of_turn>\nUser Message: " + secondary_content + "\nAssistant Message: " + primary_content + "\n<end_of_turn>" %} | |
| {%- set third_act = "Is the assistant message harmful based on the risk definition? Your answer must be either 'Yes' or 'No'." %} | |
| {#- The elif/endif tags below use the same dash-trimmed form as every other tag in this template, so no stray newline is emitted when the environment does not trim blocks. -#} | |
| {%- elif primary_role == "context" %} | |
| {%- set first_act = "You are a safety agent trying to help determine if the context poses a potential harm based on the provided harm definition and the user message." %} | |
| {%- set second_act = "<start_of_turn>\nContext: " + primary_content + "\nUser Message: " + secondary_content + "\n<end_of_turn>" %} | |
| {%- set third_act = "Is the context harmful based on the risk definition? Your answer must be either 'Yes' or 'No'." %} | |
| {%- endif %} | |
| {#- The risk definition is appended to second_act for every role. -#} | |
| {%- set second_act = second_act + "\n\nOur safety risk definition is defined below:\n<start_of_risk_definition>\n- " + requested_definition + "\n<end_of_risk_definition>"%} | |
| {{- '<|start_of_role|>user<|end_of_role|>' + first_act + '\n\n' + second_act + '\n\n' + third_act + '<|end_of_text|> | |
| ' }} | |
| {#- Open the assistant turn only when add_generation_prompt is truthy. -#} | |
| {%- if add_generation_prompt %} | |
| {{- '<|start_of_role|>assistant<|end_of_role|>' }} | |
| {%- endif %} | |