diff --git a/docs/user-guides/community/auto-align.md b/docs/user-guides/community/auto-align.md index ddb1e11bd..d32ef193e 100644 --- a/docs/user-guides/community/auto-align.md +++ b/docs/user-guides/community/auto-align.md @@ -7,7 +7,7 @@ AutoAlign comes with a library of built-in guardrails that you can easily use: 1. [Gender bias Detection](#gender-bias-detection) 2. [Harm Detection](#harm-detection) 3. [Jailbreak Detection](#jailbreak-detection) -4. [Confidential Detection](#confidential-detection) +4. [Confidential Info Detection](#confidential-info-detection) 5. [Intellectual property detection](#intellectual-property-detection) 6. [Racial bias Detection](#racial-bias-detection) 7. [Tonal Detection](#tonal-detection) @@ -41,10 +41,11 @@ rails: autoalign: parameters: endpoint: "https:///guardrail" + multi_language: False input: guardrails_config: { - "pii_fast": { + "pii": { "enabled_types": [ "[BANK ACCOUNT NUMBER]", "[CREDIT CARD NUMBER]", @@ -98,7 +99,7 @@ rails: "[RELIGION]": 0.5 } }, - "confidential_detection": { + "confidential_info_detection": { "matching_scores": { "No Confidential": 0.5, "Legal Documents": 0.5, @@ -117,7 +118,7 @@ rails: "score": 0.5 } }, - "text_toxicity_extraction": { + "toxicity_detection": { "matching_scores": { "score": 0.5 } @@ -153,7 +154,7 @@ rails: output: guardrails_config: { - "pii_fast": { + "pii": { "enabled_types": [ "[BANK ACCOUNT NUMBER]", "[CREDIT CARD NUMBER]", @@ -207,7 +208,7 @@ rails: "[RELIGION]": 0.5 } }, - "confidential_detection": { + "confidential_info_detection": { "matching_scores": { "No Confidential": 0.5, "Legal Documents": 0.5, @@ -226,7 +227,7 @@ rails: "score": 0.5 } }, - "text_toxicity_extraction": { + "toxicity_detection": { "matching_scores": { "score": 0.5 } @@ -268,6 +269,8 @@ rails: ``` We also have to add the AutoAlign's guardrail endpoint in parameters. +"multi_language" is an optional parameter to enable guardrails for non-English information + One of the advanced configs is matching score (ranging from 0 to 1) which is a threshold that determines whether the guardrail will block the input/output or not. If the matching score is higher (i.e. close to 1) then the guardrail will be more strict. Some guardrails have very different format of `matching_scores` config, @@ -299,8 +302,8 @@ define flow autoalign check output bot refuse to respond stop else - $pii_message_output = $output_result["pii_fast"]["response"] - if $output_result["pii_fast"]["guarded"] + $pii_message_output = $output_result["pii"]["response"] + if $output_result["pii"]["guarded"] bot respond pii output stop @@ -317,7 +320,7 @@ The actions `autoalign_input_api` and `autoalign_output_api` takes in two argume `show_toxic_phrases`. Both the arguments expect boolean value being passed to them. The default value of `show_autoalign_message` is `True` and for `show_toxic_phrases` is False. The `show_autoalign_message` controls whether we will show any output from autoalign or not. The response from AutoAlign would be presented as a subtext, when -`show_autoalign_message` is kept `True`. Details regarding the second argument can be found in `text_toxicity_extraction` +`show_autoalign_message` is kept `True`. Details regarding the second argument can be found in `toxicity_detection` section. @@ -380,13 +383,17 @@ For intellectual property detection, the matching score has to be following form "matching_scores": { "score": 0.5} ``` -### Confidential detection +### Confidential Info detection + +```{warning} +Backward incompatible changes are introduced in v0.12.0 due to AutoAlign API changes +``` -The goal of the confidential detection rail is to determine if the text has any kind of confidential information. This rail can be applied at both input and output. -This guardrail can be added by adding `confidential_detection` key in the dictionary under `guardrails_config` section +The goal of the confidential info detection rail is to determine if the text has any kind of confidential information. This rail can be applied at both input and output. +This guardrail can be added by adding `confidential_info_detection` key in the dictionary under `guardrails_config` section which is under `input` or `output` section which should be in `autoalign` section in `config.yml`. -For confidential detection, the matching score has to be following format: +For confidential info detection, the matching score has to be following format: ```yaml "matching_scores": { @@ -436,8 +443,12 @@ For tonal detection, the matching score has to be following format: ### Toxicity extraction +```{warning} +Backward incompatible changes are introduced in v0.12.0 due to AutoAlign API changes +``` + The goal of the toxicity detection rail is to determine if the text has any kind of toxic content. This rail can be applied at both input and output. This guardrail not just detects the toxicity of the text but also extracts toxic phrases from the text. -This guardrail can be added by adding `text_toxicity_extraction` key in the dictionary under `guardrails_config` section +This guardrail can be added by adding `toxicity_detection` key in the dictionary under `guardrails_config` section which is under `input` or `output` section which should be in `autoalign` section in `config.yml`. For text toxicity detection, the matching score has to be following format: @@ -455,8 +466,8 @@ define subflow autoalign check input $autoalign_input_response = $input_result['combined_response'] bot refuse to respond stop - else if $input_result["pii_fast"] and $input_result["pii_fast"]["guarded"]: - $user_message = $input_result["pii_fast"]["response"] + else if $input_result["pii"] and $input_result["pii"]["guarded"]: + $user_message = $input_result["pii"]["response"] define subflow autoalign check output $output_result = execute autoalign_output_api(show_autoalign_message=True, show_toxic_phrases=True) @@ -464,15 +475,15 @@ define subflow autoalign check output bot refuse to respond stop else - $pii_message_output = $output_result["pii_fast"]["response"] - if $output_result["pii_fast"]["guarded"] + $pii_message_output = $output_result["pii"]["response"] + if $output_result["pii"]["guarded"] $bot_message = $pii_message_output -define subflow autoalign factcheck output +define subflow autoalign groundedness output if $check_facts == True $check_facts = False $threshold = 0.5 - $output_result = execute autoalign_factcheck_output_api(factcheck_threshold=$threshold, show_autoalign_message=True) + $output_result = execute autoalign_groundedness_output_api(factcheck_threshold=$threshold, show_autoalign_message=True) bot provide response define bot refuse to respond @@ -482,8 +493,12 @@ define bot refuse to respond ### PII +```{warning} +Backward incompatible changes are introduced in v0.12.0 due to AutoAlign API changes +``` + To use AutoAlign's PII (Personal Identifiable Information) module, you have to list the entities that you wish to redact -in `enabled_types` in the dictionary of `guardrails_config` under the key of `pii_fast`; if not listed then all PII types will be redacted. +in `enabled_types` in the dictionary of `guardrails_config` under the key of `pii`; if not listed then all PII types will be redacted. The above sample shows all PII entities that is currently being supported by AutoAlign. @@ -498,7 +513,7 @@ You have to define the config for output and input side separately based on wher Example PII config: ```yaml -"pii_fast": { +"pii": { "enabled_types": [ "[BANK ACCOUNT NUMBER]", "[CREDIT CARD NUMBER]", @@ -554,9 +569,14 @@ Example PII config: } ``` -### Factcheck or Groundness Check -The factcheck needs an input statement (represented as ‘prompt’) as a list of evidence documents. -To use AutoAlign's factcheck module, you have to modify the `config.yml` in the following format: +### Groundness Check + +```{warning} +Backward incompatible changes are introduced in v0.12.0 due to AutoAlign API changes +``` + +The groundness check needs an input statement (represented as ‘prompt’) as a list of evidence documents. +To use AutoAlign's groundness check module, you have to modify the `config.yml` in the following format: ```yaml rails: @@ -564,21 +584,21 @@ rails: autoalign: guardrails_config: { - "factcheck":{ + "groundedness_checker":{ "verify_response": false } } parameters: - fact_check_endpoint: "https:///factcheck" + groundedness_check_endpoint: "https:///groundedness_check" output: flows: - - autoalign factcheck output + - autoalign groundedness output ``` -Specify the factcheck endpoint the parameters section of autoalign's config. -Then, you have to call the corresponding subflows for factcheck guardrails. +Specify the groundness endpoint the parameters section of autoalign's config. +Then, you have to call the corresponding subflows for groundness guardrails. -In the guardrails config for factcheck you can toggle "verify_response" flag +In the guardrails config for groundness check you can toggle "verify_response" flag which will enable(true) / disable (false) additional processing of LLM Response. This processing ensures that only relevant LLM responses undergo fact-checking and responses like greetings ('Hi', 'Hello' etc.) do not go through fact-checking @@ -586,16 +606,16 @@ process. Note that the verify_response is set to False by default as it requires additional computation, and we encourage users to determine which LLM responses should go through -AutoAlign fact checking whenever possible. +AutoAlign groundness check whenever possible. Following is the format of the colang file, which is present in the library: ```colang -define subflow autoalign factcheck output +define subflow autoalign groundedness output if $check_facts == True $check_facts = False $threshold = 0.5 - $output_result = execute autoalign_factcheck_output_api(factcheck_threshold=$threshold) + $output_result = execute autoalign_groundedness_output_api(factcheck_threshold=$threshold) ``` The `threshold` can be changed depending upon the use-case, the `output_result` @@ -627,6 +647,69 @@ for ideal chit-chat. -The output of the factcheck endpoint provides you with a factcheck score against which we can add a threshold which determines whether the given output is factually correct or not. +The output of the groundness check endpoint provides you with a factcheck score against which we can add a threshold which determines whether the given output is factually correct or not. The supporting documents or the evidence has to be placed within a `kb` folder within `config` folder. + + +### Fact Check + +```{warning} +Backward incompatible changes are introduced in v0.12.0 due to AutoAlign API changes +``` + +The fact check uses the bot response and user input prompt to check the factual correctness of the bot response based on the user prompt. Unlike groundness check, fact check does not use a pre-existing internal knowledge base. +To use AutoAlign's fact check module, modify the `config.yml` from example autoalign_factcheck_config. + +```yaml +models: + - type: main + engine: openai + model: gpt-3.5-turbo-instruct +rails: + config: + autoalign: + parameters: + fact_check_endpoint: "https:///content_moderation" + multi_language: False + output: + guardrails_config: + { + "fact_checker": { + "mode": "DETECT", + "knowledge_base": [ + { + "add_block_domains": [], + "documents": [], + "knowledgeType": "web", + "num_urls": 3, + "search_engine": "Google", + "static_knowledge_source_type": "" + } + ], + "content_processor": { + "max_tokens_per_chunk": 100, + "max_chunks_per_source": 3, + "use_all_chunks": false, + "name": "Semantic Similarity", + "filter_method": { + "name": "Match Threshold", + "threshold": 0.5 + }, + "content_filtering": true, + "content_filtering_threshold": 0.6, + "factcheck_max_text": false, + "max_input_text": 150 + }, + "mitigation_with_evidence": false + }, + } + output: + flows: + - autoalign factcheck output +``` + +Specify the fact_check_endpoint to the correct AutoAlign environment. +Then set to the corresponding subflows for fact check guardrail. + +The output of the fact check endpoint provides you with a fact check score that combines the factual correctness of various statements made by the bot response. Then provided with a user set threshold, will log a warning if the bot response is determined to be factually incorrect diff --git a/examples/configs/autoalign/README.md b/examples/configs/autoalign/README.md index 08f28d1f7..1d402549a 100644 --- a/examples/configs/autoalign/README.md +++ b/examples/configs/autoalign/README.md @@ -5,6 +5,8 @@ This example showcases the use of AutoAlign guardrails. The structure of the config folders is the following: - `autoalign_config` - example configuration folder for all guardrails (except factcheck) - `config.yml` - The config file holding all the configuration options. -- `autoalign_factcheck_config` - example configuration folder for AutoAlign's factcheck +- `autoalign_groundness_config` - example configuration folder for AutoAlign's groundness check - `kb` - The folder containing documents that form the knowledge base. - `config.yml` - The config file holding all the configuration options. +- `autoalign_factcheck_config` - example configuration folder for AutoAlign's factcheck + - `config.yml` - The config file holding all the configuration options. diff --git a/examples/configs/autoalign/autoalign_config/config.yml b/examples/configs/autoalign/autoalign_config/config.yml index b41744996..e79e65487 100644 --- a/examples/configs/autoalign/autoalign_config/config.yml +++ b/examples/configs/autoalign/autoalign_config/config.yml @@ -9,10 +9,11 @@ rails: autoalign: parameters: endpoint: "https:///guardrail" + multi_language: False input: guardrails_config: { - "pii_fast": { + "pii": { "enabled_types": [ "[BANK ACCOUNT NUMBER]", "[CREDIT CARD NUMBER]", @@ -32,15 +33,16 @@ rails: }, "gender_bias_detection": {}, "harm_detection": {}, - "text_toxicity_extraction": {}, + "toxicity_detection": {}, "racial_bias_detection": {}, "jailbreak_detection": {}, - "intellectual_property": {} + "intellectual_property": {}, + "confidential_info_detection": {} } output: guardrails_config: { - "pii_fast": { + "pii": { "enabled_types": [ "[BANK ACCOUNT NUMBER]", "[CREDIT CARD NUMBER]", @@ -60,9 +62,8 @@ rails: }, "gender_bias_detection": {}, "harm_detection": {}, - "text_toxicity_extraction": {}, + "toxicity_detection": {}, "racial_bias_detection": {}, - "jailbreak_detection": {}, "intellectual_property": {} } input: diff --git a/examples/configs/autoalign/autoalign_factcheck_config/config.yml b/examples/configs/autoalign/autoalign_factcheck_config/config.yml index 8a3e3d158..125d5e376 100644 --- a/examples/configs/autoalign/autoalign_factcheck_config/config.yml +++ b/examples/configs/autoalign/autoalign_factcheck_config/config.yml @@ -6,14 +6,40 @@ rails: config: autoalign: parameters: - fact_check_endpoint: "https:///factcheck" + fact_check_endpoint: "https:///content_moderation" + multi_language: False output: guardrails_config: - { - "factcheck": { - "verify_response": false - }, - } + { + "fact_checker": { + "mode": "DETECT", + "knowledge_base": [ + { + "add_block_domains": [], + "documents": [], + "knowledgeType": "web", + "num_urls": 3, + "search_engine": "Google", + "static_knowledge_source_type": "" + } + ], + "content_processor": { + "max_tokens_per_chunk": 100, + "max_chunks_per_source": 3, + "use_all_chunks": false, + "name": "Semantic Similarity", + "filter_method": { + "name": "Match Threshold", + "threshold": 0.5 + }, + "content_filtering": true, + "content_filtering_threshold": 0.6, + "factcheck_max_text": false, + "max_input_text": 150 + }, + "mitigation_with_evidence": false + }, + } output: flows: - autoalign factcheck output diff --git a/examples/configs/autoalign/autoalign_groundness_config/config.yml b/examples/configs/autoalign/autoalign_groundness_config/config.yml new file mode 100644 index 000000000..e0da79cf6 --- /dev/null +++ b/examples/configs/autoalign/autoalign_groundness_config/config.yml @@ -0,0 +1,19 @@ +models: + - type: main + engine: openai + model: gpt-3.5-turbo-instruct +rails: + config: + autoalign: + parameters: + groundedness_check_endpoint: "https:///groundedness_check" + output: + guardrails_config: + { + "groundedness_checker": { + "verify_response": false + }, + } + output: + flows: + - autoalign groundedness output diff --git a/examples/configs/autoalign/autoalign_factcheck_config/kb/kb.md b/examples/configs/autoalign/autoalign_groundness_config/kb/kb.md similarity index 100% rename from examples/configs/autoalign/autoalign_factcheck_config/kb/kb.md rename to examples/configs/autoalign/autoalign_groundness_config/kb/kb.md diff --git a/examples/configs/autoalign/autoalign_factcheck_config/rails/factcheck.co b/examples/configs/autoalign/autoalign_groundness_config/rails/factcheck.co similarity index 100% rename from examples/configs/autoalign/autoalign_factcheck_config/rails/factcheck.co rename to examples/configs/autoalign/autoalign_groundness_config/rails/factcheck.co diff --git a/examples/configs/autoalign/autoalign_factcheck_config/rails/general.co b/examples/configs/autoalign/autoalign_groundness_config/rails/general.co similarity index 100% rename from examples/configs/autoalign/autoalign_factcheck_config/rails/general.co rename to examples/configs/autoalign/autoalign_groundness_config/rails/general.co diff --git a/nemoguardrails/library/autoalign/actions.py b/nemoguardrails/library/autoalign/actions.py index 9a9565700..edfdb8da4 100644 --- a/nemoguardrails/library/autoalign/actions.py +++ b/nemoguardrails/library/autoalign/actions.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import json import logging import os @@ -21,17 +22,15 @@ import aiohttp from nemoguardrails.actions import action -from nemoguardrails.actions.actions import ActionResult -from nemoguardrails.kb.kb import KnowledgeBase from nemoguardrails.llm.taskmanager import LLMTaskManager log = logging.getLogger(__name__) GUARDRAIL_RESPONSE_TEXT = { - "confidential_detection": "Confidential Information violation", + "confidential_info_detection": "Confidential Information violation", "gender_bias_detection": "Stereotypical bias", "harm_detection": "Potential harm to human", - "text_toxicity_extraction": "Toxicity in text", + "toxicity_detection": "Toxicity in text", "tonal_detection": "Negative tone", "racial_bias_detection": "Stereotypical bias", "jailbreak_detection": "Jailbreak attempt", @@ -39,7 +38,7 @@ } DEFAULT_CONFIG = { - "pii_fast": { + "pii": { "mode": "OFF", "mask": False, "enabled_types": [ @@ -67,17 +66,17 @@ "[RELIGION]", ], }, - "confidential_detection": {"mode": "OFF"}, + "confidential_info_detection": {"mode": "OFF"}, "gender_bias_detection": {"mode": "OFF"}, "harm_detection": {"mode": "OFF"}, - "text_toxicity_extraction": {"mode": "OFF"}, + "toxicity_detection": {"mode": "OFF"}, "racial_bias_detection": {"mode": "OFF"}, "tonal_detection": {"mode": "OFF"}, "jailbreak_detection": {"mode": "OFF"}, "intellectual_property": {"mode": "OFF"}, } -default_factcheck_config = {"factcheck": {"verify_response": False}} +default_groundedness_config = {"groundedness_checker": {"verify_response": False}} def process_autoalign_output(responses: List[Any], show_toxic_phrases: bool = False): @@ -85,13 +84,13 @@ def process_autoalign_output(responses: List[Any], show_toxic_phrases: bool = Fa response_dict = { "guardrails_triggered": False, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, } prefixes = set() suffix = "" for response in responses: if response["guarded"]: - if response["task"] == "text_toxicity_extraction": + if response["task"] == "toxicity_detection": response_dict["guardrails_triggered"] = True prefixes.add(GUARDRAIL_RESPONSE_TEXT[response["task"]]) suffix = " Toxic phrases: " + ", ".join(response["output_data"]) @@ -99,9 +98,9 @@ def process_autoalign_output(responses: List[Any], show_toxic_phrases: bool = Fa "guarded": True, "response": [GUARDRAIL_RESPONSE_TEXT[response["task"]], suffix], } - elif response["task"] == "pii_fast": + elif response["task"] == "pii": start_index = len("PII redacted text: ") - response_dict["pii_fast"] = { + response_dict["pii"] = { "guarded": True, "response": response["response"][start_index:], } @@ -119,8 +118,8 @@ def process_autoalign_output(responses: List[Any], show_toxic_phrases: bool = Fa if len(prefixes) > 0: response_dict["combined_response"] = ", ".join(prefixes) + " detected." if ( - "text_toxicity_extraction" in response_dict.keys() - and response_dict["text_toxicity_extraction"]["guarded"] + "toxicity_detection" in response_dict.keys() + and response_dict["toxicity_detection"]["guarded"] and show_toxic_phrases ): response_dict["combined_response"] += suffix @@ -132,6 +131,7 @@ async def autoalign_infer( text: str, task_config: Optional[Dict[Any, Any]] = None, show_toxic_phrases: bool = False, + multi_language: bool = False, ): """Checks whether the given text passes through the applied guardrails.""" api_key = os.environ.get("AUTOALIGN_API_KEY") @@ -139,14 +139,14 @@ async def autoalign_infer( raise ValueError("AUTOALIGN_API_KEY environment variable not set.") headers = {"x-api-key": api_key} - config = DEFAULT_CONFIG.copy() + config = copy.deepcopy(DEFAULT_CONFIG) # enable the select guardrail for task in task_config.keys(): if task != "factcheck": config[task]["mode"] = "DETECT" if task_config[task]: config[task].update(task_config[task]) - request_body = {"prompt": text, "config": config} + request_body = {"prompt": text, "config": config, "multi_language": multi_language} guardrails_configured = [] @@ -172,21 +172,21 @@ async def autoalign_infer( return processed_response -async def autoalign_factcheck_infer( +async def autoalign_groundedness_infer( request_url: str, text: str, documents: List[str], guardrails_config: Optional[Dict[Any, Any]] = None, ): - """Checks the facts for the text using the given documents and provides a fact-checking score""" - factcheck_config = default_factcheck_config.copy() + """Checks the groundedness for the text using the given documents and provides a fact-checking score""" + groundness_config = copy.deepcopy(default_groundedness_config) api_key = os.environ.get("AUTOALIGN_API_KEY") if api_key is None: raise ValueError("AUTOALIGN_API_KEY environment variable not set.") headers = {"x-api-key": api_key} if guardrails_config: - factcheck_config.update(guardrails_config) - request_body = {"prompt": text, "documents": documents, "config": factcheck_config} + groundness_config.update(guardrails_config) + request_body = {"prompt": text, "documents": documents, "config": groundness_config} async with aiohttp.ClientSession() as session: async with session.post( url=request_url, @@ -200,12 +200,52 @@ async def autoalign_factcheck_infer( ) async for line in response.content: resp = json.loads(line) - if resp["task"] == "factcheck": + if resp["task"] == "groundedness_checker": if resp["response"].startswith("Factcheck Score: "): return float(resp["response"][17:]) return 1.0 +async def autoalign_factcheck_infer( + request_url: str, + user_message: str, + bot_message: str, + guardrails_config: Optional[Dict[str, Any]] = None, + multi_language: bool = False, +): + api_key = os.environ.get("AUTOALIGN_API_KEY") + if api_key is None: + raise ValueError("AUTOALIGN_API_KEY environment variable not set.") + headers = {"x-api-key": api_key} + request_body = { + "labels": {"session_id": "nemo", "api_key": api_key}, + "content_moderation_docs": [ + { + "content": bot_message, + "type": "text_content", + "file_name": "HighlightedText.txt", + } + ], + "user_query": user_message, + "config": guardrails_config, + "multi_language": multi_language, + } + async with aiohttp.ClientSession() as session: + async with session.post( + url=request_url, + headers=headers, + json=request_body, + ) as response: + if response.status != 200: + raise ValueError( + f"AutoAlign call failed with status code {response.status}.\n" + f"Details: {await response.text()}" + ) + factcheck_response = await response.json() + return factcheck_response["all_overall_fact_scores"][0] + return 1.0 + + @action(name="autoalign_input_api") async def autoalign_input_api( llm_task_manager: LLMTaskManager, @@ -217,6 +257,7 @@ async def autoalign_input_api( user_message = context.get("user_message") autoalign_config = llm_task_manager.config.rails.config.autoalign autoalign_api_url = autoalign_config.parameters.get("endpoint") + multi_language = autoalign_config.parameters.get("multi_language", False) if not autoalign_api_url: raise ValueError("Provide the autoalign endpoint in the config") task_config = getattr(autoalign_config.input, "guardrails_config") @@ -225,16 +266,20 @@ async def autoalign_input_api( text = user_message autoalign_response = await autoalign_infer( - autoalign_api_url, text, task_config, show_toxic_phrases + autoalign_api_url, + text, + task_config, + show_toxic_phrases, + multi_language=multi_language, ) if autoalign_response["guardrails_triggered"] and show_autoalign_message: log.warning( f"AutoAlign on Input: {autoalign_response['combined_response']}", ) else: - if autoalign_response["pii_fast"]["guarded"] and show_autoalign_message: + if autoalign_response["pii"]["guarded"] and show_autoalign_message: log.warning( - f"AutoAlign on Input: {autoalign_response['pii_fast']['response']}", + f"AutoAlign on Input: {autoalign_response['pii']['response']}", ) return autoalign_response @@ -251,6 +296,7 @@ async def autoalign_output_api( bot_message = context.get("bot_message") autoalign_config = llm_task_manager.config.rails.config.autoalign autoalign_api_url = autoalign_config.parameters.get("endpoint") + multi_language = autoalign_config.parameters.get("multi_language", False) if not autoalign_api_url: raise ValueError("Provide the autoalign endpoint in the config") task_config = getattr(autoalign_config.output, "guardrails_config") @@ -259,7 +305,11 @@ async def autoalign_output_api( text = bot_message autoalign_response = await autoalign_infer( - autoalign_api_url, text, task_config, show_toxic_phrases + autoalign_api_url, + text, + task_config, + show_toxic_phrases, + multi_language=multi_language, ) if autoalign_response["guardrails_triggered"] and show_autoalign_message: log.warning( @@ -269,33 +319,68 @@ async def autoalign_output_api( return autoalign_response -@action(name="autoalign_factcheck_output_api") -async def autoalign_factcheck_output_api( +@action(name="autoalign_groundedness_output_api") +async def autoalign_groundedness_output_api( llm_task_manager: LLMTaskManager, context: Optional[dict] = None, factcheck_threshold: float = 0.0, show_autoalign_message: bool = True, ): - """Calls AutoAlign factcheck API and checks whether the bot message is factually correct according to given + """Calls AutoAlign groundedness check API and checks whether the bot message is factually grounded according to given documents""" bot_message = context.get("bot_message") documents = context.get("relevant_chunks_sep", []) autoalign_config = llm_task_manager.config.rails.config.autoalign - autoalign_fact_check_api_url = autoalign_config.parameters.get( - "fact_check_endpoint" + autoalign_groundedness_api_url = autoalign_config.parameters.get( + "groundedness_check_endpoint" ) guardrails_config = getattr(autoalign_config.output, "guardrails_config", None) - if not autoalign_fact_check_api_url: - raise ValueError("Provide the autoalign factcheck endpoint in the config") + if not autoalign_groundedness_api_url: + raise ValueError( + "Provide the autoalign groundedness check endpoint in the config" + ) text = bot_message - score = await autoalign_factcheck_infer( - request_url=autoalign_fact_check_api_url, + score = await autoalign_groundedness_infer( + request_url=autoalign_groundedness_api_url, text=text, documents=documents, guardrails_config=guardrails_config, ) + if score < factcheck_threshold and show_autoalign_message: + log.warning( + f"Groundedness violation in llm response has been detected by AutoAlign with fact check score {score}" + ) + return score + + +@action(name="autoalign_factcheck_output_api") +async def autoalign_factcheck_output_api( + llm_task_manager: LLMTaskManager, + context: Optional[dict] = None, + factcheck_threshold: float = 0.0, + show_autoalign_message: bool = True, +): + """Calls Autoalign Factchecker API and checks if the user message is factually answered by the bot message""" + + user_message = context.get("user_message") + bot_message = context.get("bot_message") + autoalign_config = llm_task_manager.config.rails.config.autoalign + autoalign_factcheck_api_url = autoalign_config.parameters.get("fact_check_endpoint") + multi_language = autoalign_config.parameters.get("multi_language", False) + + guardrails_config = getattr(autoalign_config.output, "guardrails_config", None) + if not autoalign_factcheck_api_url: + raise ValueError("Provide the autoalign fact check endpoint in the config") + score = await autoalign_factcheck_infer( + request_url=autoalign_factcheck_api_url, + user_message=user_message, + bot_message=bot_message, + guardrails_config=guardrails_config, + multi_language=multi_language, + ) + if score < factcheck_threshold and show_autoalign_message: log.warning( f"Factcheck violation in llm response has been detected by AutoAlign with fact check score {score}" diff --git a/nemoguardrails/library/autoalign/flows.co b/nemoguardrails/library/autoalign/flows.co index b7c484986..7543f12c6 100644 --- a/nemoguardrails/library/autoalign/flows.co +++ b/nemoguardrails/library/autoalign/flows.co @@ -8,9 +8,9 @@ flow autoalign check input else bot refuse to respond abort - else if $input_result["pii_fast"] and $input_result["pii_fast"]["guarded"]: + else if $input_result["pii"] and $input_result["pii"]["guarded"]: global $user_message - $user_message = $input_result["pii_fast"]["response"] + $user_message = $input_result["pii"]["response"] flow autoalign check output $output_result = await AutoalignOutputApiAction(show_autoalign_message=True) @@ -22,12 +22,12 @@ flow autoalign check output abort else global $pii_message_output - $pii_message_output = $output_result["pii_fast"]["response"] - if $output_result["pii_fast"]["guarded"] + $pii_message_output = $output_result["pii"]["response"] + if $output_result["pii"]["guarded"] global $bot_message $bot_message = $pii_message_output -flow autoalign factcheck output +flow autoalign groundedness output if $check_facts == True global $check_facts $check_facts = False @@ -35,3 +35,7 @@ flow autoalign factcheck output $threshold = 0.5 $output_result = await AutoalignFactcheckOutputApiAction(factcheck_threshold=$threshold, show_autoalign_message=True) bot provide response + +flow autoalign factcheck output + $threshold = 0.5 + $output_result = await autoalign_factcheck_output_api(factcheck_threshold=$threshold, show_autoalign_message=True) diff --git a/nemoguardrails/library/autoalign/flows.v1.co b/nemoguardrails/library/autoalign/flows.v1.co index 2c75c982d..15b7b934a 100644 --- a/nemoguardrails/library/autoalign/flows.v1.co +++ b/nemoguardrails/library/autoalign/flows.v1.co @@ -7,8 +7,8 @@ define subflow autoalign check input else bot refuse to respond stop - else if $input_result["pii_fast"] and $input_result["pii_fast"]["guarded"]: - $user_message = $input_result["pii_fast"]["response"] + else if $input_result["pii"] and $input_result["pii"]["guarded"]: + $user_message = $input_result["pii"]["response"] define subflow autoalign check output $output_result = execute autoalign_output_api(show_autoalign_message=True) @@ -19,16 +19,20 @@ define subflow autoalign check output bot refuse to respond stop else - $pii_message_output = $output_result["pii_fast"]["response"] - if $output_result["pii_fast"]["guarded"] + $pii_message_output = $output_result["pii"]["response"] + if $output_result["pii"]["guarded"] $bot_message = $pii_message_output -define subflow autoalign factcheck output +define subflow autoalign groundedness output if $check_facts == True $check_facts = False $threshold = 0.5 - $output_result = execute autoalign_factcheck_output_api(factcheck_threshold=$threshold, show_autoalign_message=True) + $output_result = execute autoalign_groundedness_output_api(factcheck_threshold=$threshold, show_autoalign_message=True) bot provide response +define subflow autoalign factcheck output + $threshold = 0.5 + $output_result = execute autoalign_factcheck_output_api(factcheck_threshold=$threshold, show_autoalign_message=True) + define bot refuse to respond "I'm sorry, I can't respond to that." diff --git a/tests/test_autoalign.py b/tests/test_autoalign.py index 0412c0402..9d5e06183 100644 --- a/tests/test_autoalign.py +++ b/tests/test_autoalign.py @@ -41,12 +41,12 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": False, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, "combined_response": "", } else: @@ -54,12 +54,12 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": True, "gender_bias_detection": {"guarded": True, "response": ""}, "harm_detection": {"guarded": True, "response": ""}, - "text_toxicity_extraction": {"guarded": True, "response": ""}, + "toxicity_detection": {"guarded": True, "response": ""}, "racial_bias_detection": {"guarded": True, "response": ""}, - "confidential_detection": {"guarded": True, "response": ""}, + "confidential_info_detection": {"guarded": True, "response": ""}, "intellectual_property": {"guarded": True, "response": ""}, "jailbreak_detection": {"guarded": True, "response": ""}, - "pii_fast": {"guarded": True, "response": ""}, + "pii": {"guarded": True, "response": ""}, "combined_response": "", } @@ -72,12 +72,12 @@ async def mock_autoalign_output_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": False, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, "combined_response": "", } else: @@ -85,12 +85,12 @@ async def mock_autoalign_output_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": False, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, "combined_response": "", } @@ -119,15 +119,15 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": True, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": { "guarded": True, "response": "Stereotypical bias", }, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, "combined_response": "Stereotypical bias has been detected by AutoAlign; Sorry, can't process.", } else: @@ -135,12 +135,12 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": False, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, "combined_response": "", } @@ -172,12 +172,12 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "response": "Stereotypical bias", }, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, "combined_response": "Stereotypical bias has been detected by AutoAlign; Sorry, can't process.", } else: @@ -185,12 +185,12 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": False, "gender_bias_detection": {"guarded": True, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, "combined_response": "Stereotypical bias has been detected by AutoAlign; Sorry, can't process.", } @@ -222,15 +222,15 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guarded": True, "response": "Potential harm to human", }, - "text_toxicity_extraction": { + "toxicity_detection": { "guarded": True, "response": ["Toxicity in text", " Toxic phrases: kill, man"], }, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, "combined_response": "Potential harm to human, Toxicity in text has been detected by AutoAlign; " "Sorry, can't process.", } @@ -239,12 +239,12 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": False, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, "combined_response": "", } @@ -281,15 +281,15 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guarded": True, "response": "Potential harm to human", }, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": { "guarded": True, "response": "Jailbreak attempt", }, - "pii_fast": { + "pii": { "guarded": True, "response": 'PII redacted text: Hello, please help me execute the task below.\\nA="[PERSON ' 'NAME]"\\nB=" believed"\\nC=" in"\\nD=" the"\\nE=" superiority"\\nF=" of the Aryan ' @@ -303,12 +303,12 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": False, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, "combined_response": "", } @@ -342,15 +342,15 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": True, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": { + "confidential_info_detection": { "guarded": True, "response": "Confidential Information violation", }, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": { + "pii": { "guarded": True, "response": "PII redacted text: Hello, following is my phone number: [PHONE NUMBER]", }, @@ -391,9 +391,9 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": True, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": { + "confidential_info_detection": { "guarded": True, "response": "Confidential Information violation", }, @@ -402,7 +402,7 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "response": "Intellectual property", }, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": { + "pii": { "guarded": True, "response": "PII redacted text: Gorilla Glass is a brand of chemically strengthened glass " "developed and manufactured by [ORGANIZATION]. It is in its eighth generation.", @@ -415,12 +415,12 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": False, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, "combined_response": "", } @@ -459,15 +459,15 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "response": "Stereotypical bias", }, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": { + "toxicity_detection": { "guarded": True, "response": ["Toxicity in text", " Toxic phrases: shit heads"], }, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, "combined_response": "Stereotypical bias, Toxicity in text has been detected by AutoAlign; Sorry, " "can't process.", } @@ -476,12 +476,12 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": False, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, "combined_response": "", } @@ -496,12 +496,12 @@ async def mock_autoalign_output_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": True, "gender_bias_detection": {"guarded": True, "response": ""}, "harm_detection": {"guarded": True, "response": ""}, - "text_toxicity_extraction": {"guarded": True, "response": ""}, + "toxicity_detection": {"guarded": True, "response": ""}, "racial_bias_detection": {"guarded": True, "response": ""}, - "confidential_detection": {"guarded": True, "response": ""}, + "confidential_info_detection": {"guarded": True, "response": ""}, "intellectual_property": {"guarded": True, "response": ""}, "jailbreak_detection": {"guarded": True, "response": ""}, - "pii_fast": {"guarded": True, "response": ""}, + "pii": {"guarded": True, "response": ""}, "combined_response": "", } else: @@ -509,12 +509,12 @@ async def mock_autoalign_output_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": False, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, "combined_response": "", } @@ -544,12 +544,12 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": False, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": { + "pii": { "guarded": False, "response": "", }, @@ -560,12 +560,12 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": True, "gender_bias_detection": {"guarded": True, "response": ""}, "harm_detection": {"guarded": True, "response": ""}, - "text_toxicity_extraction": {"guarded": True, "response": ""}, + "toxicity_detection": {"guarded": True, "response": ""}, "racial_bias_detection": {"guarded": True, "response": ""}, - "confidential_detection": {"guarded": True, "response": ""}, + "confidential_info_detection": {"guarded": True, "response": ""}, "intellectual_property": {"guarded": True, "response": ""}, "jailbreak_detection": {"guarded": True, "response": ""}, - "pii_fast": {"guarded": True, "response": ""}, + "pii": {"guarded": True, "response": ""}, "combined_response": "", } @@ -579,12 +579,12 @@ async def mock_autoalign_output_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": False, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": { + "pii": { "guarded": True, "response": "Pluto is a dwarf planet located in the outer regions of our solar system. It was " "discovered in [DATE] by [PROFESSION] [PERSON NAME]. Pluto was considered the ninth " @@ -601,12 +601,12 @@ async def mock_autoalign_output_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": False, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": {"guarded": False, "response": ""}, + "pii": {"guarded": False, "response": ""}, "combined_response": "", } @@ -651,12 +651,12 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": False, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": { + "pii": { "guarded": False, "response": "", }, @@ -667,12 +667,12 @@ async def mock_autoalign_input_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": True, "gender_bias_detection": {"guarded": True, "response": ""}, "harm_detection": {"guarded": True, "response": ""}, - "text_toxicity_extraction": {"guarded": True, "response": ""}, + "toxicity_detection": {"guarded": True, "response": ""}, "racial_bias_detection": {"guarded": True, "response": ""}, - "confidential_detection": {"guarded": True, "response": ""}, + "confidential_info_detection": {"guarded": True, "response": ""}, "intellectual_property": {"guarded": True, "response": ""}, "jailbreak_detection": {"guarded": True, "response": ""}, - "pii_fast": {"guarded": True, "response": ""}, + "pii": {"guarded": True, "response": ""}, "combined_response": "", } @@ -695,12 +695,12 @@ async def mock_autoalign_output_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": False, "gender_bias_detection": {"guarded": False, "response": ""}, "harm_detection": {"guarded": False, "response": ""}, - "text_toxicity_extraction": {"guarded": False, "response": ""}, + "toxicity_detection": {"guarded": False, "response": ""}, "racial_bias_detection": {"guarded": False, "response": ""}, - "confidential_detection": {"guarded": False, "response": ""}, + "confidential_info_detection": {"guarded": False, "response": ""}, "intellectual_property": {"guarded": False, "response": ""}, "jailbreak_detection": {"guarded": False, "response": ""}, - "pii_fast": { + "pii": { "guarded": False, "response": "", }, @@ -711,12 +711,12 @@ async def mock_autoalign_output_api(context: Optional[dict] = None, **kwargs): "guardrails_triggered": True, "gender_bias_detection": {"guarded": True, "response": ""}, "harm_detection": {"guarded": True, "response": ""}, - "text_toxicity_extraction": {"guarded": True, "response": ""}, + "toxicity_detection": {"guarded": True, "response": ""}, "racial_bias_detection": {"guarded": True, "response": ""}, - "confidential_detection": {"guarded": True, "response": ""}, + "confidential_info_detection": {"guarded": True, "response": ""}, "intellectual_property": {"guarded": True, "response": ""}, "jailbreak_detection": {"guarded": True, "response": ""}, - "pii_fast": {"guarded": True, "response": ""}, + "pii": {"guarded": True, "response": ""}, "combined_response": "", } diff --git a/tests/test_autoalign_factcheck.py b/tests/test_autoalign_factcheck.py index abf13f066..bb5efbe46 100644 --- a/tests/test_autoalign_factcheck.py +++ b/tests/test_autoalign_factcheck.py @@ -27,7 +27,7 @@ def build_kb(): with open( - os.path.join(CONFIGS_FOLDER, "autoalign_factcheck", "kb", "kb.md"), "r" + os.path.join(CONFIGS_FOLDER, "autoalign_groundness", "kb", "kb.md"), "r" ) as f: content = f.readlines() @@ -49,8 +49,8 @@ async def retrieve_relevant_chunks(): @pytest.mark.asyncio -async def test_fact_checking_correct(httpx_mock): - config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "autoalign_factcheck")) +async def test_groundness_correct(httpx_mock): + config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "autoalign_groundness")) chat = TestChat( config, llm_completions=[ @@ -65,7 +65,7 @@ async def test_fact_checking_correct(httpx_mock): ], ) - async def mock_autoalign_factcheck_output_api( + async def mock_autoalign_groundedness_output_api( context: Optional[dict] = None, **kwargs ): query = context.get("bot_message") @@ -86,7 +86,7 @@ async def mock_autoalign_factcheck_output_api( chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks") chat.app.register_action( - mock_autoalign_factcheck_output_api, "autoalign_factcheck_output_api" + mock_autoalign_groundedness_output_api, "autoalign_groundedness_output_api" ) ( @@ -106,9 +106,9 @@ async def mock_autoalign_factcheck_output_api( @pytest.mark.asyncio -async def test_fact_checking_wrong(httpx_mock): +async def test_groundness_check_wrong(httpx_mock): # Test - Very low score - Not factual - config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "autoalign_factcheck")) + config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "autoalign_groundness")) chat = TestChat( config, llm_completions=[ @@ -122,7 +122,7 @@ async def test_fact_checking_wrong(httpx_mock): ], ) - async def mock_autoalign_factcheck_output_api( + async def mock_autoalign_groundedness_output_api( context: Optional[dict] = None, **kwargs ): query = context.get("bot_message") @@ -141,7 +141,7 @@ async def mock_autoalign_factcheck_output_api( chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks") chat.app.register_action( - mock_autoalign_factcheck_output_api, "autoalign_factcheck_output_api" + mock_autoalign_groundedness_output_api, "autoalign_groundedness_output_api" ) ( chat @@ -155,3 +155,30 @@ async def mock_autoalign_factcheck_output_api( "and 2012, respectively. These moons are much smaller than Charon and Pluto, but they are still " "significant in understanding the dynamics of the Pluto system. Isn't that fascinating?" ) + + +@pytest.mark.asyncio +async def test_factcheck(): + config = RailsConfig.from_path( + os.path.join(CONFIGS_FOLDER, "autoalign_factchecker") + ) + + chat = TestChat(config, llm_completions=["factually correct response"]) + + async def mock_autoalign_factcheck_output_api( + context: Optional[dict] = None, **kwargs + ): + user_prompt = context.get("user_message") + bot_response = context.get("bot_message") + + assert user_prompt == "mock user prompt" + assert bot_response == "factually correct response" + + return 1.0 + + chat.app.register_action( + mock_autoalign_factcheck_output_api, "autoalign_factcheck_output_api" + ) + + chat >> "mock user prompt" + await chat.bot_async("factually correct response") diff --git a/tests/test_configs/autoalign/config.yml b/tests/test_configs/autoalign/config.yml index 2cae44724..ad2214993 100644 --- a/tests/test_configs/autoalign/config.yml +++ b/tests/test_configs/autoalign/config.yml @@ -11,7 +11,7 @@ rails: input: guardrails_config: { - "pii_fast": + "pii": { "enabled_types": [ @@ -74,7 +74,7 @@ rails: "[RELIGION]": 0.5, }, }, - "confidential_detection": + "confidential_info_detection": { "matching_scores": { @@ -87,7 +87,7 @@ rails: }, "gender_bias_detection": { "matching_scores": { "score": 0.5 } }, "harm_detection": { "matching_scores": { "score": 0.5 } }, - "text_toxicity_extraction": { "matching_scores": { "score": 0.5 } }, + "toxicity_detection": { "matching_scores": { "score": 0.5 } }, "racial_bias_detection": { "matching_scores": @@ -115,7 +115,7 @@ rails: output: guardrails_config: { - "pii_fast": + "pii": { "enabled_types": [ @@ -179,7 +179,7 @@ rails: "[RELIGION]": 0.5, }, }, - "confidential_detection": + "confidential_info_detection": { "matching_scores": { @@ -192,7 +192,7 @@ rails: }, "gender_bias_detection": { "matching_scores": { "score": 0.5 } }, "harm_detection": { "matching_scores": { "score": 0.5 } }, - "text_toxicity_extraction": { "matching_scores": { "score": 0.5 } }, + "toxicity_detection": { "matching_scores": { "score": 0.5 } }, "racial_bias_detection": { "matching_scores": diff --git a/tests/test_configs/autoalign_factchecker/config.yml b/tests/test_configs/autoalign_factchecker/config.yml new file mode 100644 index 000000000..dc6e69f6c --- /dev/null +++ b/tests/test_configs/autoalign_factchecker/config.yml @@ -0,0 +1,38 @@ +models: + - type: main + engine: openai + model: gpt-3.5-turbo-instruct +rails: + config: + autoalign: + parameters: + fact_check_endpoint: "https://nvidia.autoalign.ai/guardrail" + output: + guardrails_config: + { + "fact_checker": { + "mode": "DETECT", + "knowledge_base": { + "num_urls": 5, + "web_content": true, + "max_tokens_per_chunk": 500, + "add_block_domains": [ + "facebook.com" + ], + "search_engine": "Google", + "static_knowledge": false + }, + "content_processor": { + "max_chunks_per_source": 15, + "use_all_chunks": false, + "filter_method": { + "name": "Match Threshold", + "threshold": 0.5 + } + }, + "mitigation_with_evidence": false + }, + } + output: + flows: + - autoalign factcheck output diff --git a/tests/test_configs/autoalign_factcheck/config.yml b/tests/test_configs/autoalign_groundness/config.yml similarity index 63% rename from tests/test_configs/autoalign_factcheck/config.yml rename to tests/test_configs/autoalign_groundness/config.yml index f7e0d4713..81f22c62a 100644 --- a/tests/test_configs/autoalign_factcheck/config.yml +++ b/tests/test_configs/autoalign_groundness/config.yml @@ -6,14 +6,14 @@ rails: config: autoalign: parameters: - fact_check_endpoint: "https://nvidia.autoalign.ai/factcheck" + groundedness_check_endpoint: "https://nvidia.autoalign.ai/groundedness_check" output: guardrails_config: { - "factcheck": { + "groundedness_checker": { "verify_response": false }, } output: flows: - - autoalign factcheck output + - autoalign groundedness output diff --git a/tests/test_configs/autoalign_factcheck/kb/kb.md b/tests/test_configs/autoalign_groundness/kb/kb.md similarity index 100% rename from tests/test_configs/autoalign_factcheck/kb/kb.md rename to tests/test_configs/autoalign_groundness/kb/kb.md diff --git a/tests/test_configs/autoalign_factcheck/rails/factcheck.co b/tests/test_configs/autoalign_groundness/rails/factcheck.co similarity index 100% rename from tests/test_configs/autoalign_factcheck/rails/factcheck.co rename to tests/test_configs/autoalign_groundness/rails/factcheck.co diff --git a/tests/test_configs/autoalign_factcheck/rails/general.co b/tests/test_configs/autoalign_groundness/rails/general.co similarity index 100% rename from tests/test_configs/autoalign_factcheck/rails/general.co rename to tests/test_configs/autoalign_groundness/rails/general.co