fix: correct task name for self_check_facts (#1040)

Pouyanpi · web-flow · commit 247fecdf2f67 · 2025-03-31T14:44:14.000+02:00
* refactor: simplify test setup with llm_completions
diff --git a/nemoguardrails/llm/types.py b/nemoguardrails/llm/types.py
@@ -46,5 +46,5 @@ class Task(Enum):
         "patronus_lynx_check_output_hallucination"
     )
 
-    SELF_CHECK_FACTS = "fact_checking"
+    SELF_CHECK_FACTS = "self_check_facts"
     SELF_CHECK_HALLUCINATION = "self_check_hallucination"
diff --git a/tests/test_configs/fact_checking/config.yml b/tests/test_configs/fact_checking/config.yml
@@ -1,7 +1,7 @@
 models:
   - type: main
-    engine: nemollm
-    model: gpt-43b-002
+    engine: openai
+    model: gpt-3.5-instruct-turbo
 
 rails:
   config:
diff --git a/tests/test_fact_checking.py b/tests/test_fact_checking.py
@@ -20,6 +20,7 @@
 
 from nemoguardrails import RailsConfig
 from nemoguardrails.actions.actions import ActionResult, action
+from nemoguardrails.llm.providers.trtllm import llm
 from tests.constants import NEMO_API_URL_GPT_43B_002
 from tests.utils import TestChat
 
@@ -50,20 +51,10 @@ async def retrieve_relevant_chunks():
 async def test_fact_checking_greeting(httpx_mock):
     # Test 1 - Greeting - No fact-checking invocation should happen
     config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "fact_checking"))
-    chat = TestChat(config)
-    chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
-
-    httpx_mock.add_response(
-        method="POST",
-        url=NEMO_API_URL_GPT_43B_002,
-        json={"text": "  express greeting"},
-    )
-
-    httpx_mock.add_response(
-        method="POST",
-        url=NEMO_API_URL_GPT_43B_002,
-        json={"text": "Hi! How can I assist today?"},
+    chat = TestChat(
+        config, llm_completions=["  express greeting", "Hi! How can I assist today?"]
     )
+    chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
 
     chat >> "hi"
     await chat.bot_async("Hi! How can I assist today?")
@@ -73,22 +64,14 @@ async def test_fact_checking_greeting(httpx_mock):
 async def test_fact_checking_correct(httpx_mock):
     # Test 2 - Factual statement - high alignscore
     config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "fact_checking"))
-    chat = TestChat(config)
-    chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
-
-    httpx_mock.add_response(
-        method="POST",
-        url=NEMO_API_URL_GPT_43B_002,
-        json={"text": "  ask about guardrails"},
-    )
-
-    httpx_mock.add_response(
-        method="POST",
-        url=NEMO_API_URL_GPT_43B_002,
-        json={
-            "text": "NeMo Guardrails is an open-source toolkit for easily adding programmable guardrails to LLM-based conversational systems."
-        },
+    chat = TestChat(
+        config,
+        llm_completions=[
+            "  ask about guardrails",
+            "NeMo Guardrails is an open-source toolkit for easily adding programmable guardrails to LLM-based conversational systems.",
+        ],
     )
+    chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
 
     with aioresponses() as m:
         # Fact-checking using AlignScore
@@ -109,22 +92,14 @@ async def test_fact_checking_correct(httpx_mock):
 async def test_fact_checking_wrong(httpx_mock):
     # Test 3 - Very low alignscore - Not factual
     config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "fact_checking"))
-    chat = TestChat(config)
-    chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
-
-    httpx_mock.add_response(
-        method="POST",
-        url=NEMO_API_URL_GPT_43B_002,
-        json={"text": "  ask about guardrails"},
-    )
-
-    httpx_mock.add_response(
-        method="POST",
-        url=NEMO_API_URL_GPT_43B_002,
-        json={
-            "text": "NeMo Guardrails is a closed-source proprietary toolkit by Nvidia."
-        },
+    chat = TestChat(
+        config,
+        llm_completions=[
+            "  ask about guardrails",
+            "NeMo Guardrails is a closed-source proprietary toolkit by Nvidia.",
+        ],
     )
+    chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
 
     with aioresponses() as m:
         # Fact-checking using AlignScore
@@ -179,28 +154,16 @@ async def test_fact_checking_uncertain(httpx_mock):
 async def test_fact_checking_fallback_to_self_check_correct(httpx_mock):
     # Test 4 - Factual statement - AlignScore endpoint not set up properly, use ask llm for fact-checking
     config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "fact_checking"))
-    chat = TestChat(config)
-    chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
-
-    httpx_mock.add_response(
-        method="POST",
-        url=NEMO_API_URL_GPT_43B_002,
-        json={"text": "  ask about guardrails"},
+    chat = TestChat(
+        config,
+        llm_completions=[
+            "  ask about guardrails",
+            "NeMo Guardrails is an open-source toolkit for easily adding programmable guardrails to LLM-based conversational systems.",
+            "yes",
+        ],
     )
 
-    httpx_mock.add_response(
-        method="POST",
-        url=NEMO_API_URL_GPT_43B_002,
-        json={
-            "text": "NeMo Guardrails is an open-source toolkit for easily adding programmable guardrails to LLM-based conversational systems."
-        },
-    )
-
-    httpx_mock.add_response(
-        method="POST",
-        url=NEMO_API_URL_GPT_43B_002,
-        json={"text": "yes"},
-    )
+    chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
 
     with aioresponses() as m:
         # Fact-checking using AlignScore
@@ -219,28 +182,16 @@ async def test_fact_checking_fallback_to_self_check_correct(httpx_mock):
 async def test_fact_checking_fallback_self_check_wrong(httpx_mock):
     # Test 5 - Factual statement - AlignScore endpoint not set up properly, use ask llm for fact-checking
     config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "fact_checking"))
-    chat = TestChat(config)
-    chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
-
-    httpx_mock.add_response(
-        method="POST",
-        url=NEMO_API_URL_GPT_43B_002,
-        json={"text": "  ask about guardrails"},
-    )
-
-    httpx_mock.add_response(
-        method="POST",
-        url=NEMO_API_URL_GPT_43B_002,
-        json={
-            "text": "NeMo Guardrails is an closed-source toolkit for easily adding programmable guardrails to LLM-based conversational systems."
-        },
-    )
-
-    httpx_mock.add_response(
-        method="POST",
-        url=NEMO_API_URL_GPT_43B_002,
-        json={"text": "no"},
+    chat = TestChat(
+        config,
+        llm_completions=[
+            "  ask about guardrails",
+            "NeMo Guardrails is an closed-source toolkit for easily adding programmable guardrails to LLM-based conversational systems.",
+            "no",
+            "I don't know the answer to that.",
+        ],
     )
+    chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
 
     with aioresponses() as m:
         # Fact-checking using AlignScore

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -46,5 +46,5 @@ class Task(Enum):` |
| `46` | `46` | `"patronus_lynx_check_output_hallucination"` |
| `47` | `47` | `)` |
| `48` | `48` |  |
| `49` |  | `- SELF_CHECK_FACTS = "fact_checking"` |
|  | `49` | `+ SELF_CHECK_FACTS = "self_check_facts"` |
| `50` | `50` | `SELF_CHECK_HALLUCINATION = "self_check_hallucination"` |