Skip to content

Commit 247fecd

Browse files
authored
fix: correct task name for self_check_facts (#1040)
* refactor: simplify test setup with llm_completions
1 parent 2776598 commit 247fecd

File tree

3 files changed

+38
-87
lines changed

3 files changed

+38
-87
lines changed

nemoguardrails/llm/types.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,5 +46,5 @@ class Task(Enum):
4646
"patronus_lynx_check_output_hallucination"
4747
)
4848

49-
SELF_CHECK_FACTS = "fact_checking"
49+
SELF_CHECK_FACTS = "self_check_facts"
5050
SELF_CHECK_HALLUCINATION = "self_check_hallucination"

tests/test_configs/fact_checking/config.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
models:
22
- type: main
3-
engine: nemollm
4-
model: gpt-43b-002
3+
engine: openai
4+
model: gpt-3.5-instruct-turbo
55

66
rails:
77
config:

tests/test_fact_checking.py

+35-84
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from nemoguardrails import RailsConfig
2222
from nemoguardrails.actions.actions import ActionResult, action
23+
from nemoguardrails.llm.providers.trtllm import llm
2324
from tests.constants import NEMO_API_URL_GPT_43B_002
2425
from tests.utils import TestChat
2526

@@ -50,20 +51,10 @@ async def retrieve_relevant_chunks():
5051
async def test_fact_checking_greeting(httpx_mock):
5152
# Test 1 - Greeting - No fact-checking invocation should happen
5253
config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "fact_checking"))
53-
chat = TestChat(config)
54-
chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
55-
56-
httpx_mock.add_response(
57-
method="POST",
58-
url=NEMO_API_URL_GPT_43B_002,
59-
json={"text": " express greeting"},
60-
)
61-
62-
httpx_mock.add_response(
63-
method="POST",
64-
url=NEMO_API_URL_GPT_43B_002,
65-
json={"text": "Hi! How can I assist today?"},
54+
chat = TestChat(
55+
config, llm_completions=[" express greeting", "Hi! How can I assist today?"]
6656
)
57+
chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
6758

6859
chat >> "hi"
6960
await chat.bot_async("Hi! How can I assist today?")
@@ -73,22 +64,14 @@ async def test_fact_checking_greeting(httpx_mock):
7364
async def test_fact_checking_correct(httpx_mock):
7465
# Test 2 - Factual statement - high alignscore
7566
config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "fact_checking"))
76-
chat = TestChat(config)
77-
chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
78-
79-
httpx_mock.add_response(
80-
method="POST",
81-
url=NEMO_API_URL_GPT_43B_002,
82-
json={"text": " ask about guardrails"},
83-
)
84-
85-
httpx_mock.add_response(
86-
method="POST",
87-
url=NEMO_API_URL_GPT_43B_002,
88-
json={
89-
"text": "NeMo Guardrails is an open-source toolkit for easily adding programmable guardrails to LLM-based conversational systems."
90-
},
67+
chat = TestChat(
68+
config,
69+
llm_completions=[
70+
" ask about guardrails",
71+
"NeMo Guardrails is an open-source toolkit for easily adding programmable guardrails to LLM-based conversational systems.",
72+
],
9173
)
74+
chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
9275

9376
with aioresponses() as m:
9477
# Fact-checking using AlignScore
@@ -109,22 +92,14 @@ async def test_fact_checking_correct(httpx_mock):
10992
async def test_fact_checking_wrong(httpx_mock):
11093
# Test 3 - Very low alignscore - Not factual
11194
config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "fact_checking"))
112-
chat = TestChat(config)
113-
chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
114-
115-
httpx_mock.add_response(
116-
method="POST",
117-
url=NEMO_API_URL_GPT_43B_002,
118-
json={"text": " ask about guardrails"},
119-
)
120-
121-
httpx_mock.add_response(
122-
method="POST",
123-
url=NEMO_API_URL_GPT_43B_002,
124-
json={
125-
"text": "NeMo Guardrails is a closed-source proprietary toolkit by Nvidia."
126-
},
95+
chat = TestChat(
96+
config,
97+
llm_completions=[
98+
" ask about guardrails",
99+
"NeMo Guardrails is a closed-source proprietary toolkit by Nvidia.",
100+
],
127101
)
102+
chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
128103

129104
with aioresponses() as m:
130105
# Fact-checking using AlignScore
@@ -179,28 +154,16 @@ async def test_fact_checking_uncertain(httpx_mock):
179154
async def test_fact_checking_fallback_to_self_check_correct(httpx_mock):
180155
# Test 4 - Factual statement - AlignScore endpoint not set up properly, so fall back to asking the LLM for fact-checking
181156
config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "fact_checking"))
182-
chat = TestChat(config)
183-
chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
184-
185-
httpx_mock.add_response(
186-
method="POST",
187-
url=NEMO_API_URL_GPT_43B_002,
188-
json={"text": " ask about guardrails"},
157+
chat = TestChat(
158+
config,
159+
llm_completions=[
160+
" ask about guardrails",
161+
"NeMo Guardrails is an open-source toolkit for easily adding programmable guardrails to LLM-based conversational systems.",
162+
"yes",
163+
],
189164
)
190165

191-
httpx_mock.add_response(
192-
method="POST",
193-
url=NEMO_API_URL_GPT_43B_002,
194-
json={
195-
"text": "NeMo Guardrails is an open-source toolkit for easily adding programmable guardrails to LLM-based conversational systems."
196-
},
197-
)
198-
199-
httpx_mock.add_response(
200-
method="POST",
201-
url=NEMO_API_URL_GPT_43B_002,
202-
json={"text": "yes"},
203-
)
166+
chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
204167

205168
with aioresponses() as m:
206169
# Fact-checking using AlignScore
@@ -219,28 +182,16 @@ async def test_fact_checking_fallback_to_self_check_correct(httpx_mock):
219182
async def test_fact_checking_fallback_self_check_wrong(httpx_mock):
220183
# Test 5 - Non-factual statement - AlignScore endpoint not set up properly, so fall back to asking the LLM for fact-checking
221184
config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "fact_checking"))
222-
chat = TestChat(config)
223-
chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
224-
225-
httpx_mock.add_response(
226-
method="POST",
227-
url=NEMO_API_URL_GPT_43B_002,
228-
json={"text": " ask about guardrails"},
229-
)
230-
231-
httpx_mock.add_response(
232-
method="POST",
233-
url=NEMO_API_URL_GPT_43B_002,
234-
json={
235-
"text": "NeMo Guardrails is an closed-source toolkit for easily adding programmable guardrails to LLM-based conversational systems."
236-
},
237-
)
238-
239-
httpx_mock.add_response(
240-
method="POST",
241-
url=NEMO_API_URL_GPT_43B_002,
242-
json={"text": "no"},
185+
chat = TestChat(
186+
config,
187+
llm_completions=[
188+
" ask about guardrails",
189+
"NeMo Guardrails is an closed-source toolkit for easily adding programmable guardrails to LLM-based conversational systems.",
190+
"no",
191+
"I don't know the answer to that.",
192+
],
243193
)
194+
chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks")
244195

245196
with aioresponses() as m:
246197
# Fact-checking using AlignScore

0 commit comments

Comments
 (0)