Skip to content

Commit 4bf82d4

Browse files
authored
[V1] Add regex structured output support with xgrammar (vllm-project#14590)
Signed-off-by: Russell Bryant <[email protected]>
1 parent 9ab3267 commit 4bf82d4

File tree

4 files changed: +25 -19 lines changed

requirements/common.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ tiktoken >= 0.6.0 # Required for DBRX tokenizer
1919
lm-format-enforcer >= 0.10.11, < 0.11
2020
outlines == 0.1.11
2121
lark == 1.2.2
22-
xgrammar == 0.1.11; platform_machine == "x86_64"
22+
xgrammar == 0.1.15; platform_machine == "x86_64" or platform_machine == "aarch64"
2323
typing_extensions >= 4.10
2424
filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
2525
partial-json-parser # used for parsing partial JSON outputs
@@ -37,4 +37,4 @@ depyf==0.18.0 # required for profiling and debugging with compilation config
3737
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
3838
watchfiles # required for http server to monitor the updates of TLS files
3939
python-json-logger # Used by logging as per examples/other/logging_configuration.md
40-
scipy # Required for phi-4-multimodal-instruct
40+
scipy # Required for phi-4-multimodal-instruct

tests/v1/entrypoints/llm/test_struct_output_generate.py

+16-16
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
import json
4+
import re
45

56
import jsonschema
67
import pytest
@@ -219,25 +220,24 @@ def test_guided_regex(monkeypatch, sample_regex, guided_decoding_backend: str):
219220
guided_decoding=GuidedDecodingParams(
220221
regex=sample_regex,
221222
backend=guided_decoding_backend))
222-
with pytest.raises(ValueError,
223-
match="Regex guided decoding is not supported."):
224-
llm.generate(prompts=[
223+
outputs = llm.generate(
224+
prompts=[
225225
f"Give an example IPv4 address with this regex: {sample_regex}"
226226
] * 2,
227-
sampling_params=sampling_params,
228-
use_tqdm=True)
227+
sampling_params=sampling_params,
228+
use_tqdm=True,
229+
)
229230

230-
# Once regex is supported --
231-
#assert outputs is not None
232-
#for output in outputs:
233-
# assert output is not None
234-
# assert isinstance(output, RequestOutput)
235-
# prompt = output.prompt
236-
# generated_text = output.outputs[0].text
237-
# print(generated_text)
238-
# assert generated_text is not None
239-
# assert re.fullmatch(sample_regex, generated_text) is not None
240-
# print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
231+
assert outputs is not None
232+
for output in outputs:
233+
assert output is not None
234+
assert isinstance(output, RequestOutput)
235+
prompt = output.prompt
236+
generated_text = output.outputs[0].text
237+
print(generated_text)
238+
assert generated_text is not None
239+
assert re.fullmatch(sample_regex, generated_text) is not None
240+
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
241241

242242

243243
@pytest.mark.skip_global_cleanup

vllm/v1/structured_output/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ def initialize_grammar(self, key: StructuredOutputKey) -> Grammar:
112112
ctx = self.compiler.compile_builtin_json_grammar()
113113
elif request_type == StructuredOutputOptions.GRAMMAR:
114114
ctx = self.compiler.compile_grammar(grammar_spec)
115+
elif request_type == StructuredOutputOptions.REGEX:
116+
ctx = self.compiler.compile_regex(grammar_spec)
115117
else:
116118
logger.error("Validation should have already occurred. "
117119
"Please file an issue.")

vllm/v1/structured_output/utils.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,11 @@ def validate_structured_output_request(
251251
gd_params = sampling_params.guided_decoding
252252

253253
if gd_params.regex:
254-
raise ValueError("Regex structured output is not supported.")
254+
try:
255+
xgr.Grammar.from_regex(gd_params.regex)
256+
except Exception as err:
257+
raise ValueError("Failed to transform regex into a grammar: "
258+
f"{err}") from err
255259

256260
if gd_params.choice:
257261
choice_grammar = choice_as_grammar(gd_params.choice)

0 commit comments

Comments
 (0)