diff --git a/README.md b/README.md index b4b4161e..c202229b 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ yarn install `zk-regex` is a CLI to compile a user-defined regex to the corresponding regex circuit. It provides two commands: `raw` and `decomposed` -#### `zk-regex decomposed -d -c -t -g ` +#### `zk-regex decomposed -d -c -t -g -i ` This command generates a regex circom from a decomposed regex definition. For example, if you want to verify the regex of `email was meant for @(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)+.` and reveal alphabets after @, you can define the decomposed regex as follows. ``` @@ -86,7 +86,10 @@ You can generate its regex circom as follows. 1. Make the above json file at `./simple_regex_decomposed.json`. 2. Run `zk-regex decomposed -d ./simple_regex_decomposed.json -c ./simple_regex.circom -t SimpleRegex -g true`. It outputs a circom file at `./simple_regex.circom` that has a `SimpleRegex` template. -#### `zk-regex raw -r -s -c -t -g ` +> [!NOTE] +> If the `-i (--is-safe)` option is not explicitly set to true, the generated Circom template performs a less rigorous range check for each character of the input string, which may inadvertently allow excessively large values. However, this issue is **not critical**, as all inputs to the regex templates are text bytes assumed to be less than 255. When `is_safe` is set to true, the output Circom template includes an additional 9 constraints per character, ensuring a strict range check for each character. + +#### `zk-regex raw -r -s -c -t -g -i ` This command generates a regex circom from a raw string of the regex definition and a json file that defines state transitions in DFA to be revealed. For example, to verify the regex `1=(a|b) (2=(b|c)+ )+d` and reveal its alphabets, 1. Visualize DFA of the regex using [this website](https://zkregex.com). 
diff --git a/packages/circom/circuits/regex_helpers.circom b/packages/circom/circuits/regex_helpers.circom index c5a4eaa1..3c4f2bcf 100644 --- a/packages/circom/circuits/regex_helpers.circom +++ b/packages/circom/circuits/regex_helpers.circom @@ -71,4 +71,24 @@ template IsNotZeroAcc() { signal is_zero <== IsZero()(in); out <== acc + (1 - is_zero); +} + +template SemiSafeLessThan(n) { + assert(n <= 252); + signal input in[2]; + signal output out; + + component aInRange = Num2Bits(n); + aInRange.in <== in[0]; + + // In this project, in[1] is always 255. + // component bInRange = Num2Bits(n); + // bInRange.in <== in[1]; + + component lt = LessThan(n); + + lt.in[0] <== in[0]; + lt.in[1] <== in[1]; + + out <== lt.out; } \ No newline at end of file diff --git a/packages/compiler/src/bin/compiler.rs b/packages/compiler/src/bin/compiler.rs index ba53749b..e7c6dca8 100644 --- a/packages/compiler/src/bin/compiler.rs +++ b/packages/compiler/src/bin/compiler.rs @@ -19,6 +19,7 @@ //! - `-c, --circom-file-path `: File path for Circom output //! - `-t, --template-name `: Template name //! - `-g, --gen-substrs`: Generate substrings +//! - `-i, --is-safe`: Performs rigorous checks on the range of each character in the input string, adding 9 additional constraints per character //! //! Example: //! ``` @@ -39,11 +40,18 @@ //! - `-c, --circom-file-path `: File path for Circom output //! - `-t, --template-name `: Template name //! - `-g, --gen-substrs`: Generate substrings +//! - `-i, --is-safe`: Performs rigorous checks on the range of each character in the input string, adding 9 additional constraints per character //! //! Example: //! ``` //! zk-regex raw -r "a*b+c?" -s substrings.json -h ./halo2_output -c ./circom_output.circom -t MyTemplate -g true //! ``` +//! +//! ## Note +//! The `-i (--is-safe)` option controls the rigor of range checks for input characters: +//! 
- If not set to `true`, the generated Circom template uses less rigorous range checks, which may allow excessively large values. +//! - This is usually not critical, as input text bytes are assumed to be less than 255. +//! - When `is_safe` is `true`, the template adds 9 extra constraints per character, ensuring strict range checks. use clap::{Parser, Subcommand}; use zk_regex_compiler::{gen_from_decomposed, gen_from_raw}; @@ -68,6 +76,8 @@ enum Commands { template_name: Option, #[arg(short, long)] gen_substrs: Option, + #[arg(short, long)] + is_safe: Option, }, Raw { #[arg(short, long)] @@ -82,6 +92,8 @@ enum Commands { template_name: Option, #[arg(short, long)] gen_substrs: Option, + #[arg(short, long)] + is_safe: Option, }, } @@ -100,6 +112,7 @@ fn process_decomposed(cli: Cli) { circom_file_path, template_name, gen_substrs, + is_safe, } = cli.command { if let Err(e) = gen_from_decomposed( @@ -108,6 +121,7 @@ fn process_decomposed(cli: Cli) { circom_file_path.as_deref(), template_name.as_deref(), gen_substrs, + is_safe, ) { eprintln!("Error: {}", e); std::process::exit(1); @@ -123,6 +137,7 @@ fn process_raw(cli: Cli) { circom_file_path, template_name, gen_substrs, + is_safe, } = cli.command { if let Err(e) = gen_from_raw( @@ -132,6 +147,7 @@ fn process_raw(cli: Cli) { circom_file_path.as_deref(), template_name.as_deref(), gen_substrs, + is_safe, ) { eprintln!("Error: {}", e); std::process::exit(1); diff --git a/packages/compiler/src/circom.rs b/packages/compiler/src/circom.rs index f7d319fa..25d5a9e2 100644 --- a/packages/compiler/src/circom.rs +++ b/packages/compiler/src/circom.rs @@ -570,6 +570,7 @@ fn generate_declarations( and_i: usize, multi_or_i: usize, end_anchor: bool, + is_safe: bool, ) -> Vec { let mut declarations = vec![ "pragma circom 2.1.5;\n".to_string(), @@ -587,7 +588,15 @@ fn generate_declarations( "\tsignal in_range_checks[msg_bytes];".to_string(), "\tin[0]<==255;".to_string(), "\tfor (var i = 0; i < msg_bytes; i++) {".to_string(), - 
"\t\tin_range_checks[i] <== LessThan(8)([msg[i], 255]);".to_string(), + format!( + "\t\tin_range_checks[i] <== {}(8)([msg[i], 255]);", + if is_safe { + "SemiSafeLessThan" + } else { + "LessThan" + } + ) + .to_string(), "\t\tin_range_checks[i] === 1;".to_string(), "\t\tin[i+1] <== msg[i];".to_string(), "\t}".to_string(), @@ -734,6 +743,7 @@ fn gen_circom_allstr( template_name: &str, regex_str: &str, end_anchor: bool, + is_safe: bool, ) -> Result { let state_len = dfa_graph.states.len(); @@ -751,6 +761,7 @@ fn gen_circom_allstr( and_i, multi_or_i, end_anchor, + is_safe, ); let init_code = generate_init_code(state_len); @@ -966,12 +977,14 @@ pub(crate) fn gen_circom_template( circom_path: &Path, template_name: &str, gen_substrs: bool, + is_safe: bool, ) -> Result<(), CompilerError> { let circom = gen_circom_allstr( ®ex_and_dfa.dfa, template_name, ®ex_and_dfa.regex_pattern, regex_and_dfa.has_end_anchor, + is_safe, )?; let mut file = File::create(circom_path)?; @@ -1001,12 +1014,14 @@ pub(crate) fn gen_circom_template( pub(crate) fn gen_circom_string( regex_and_dfa: &RegexAndDFA, template_name: &str, + is_safe: bool, ) -> Result { let circom = gen_circom_allstr( ®ex_and_dfa.dfa, template_name, ®ex_and_dfa.regex_pattern, regex_and_dfa.has_end_anchor, + is_safe, )?; let substrs = add_substrs_constraints(regex_and_dfa)?; let result = circom + &substrs; diff --git a/packages/compiler/src/lib.rs b/packages/compiler/src/lib.rs index 2767c52e..4a9b5b6e 100644 --- a/packages/compiler/src/lib.rs +++ b/packages/compiler/src/lib.rs @@ -59,6 +59,7 @@ fn generate_outputs( circom_template_name: Option<&str>, num_public_parts: usize, gen_substrs: bool, + is_safe: bool, ) -> Result<(), CompilerError> { if let Some(halo2_dir_path) = halo2_dir_path { let halo2_dir_path = PathBuf::from(halo2_dir_path); @@ -85,6 +86,7 @@ fn generate_outputs( &circom_file_path, &circom_template_name, gen_substrs, + is_safe, )?; } @@ -110,10 +112,12 @@ pub fn gen_from_decomposed( circom_file_path: 
Option<&str>, circom_template_name: Option<&str>, gen_substrs: Option, + is_safe: Option, ) -> Result<(), CompilerError> { let mut decomposed_regex_config: DecomposedRegexConfig = serde_json::from_reader(File::open(decomposed_regex_path)?)?; let gen_substrs = gen_substrs.unwrap_or(false); + let is_safe = is_safe.unwrap_or(false); let regex_and_dfa = get_regex_and_dfa(&mut decomposed_regex_config)?; @@ -130,6 +134,7 @@ pub fn gen_from_decomposed( circom_template_name, num_public_parts, gen_substrs, + is_safe, )?; Ok(()) @@ -156,6 +161,7 @@ pub fn gen_from_raw( circom_file_path: Option<&str>, template_name: Option<&str>, gen_substrs: Option, + is_safe: Option, ) -> Result<(), CompilerError> { let substrs_defs_json = load_substring_definitions_json(substrs_json_path)?; let num_public_parts = substrs_defs_json.transitions.len(); @@ -163,6 +169,7 @@ pub fn gen_from_raw( let regex_and_dfa = create_regex_and_dfa_from_str_and_defs(raw_regex, substrs_defs_json)?; let gen_substrs = gen_substrs.unwrap_or(true); + let is_safe = is_safe.unwrap_or(false); generate_outputs( ®ex_and_dfa, @@ -171,6 +178,7 @@ pub fn gen_from_raw( template_name, num_public_parts, gen_substrs, + is_safe, )?; Ok(()) @@ -193,8 +201,10 @@ pub fn gen_circom_from_decomposed_regex( circom_file_path: Option<&str>, circom_template_name: Option<&str>, gen_substrs: Option, + is_safe: Option, ) -> Result<(), CompilerError> { let gen_substrs = gen_substrs.unwrap_or(false); + let is_safe = is_safe.unwrap_or(false); let regex_and_dfa = get_regex_and_dfa(decomposed_regex)?; @@ -211,6 +221,7 @@ pub fn gen_circom_from_decomposed_regex( circom_template_name, num_public_parts, gen_substrs, + is_safe, )?; Ok(()) diff --git a/packages/compiler/src/wasm.rs b/packages/compiler/src/wasm.rs index 97ed29f1..2cbea5f0 100644 --- a/packages/compiler/src/wasm.rs +++ b/packages/compiler/src/wasm.rs @@ -11,6 +11,7 @@ use self::circom::gen_circom_string; pub fn genFromDecomposed( decomposedRegexJson: &str, circomTemplateName: &str, + 
is_safe: bool, ) -> Result { let mut decomposed_regex_config: DecomposedRegexConfig = serde_json::from_str(decomposedRegexJson).map_err(|e| { @@ -24,18 +25,24 @@ pub fn genFromDecomposed( )) })?; - gen_circom_string(®ex_and_dfa, circomTemplateName) + gen_circom_string(®ex_and_dfa, circomTemplateName, is_safe) .map_err(|e| JsValue::from_str(&format!("Failed to generate Circom string: {}", e))) } #[wasm_bindgen] #[allow(non_snake_case)] -pub fn genFromRaw(rawRegex: &str, substrsJson: &str, circomTemplateName: &str) -> String { +pub fn genFromRaw( + rawRegex: &str, + substrsJson: &str, + circomTemplateName: &str, + is_safe: bool, +) -> String { let substrs_defs_json: SubstringDefinitionsJson = serde_json::from_str(substrsJson).expect("failed to parse substrs json"); let regex_and_dfa = create_regex_and_dfa_from_str_and_defs(rawRegex, substrs_defs_json) .expect("failed to convert the raw regex and state transitions to dfa"); - gen_circom_string(®ex_and_dfa, circomTemplateName).expect("failed to generate circom") + gen_circom_string(®ex_and_dfa, circomTemplateName, is_safe) + .expect("failed to generate circom") } #[wasm_bindgen] @@ -52,10 +59,11 @@ pub fn genRegexAndDfa(decomposedRegex: JsValue) -> JsValue { #[wasm_bindgen] #[allow(non_snake_case)] -pub fn genCircom(decomposedRegex: JsValue, circomTemplateName: &str) -> String { +pub fn genCircom(decomposedRegex: JsValue, circomTemplateName: &str, is_safe: bool) -> String { let mut decomposed_regex_config: DecomposedRegexConfig = from_value(decomposedRegex).expect("failed to parse decomposed regex"); let regex_and_dfa = get_regex_and_dfa(&mut decomposed_regex_config) .expect("failed to convert the decomposed regex to dfa"); - gen_circom_string(®ex_and_dfa, circomTemplateName).expect("failed to generate circom") + gen_circom_string(®ex_and_dfa, circomTemplateName, is_safe) + .expect("failed to generate circom") }