Skip to content

Commit 29d589e

Browse files
authored
Merge pull request #39 from zkemail/rmacedo/update-regex-to-mindfa
Rmacedo/update regex to mindfa
2 parents ba399bc + 5b93fca commit 29d589e

21 files changed

+2137
-1668
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ This library provides circom circuits that enables you to prove that
1010
- the input string satisfies regular expressions (regexes) specified in the chip.
1111
- the substrings are correctly extracted from the input string according to substring definitions.
1212

13-
This is a JS/Rust adaptation of the Python regex-to-circom work done by [sampriti](https://github.com/sampritipanda/) and [yush_g](https://twitter.com/yush_g) at https://www.zkregex.com
13+
This is a JS/Rust adaptation of the Python regex-to-circom work done by [sampriti](https://github.com/sampritipanda/) and [yush_g](https://twitter.com/yush_g), along with [sorasue](https://github.com/SoraSuegami/)'s decomposed specifications. You can generate your own regexes via our no-code tool at https://www.zkregex.com
1414

1515
In addition to the original work, this library also supports the following features:
1616
- CLI to dynamically generate regex circuit based on regex argument
@@ -22,6 +22,7 @@ You can define a regex to be proved and its substring patterns to be revealed.
2222
Specifically, there are three ways to define them:
2323
1. (manual way) converting the regex into an equivalent deterministic finite automaton (DFA), selecting state transitions for each substring pattern, and writing the transitions in a json file.
2424
2. (automatic way) writing a decomposed version of the regex in a json file and specifying which part of the regex is revealed.
25+
3. (no-code way) putting the regex into the tool at zkregex.com, highlighting the part to be revealed, and copying the generated circuit.
2526
While the manual way supports more kinds of regexes than the automatic way, the latter is easier and sufficient for most regexes.
2627

2728
### Theory
@@ -58,7 +59,7 @@ For example, if you want to verify the regex of `email was meant for @(a|b|c|d|e
5859
},
5960
{
6061
"is_public": true,
61-
"regex_def": "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)+"
62+
"regex_def": "[a-z]+"
6263
},
6364
{
6465
"is_public": false,
@@ -75,7 +76,7 @@ You can generate its regex circom as follows.
7576
#### `zk-regex raw -r <RAW_REGEX> -s <SUBSTRS_JSON_PATH> -c <CIRCOM_FILE_PATH> -t <TEMPLATE_NAME> -g <GEN_SUBSTRS (true/false)>`
7677
This command generates a regex circom from a raw string of the regex definition and a json file that defines state transitions in DFA to be revealed.
7778
For example, to verify the regex `1=(a|b) (2=(b|c)+ )+d` and reveal its alphabets,
78-
1. Visualize DFA of the regex using [this website](https://mindfa.onrender.com/min_dfa).
79+
1. Visualize DFA of the regex using [this website](https://zkregex.com).
7980
2. Find state transitions matching with the substrings to be revealed. In this case, they are `2->3` for the alphabets after `1=`, `6->7` and `7->7` for those after `2=`, and `8->9` for `d`.
8081
3. Make a json file at `./simple_regex_substrs.json` that defines the state transitions. For example,
8182
```

package.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
"packages/*"
99
],
1010
"contributors": [
11-
"Javier Su <[email protected]>",
12-
"Kata Choi <[email protected]>",
1311
"Sora Suegami <[email protected]>",
14-
"Yush G <[email protected]>"
12+
"Yush G <[email protected]>",
13+
"Javier Su <[email protected]>",
14+
"Kata Choi <[email protected]>"
1515
],
1616
"scripts": {
1717
"install": "yarn workspaces -pt run install",

packages/circom/circuits/common/email_addr_regex.circom

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ pragma circom 2.1.5;
22

33
include "@zk-email/zk-regex-circom/circuits/regex_helpers.circom";
44

5+
// regex: (a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|!|#|$|%|&|'|\*|\+|-|/|=|\?|^|_|`|{|\||}|~|\.)+@(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|\.|-)+
56
template EmailAddrRegex(msg_bytes) {
67
signal input msg[msg_bytes];
78
signal output out;
@@ -248,6 +249,7 @@ template EmailAddrRegex(msg_bytes) {
248249
is_consecutive[msg_bytes-1-i][0] <== states[num_bytes-i][3] * (1 - is_consecutive[msg_bytes-i][1]) + is_consecutive[msg_bytes-i][1];
249250
is_consecutive[msg_bytes-1-i][1] <== state_changed[msg_bytes-i].out * is_consecutive[msg_bytes-1-i][0];
250251
}
252+
// substrings calculated: [{(1, 2), (1, 1), (0, 1), (3, 3), (2, 3)}]
251253
signal is_substr0[msg_bytes][6];
252254
signal is_reveal0[msg_bytes];
253255
signal output reveal0[msg_bytes];

packages/circom/circuits/common/email_domain_regex.circom

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ pragma circom 2.1.5;
22

33
include "@zk-email/zk-regex-circom/circuits/regex_helpers.circom";
44

5+
// regex: (a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|!|#|$|%|&|'|\*|\+|-|/|=|\?|^|_|`|{|\||}|~|\.)+@(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|\.|-)+
56
template EmailDomainRegex(msg_bytes) {
67
signal input msg[msg_bytes];
78
signal output out;
@@ -248,6 +249,7 @@ template EmailDomainRegex(msg_bytes) {
248249
is_consecutive[msg_bytes-1-i][0] <== states[num_bytes-i][3] * (1 - is_consecutive[msg_bytes-i][1]) + is_consecutive[msg_bytes-i][1];
249250
is_consecutive[msg_bytes-1-i][1] <== state_changed[msg_bytes-i].out * is_consecutive[msg_bytes-1-i][0];
250251
}
252+
// substrings calculated: [{(3, 3), (2, 3)}]
251253
signal is_substr0[msg_bytes][3];
252254
signal is_reveal0[msg_bytes];
253255
signal output reveal0[msg_bytes];

packages/circom/circuits/common/message_id_regex.circom

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ pragma circom 2.1.5;
22

33
include "@zk-email/zk-regex-circom/circuits/regex_helpers.circom";
44

5+
// regex: ((\n)|^)message-id:<(=|@|\.|\+|_|-|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9)+>\n
56
template MessageIdRegex(msg_bytes) {
67
signal input msg[msg_bytes];
78
signal output out;
@@ -270,6 +271,7 @@ template MessageIdRegex(msg_bytes) {
270271
is_consecutive[msg_bytes-1-i][0] <== states[num_bytes-i][7] * (1 - is_consecutive[msg_bytes-i][1]) + is_consecutive[msg_bytes-i][1];
271272
is_consecutive[msg_bytes-1-i][1] <== state_changed[msg_bytes-i].out * is_consecutive[msg_bytes-1-i][0];
272273
}
274+
// substrings calculated: [{(17, 18), (1, 4), (1, 1), (18, 1)}]
273275
signal is_substr0[msg_bytes][5];
274276
signal is_reveal0[msg_bytes];
275277
signal output reveal0[msg_bytes];
Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
pragma circom 2.1.5;
2+
3+
include "@zk-email/zk-regex-circom/circuits/regex_helpers.circom";
4+
5+
// regex: email was meant for @(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|_)+\.
6+
template SimpleRegexDecomposed(msg_bytes) {
7+
signal input msg[msg_bytes];
8+
signal output out;
9+
10+
var num_bytes = msg_bytes+1;
11+
signal in[num_bytes];
12+
in[0]<==255;
13+
for (var i = 0; i < msg_bytes; i++) {
14+
in[i+1] <== msg[i];
15+
}
16+
17+
component eq[26][num_bytes];
18+
component lt[4][num_bytes];
19+
component and[26][num_bytes];
20+
component multi_or[2][num_bytes];
21+
signal states[num_bytes+1][24];
22+
component state_changed[num_bytes];
23+
24+
states[0][0] <== 1;
25+
for (var i = 1; i < 24; i++) {
26+
states[0][i] <== 0;
27+
}
28+
29+
for (var i = 0; i < num_bytes; i++) {
30+
state_changed[i] = MultiOR(23);
31+
lt[0][i] = LessEqThan(8);
32+
lt[0][i].in[0] <== 65;
33+
lt[0][i].in[1] <== in[i];
34+
lt[1][i] = LessEqThan(8);
35+
lt[1][i].in[0] <== in[i];
36+
lt[1][i].in[1] <== 90;
37+
and[0][i] = AND();
38+
and[0][i].a <== lt[0][i].out;
39+
and[0][i].b <== lt[1][i].out;
40+
lt[2][i] = LessEqThan(8);
41+
lt[2][i].in[0] <== 97;
42+
lt[2][i].in[1] <== in[i];
43+
lt[3][i] = LessEqThan(8);
44+
lt[3][i].in[0] <== in[i];
45+
lt[3][i].in[1] <== 122;
46+
and[1][i] = AND();
47+
and[1][i].a <== lt[2][i].out;
48+
and[1][i].b <== lt[3][i].out;
49+
eq[0][i] = IsEqual();
50+
eq[0][i].in[0] <== in[i];
51+
eq[0][i].in[1] <== 48;
52+
eq[1][i] = IsEqual();
53+
eq[1][i].in[0] <== in[i];
54+
eq[1][i].in[1] <== 49;
55+
eq[2][i] = IsEqual();
56+
eq[2][i].in[0] <== in[i];
57+
eq[2][i].in[1] <== 50;
58+
eq[3][i] = IsEqual();
59+
eq[3][i].in[0] <== in[i];
60+
eq[3][i].in[1] <== 51;
61+
eq[4][i] = IsEqual();
62+
eq[4][i].in[0] <== in[i];
63+
eq[4][i].in[1] <== 52;
64+
eq[5][i] = IsEqual();
65+
eq[5][i].in[0] <== in[i];
66+
eq[5][i].in[1] <== 53;
67+
eq[6][i] = IsEqual();
68+
eq[6][i].in[0] <== in[i];
69+
eq[6][i].in[1] <== 54;
70+
eq[7][i] = IsEqual();
71+
eq[7][i].in[0] <== in[i];
72+
eq[7][i].in[1] <== 55;
73+
eq[8][i] = IsEqual();
74+
eq[8][i].in[0] <== in[i];
75+
eq[8][i].in[1] <== 56;
76+
eq[9][i] = IsEqual();
77+
eq[9][i].in[0] <== in[i];
78+
eq[9][i].in[1] <== 57;
79+
eq[10][i] = IsEqual();
80+
eq[10][i].in[0] <== in[i];
81+
eq[10][i].in[1] <== 95;
82+
and[2][i] = AND();
83+
and[2][i].a <== states[i][1];
84+
multi_or[0][i] = MultiOR(13);
85+
multi_or[0][i].in[0] <== and[0][i].out;
86+
multi_or[0][i].in[1] <== and[1][i].out;
87+
multi_or[0][i].in[2] <== eq[0][i].out;
88+
multi_or[0][i].in[3] <== eq[1][i].out;
89+
multi_or[0][i].in[4] <== eq[2][i].out;
90+
multi_or[0][i].in[5] <== eq[3][i].out;
91+
multi_or[0][i].in[6] <== eq[4][i].out;
92+
multi_or[0][i].in[7] <== eq[5][i].out;
93+
multi_or[0][i].in[8] <== eq[6][i].out;
94+
multi_or[0][i].in[9] <== eq[7][i].out;
95+
multi_or[0][i].in[10] <== eq[8][i].out;
96+
multi_or[0][i].in[11] <== eq[9][i].out;
97+
multi_or[0][i].in[12] <== eq[10][i].out;
98+
and[2][i].b <== multi_or[0][i].out;
99+
and[3][i] = AND();
100+
and[3][i].a <== states[i][23];
101+
and[3][i].b <== multi_or[0][i].out;
102+
multi_or[1][i] = MultiOR(2);
103+
multi_or[1][i].in[0] <== and[2][i].out;
104+
multi_or[1][i].in[1] <== and[3][i].out;
105+
states[i+1][1] <== multi_or[1][i].out;
106+
state_changed[i].in[0] <== states[i+1][1];
107+
eq[11][i] = IsEqual();
108+
eq[11][i].in[0] <== in[i];
109+
eq[11][i].in[1] <== 101;
110+
and[4][i] = AND();
111+
and[4][i].a <== states[i][0];
112+
and[4][i].b <== eq[11][i].out;
113+
states[i+1][2] <== and[4][i].out;
114+
state_changed[i].in[1] <== states[i+1][2];
115+
eq[12][i] = IsEqual();
116+
eq[12][i].in[0] <== in[i];
117+
eq[12][i].in[1] <== 109;
118+
and[5][i] = AND();
119+
and[5][i].a <== states[i][2];
120+
and[5][i].b <== eq[12][i].out;
121+
states[i+1][3] <== and[5][i].out;
122+
state_changed[i].in[2] <== states[i+1][3];
123+
eq[13][i] = IsEqual();
124+
eq[13][i].in[0] <== in[i];
125+
eq[13][i].in[1] <== 46;
126+
and[6][i] = AND();
127+
and[6][i].a <== states[i][1];
128+
and[6][i].b <== eq[13][i].out;
129+
states[i+1][4] <== and[6][i].out;
130+
state_changed[i].in[3] <== states[i+1][4];
131+
eq[14][i] = IsEqual();
132+
eq[14][i].in[0] <== in[i];
133+
eq[14][i].in[1] <== 97;
134+
and[7][i] = AND();
135+
and[7][i].a <== states[i][3];
136+
and[7][i].b <== eq[14][i].out;
137+
states[i+1][5] <== and[7][i].out;
138+
state_changed[i].in[4] <== states[i+1][5];
139+
eq[15][i] = IsEqual();
140+
eq[15][i].in[0] <== in[i];
141+
eq[15][i].in[1] <== 105;
142+
and[8][i] = AND();
143+
and[8][i].a <== states[i][5];
144+
and[8][i].b <== eq[15][i].out;
145+
states[i+1][6] <== and[8][i].out;
146+
state_changed[i].in[5] <== states[i+1][6];
147+
eq[16][i] = IsEqual();
148+
eq[16][i].in[0] <== in[i];
149+
eq[16][i].in[1] <== 108;
150+
and[9][i] = AND();
151+
and[9][i].a <== states[i][6];
152+
and[9][i].b <== eq[16][i].out;
153+
states[i+1][7] <== and[9][i].out;
154+
state_changed[i].in[6] <== states[i+1][7];
155+
eq[17][i] = IsEqual();
156+
eq[17][i].in[0] <== in[i];
157+
eq[17][i].in[1] <== 32;
158+
and[10][i] = AND();
159+
and[10][i].a <== states[i][7];
160+
and[10][i].b <== eq[17][i].out;
161+
states[i+1][8] <== and[10][i].out;
162+
state_changed[i].in[7] <== states[i+1][8];
163+
eq[18][i] = IsEqual();
164+
eq[18][i].in[0] <== in[i];
165+
eq[18][i].in[1] <== 119;
166+
and[11][i] = AND();
167+
and[11][i].a <== states[i][8];
168+
and[11][i].b <== eq[18][i].out;
169+
states[i+1][9] <== and[11][i].out;
170+
state_changed[i].in[8] <== states[i+1][9];
171+
and[12][i] = AND();
172+
and[12][i].a <== states[i][9];
173+
and[12][i].b <== eq[14][i].out;
174+
states[i+1][10] <== and[12][i].out;
175+
state_changed[i].in[9] <== states[i+1][10];
176+
eq[19][i] = IsEqual();
177+
eq[19][i].in[0] <== in[i];
178+
eq[19][i].in[1] <== 115;
179+
and[13][i] = AND();
180+
and[13][i].a <== states[i][10];
181+
and[13][i].b <== eq[19][i].out;
182+
states[i+1][11] <== and[13][i].out;
183+
state_changed[i].in[10] <== states[i+1][11];
184+
and[14][i] = AND();
185+
and[14][i].a <== states[i][11];
186+
and[14][i].b <== eq[17][i].out;
187+
states[i+1][12] <== and[14][i].out;
188+
state_changed[i].in[11] <== states[i+1][12];
189+
and[15][i] = AND();
190+
and[15][i].a <== states[i][12];
191+
and[15][i].b <== eq[12][i].out;
192+
states[i+1][13] <== and[15][i].out;
193+
state_changed[i].in[12] <== states[i+1][13];
194+
and[16][i] = AND();
195+
and[16][i].a <== states[i][13];
196+
and[16][i].b <== eq[11][i].out;
197+
states[i+1][14] <== and[16][i].out;
198+
state_changed[i].in[13] <== states[i+1][14];
199+
and[17][i] = AND();
200+
and[17][i].a <== states[i][14];
201+
and[17][i].b <== eq[14][i].out;
202+
states[i+1][15] <== and[17][i].out;
203+
state_changed[i].in[14] <== states[i+1][15];
204+
eq[20][i] = IsEqual();
205+
eq[20][i].in[0] <== in[i];
206+
eq[20][i].in[1] <== 110;
207+
and[18][i] = AND();
208+
and[18][i].a <== states[i][15];
209+
and[18][i].b <== eq[20][i].out;
210+
states[i+1][16] <== and[18][i].out;
211+
state_changed[i].in[15] <== states[i+1][16];
212+
eq[21][i] = IsEqual();
213+
eq[21][i].in[0] <== in[i];
214+
eq[21][i].in[1] <== 116;
215+
and[19][i] = AND();
216+
and[19][i].a <== states[i][16];
217+
and[19][i].b <== eq[21][i].out;
218+
states[i+1][17] <== and[19][i].out;
219+
state_changed[i].in[16] <== states[i+1][17];
220+
and[20][i] = AND();
221+
and[20][i].a <== states[i][17];
222+
and[20][i].b <== eq[17][i].out;
223+
states[i+1][18] <== and[20][i].out;
224+
state_changed[i].in[17] <== states[i+1][18];
225+
eq[22][i] = IsEqual();
226+
eq[22][i].in[0] <== in[i];
227+
eq[22][i].in[1] <== 102;
228+
and[21][i] = AND();
229+
and[21][i].a <== states[i][18];
230+
and[21][i].b <== eq[22][i].out;
231+
states[i+1][19] <== and[21][i].out;
232+
state_changed[i].in[18] <== states[i+1][19];
233+
eq[23][i] = IsEqual();
234+
eq[23][i].in[0] <== in[i];
235+
eq[23][i].in[1] <== 111;
236+
and[22][i] = AND();
237+
and[22][i].a <== states[i][19];
238+
and[22][i].b <== eq[23][i].out;
239+
states[i+1][20] <== and[22][i].out;
240+
state_changed[i].in[19] <== states[i+1][20];
241+
eq[24][i] = IsEqual();
242+
eq[24][i].in[0] <== in[i];
243+
eq[24][i].in[1] <== 114;
244+
and[23][i] = AND();
245+
and[23][i].a <== states[i][20];
246+
and[23][i].b <== eq[24][i].out;
247+
states[i+1][21] <== and[23][i].out;
248+
state_changed[i].in[20] <== states[i+1][21];
249+
and[24][i] = AND();
250+
and[24][i].a <== states[i][21];
251+
and[24][i].b <== eq[17][i].out;
252+
states[i+1][22] <== and[24][i].out;
253+
state_changed[i].in[21] <== states[i+1][22];
254+
eq[25][i] = IsEqual();
255+
eq[25][i].in[0] <== in[i];
256+
eq[25][i].in[1] <== 64;
257+
and[25][i] = AND();
258+
and[25][i].a <== states[i][22];
259+
and[25][i].b <== eq[25][i].out;
260+
states[i+1][23] <== and[25][i].out;
261+
state_changed[i].in[22] <== states[i+1][23];
262+
states[i+1][0] <== 1 - state_changed[i].out;
263+
}
264+
265+
component final_state_result = MultiOR(num_bytes+1);
266+
for (var i = 0; i <= num_bytes; i++) {
267+
final_state_result.in[i] <== states[i][4];
268+
}
269+
out <== final_state_result.out;
270+
271+
signal is_consecutive[msg_bytes+1][2];
272+
is_consecutive[msg_bytes][1] <== 1;
273+
for (var i = 0; i < msg_bytes; i++) {
274+
is_consecutive[msg_bytes-1-i][0] <== states[num_bytes-i][4] * (1 - is_consecutive[msg_bytes-i][1]) + is_consecutive[msg_bytes-i][1];
275+
is_consecutive[msg_bytes-1-i][1] <== state_changed[msg_bytes-i].out * is_consecutive[msg_bytes-1-i][0];
276+
}
277+
// substrings calculated: [{(1, 1), (23, 1)}]
278+
signal is_substr0[msg_bytes][3];
279+
signal is_reveal0[msg_bytes];
280+
signal output reveal0[msg_bytes];
281+
for (var i = 0; i < msg_bytes; i++) {
282+
is_substr0[i][0] <== 0;
283+
is_substr0[i][1] <== is_substr0[i][0] + states[i+1][1] * states[i+2][1];
284+
is_substr0[i][2] <== is_substr0[i][1] + states[i+1][23] * states[i+2][1];
285+
is_reveal0[i] <== is_substr0[i][2] * is_consecutive[i][1];
286+
reveal0[i] <== in[i+1] * is_reveal0[i];
287+
}
288+
}

0 commit comments

Comments
 (0)