Skip to content

Commit 053864b

Browse files
committed
tweaks to fix up lexer engine
--HG-- branch : JLexPHP extra : convert_revision : svn%3A6e7b9e80-5218-0410-8339-b7667638d355/wez/php/JLexPHP%40136
1 parent 0d4a95c commit 053864b

File tree

5 files changed

+207
-30
lines changed

5 files changed

+207
-30
lines changed

JLexPHP/Main.java

+49-23
Original file line numberDiff line numberDiff line change
@@ -1248,27 +1248,28 @@ else if (is_end)
12481248
}
12491249
m_outstream.println("\t);");
12501250

1251-
m_outstream.print("\tprivate $yy_cmap = null;\n");
1252-
m_outstream.print("\tprivate $yy_rmap = null;\n");
1253-
m_outstream.print("\tprivate $yy_nxt = null;\n");
1251+
// m_outstream.print("\tstatic $yy_cmap = null;\n");
1252+
// m_outstream.print("\tstatic $yy_rmap = null;\n");
1253+
// m_outstream.print("\tprivate $yy_nxt = null;\n");
12541254

1255-
m_outstream.print("\tprotected function yy_build_tables() {\n");
1255+
// m_outstream.print("\tprotected function yy_build_tables() {\n");
12561256

12571257
// CSA: modified yy_cmap to use string packing 9-Aug-1999
12581258
int[] yy_cmap = new int[m_spec.m_ccls_map.length];
12591259
for (i = 0; i < m_spec.m_ccls_map.length; ++i)
12601260
yy_cmap[i] = m_spec.m_col_map[m_spec.m_ccls_map[i]];
1261-
m_outstream.print("\t\t$yy_cmap = $this->unpackFromString(");
1262-
emit_table_as_string(new int[][] { yy_cmap });
1263-
m_outstream.println(");");
1264-
m_outstream.print("\t\t$this->yy_cmap = $yy_cmap[0];");
1261+
m_outstream.print("\t\tstatic $yy_cmap = ");
1262+
// emit_table_as_string(new int[][] { yy_cmap });
1263+
emit_table_as_array(yy_cmap);
1264+
// m_outstream.println(");");
12651265
m_outstream.println();
12661266

12671267
// CSA: modified yy_rmap to use string packing 9-Aug-1999
1268-
m_outstream.print("\t\t$yy_rmap = $this->unpackFromString(");
1269-
emit_table_as_string(new int[][] { m_spec.m_row_map });
1270-
m_outstream.println(");");
1271-
m_outstream.print("\t\t$this->yy_rmap = $yy_rmap[0];");
1268+
m_outstream.print("\t\tstatic $yy_rmap = ");
1269+
// emit_table_as_string(new int[][] { m_spec.m_row_map });
1270+
emit_table_as_array(m_spec.m_row_map);
1271+
// m_outstream.println(");");
1272+
// m_outstream.print("\t\t$this->yy_rmap = $yy_rmap[0];");
12721273
m_outstream.println();
12731274

12741275
// 6/24/98 Raimondas Lencevicius
@@ -1282,13 +1283,38 @@ else if (is_end)
12821283
yy_nxt[elem] = dtrans.m_dtrans;
12831284
}
12841285
m_outstream.print
1285-
("\t\t$this->yy_nxt = $this->unpackFromString(");
1286-
emit_table_as_string(yy_nxt);
1287-
m_outstream.println(");");
1286+
("\t\tstatic $yy_nxt = ");
1287+
// emit_table_as_string(yy_nxt);
1288+
emit_table_as_array_2d(yy_nxt);
1289+
// m_outstream.println(");");
12881290
m_outstream.println();
1289-
m_outstream.println("\t}");
1291+
// m_outstream.println("\t}");
12901292
}
12911293

1294+
private void emit_table_as_array(int [] ia) {
1295+
int i;
1296+
m_outstream.println("array(");
1297+
for (i = 0; i < ia.length; ++i) {
1298+
m_outstream.print(" " + ia[i] + ",");
1299+
if (i % 20 == 19) m_outstream.println();
1300+
}
1301+
m_outstream.println(");");
1302+
}
1303+
private void emit_table_as_array_2d(int [][] ia) {
1304+
int i, j;
1305+
m_outstream.println("array(");
1306+
for (j = 0; j < ia.length; ++j) {
1307+
m_outstream.println("array(");
1308+
for (i = 0; i < ia[j].length; ++i) {
1309+
m_outstream.print(" " + ia[j][i] + ",");
1310+
if (i % 20 == 19) m_outstream.println();
1311+
}
1312+
m_outstream.println();
1313+
m_outstream.println("),");
1314+
}
1315+
m_outstream.println(");");
1316+
}
1317+
12921318
/***************************************************************
12931319
Function: emit_table_as_string
12941320
Description: Output an integer table as a string. Written by
@@ -1353,7 +1379,7 @@ private void emit_table_as_string(int[][] ia) {
13531379
// CSA: output in 75 character chunks.
13541380
if (outstr.length() > 75) {
13551381
String s = outstr.toString();
1356-
m_outstream.println("\""+s.substring(0,75)+"\" +");
1382+
m_outstream.println("\""+s.substring(0,75)+"\" .");
13571383
outstr = new StringBuffer(s.substring(75));
13581384
}
13591385
}
@@ -1437,13 +1463,13 @@ else if (m_spec.m_intwrap_type)
14371463
m_outstream.println("\t\t$yy_next_state = self::YY_NO_STATE;");
14381464
/*m_outstream.println("\t\tint yy_prev_stave = YY_NO_STATE;");*/
14391465
m_outstream.println("\t\t$yy_last_accept_state = self::YY_NO_STATE;");
1440-
m_outstream.println("\t\t$$yy_initial = true;");
1466+
m_outstream.println("\t\t$yy_initial = true;");
14411467
// m_outstream.println("\t\t$yy_this_accept;");
14421468
m_outstream.println();
14431469

14441470
m_outstream.println("\t\t$this->yy_mark_start();");
14451471
/*m_outstream.println("\t\tyy_this_accept = yy_accept(yy_state);");*/
1446-
m_outstream.println("\t\t$yy_this_accept = self::$yy_acpt[$this->yy_state];");
1472+
m_outstream.println("\t\t$yy_this_accept = self::$yy_acpt[$yy_state];");
14471473
m_outstream.println("\t\tif (self::YY_NOT_ACCEPT != $yy_this_accept) {");
14481474
m_outstream.println("\t\t\t$yy_last_accept_state = $yy_state;");
14491475
m_outstream.println("\t\t\t$this->yy_mark_end();");
@@ -1459,11 +1485,11 @@ else if (m_spec.m_intwrap_type)
14591485
m_outstream.println("\t\t\tif ($yy_initial && $this->yy_at_bol) "+
14601486
"$yy_lookahead = self::YY_BOL;");
14611487
m_outstream.println("\t\t\telse $yy_lookahead = $this->yy_advance();");
1462-
m_outstream.println("\t\t\t$this->yy_next_state = self::YY_F;");
1488+
// m_outstream.println("\t\t\t$yy_next_state = self::YY_F;");
14631489
/*m_outstream.println("\t\t\t\tyy_next_state = "
14641490
+ "yy_next(yy_state,yy_lookahead);");*/
14651491
m_outstream.println("\t\t\t$yy_next_state = "
1466-
+ "$this->yy_nxt[$this->yy_rmap[$this->yy_state]][$this->yy_cmap[$this->yy_lookahead]];");
1492+
+ "self::$yy_nxt[self::$yy_rmap[$yy_state]][self::$yy_cmap[$yy_lookahead]];");
14671493

14681494
if (NOT_EDBG)
14691495
{
@@ -1555,9 +1581,9 @@ else if (null != m_spec.m_eof_value_code)
15551581

15561582
m_outstream.println("\t\t\t\t\tswitch ($yy_last_accept_state) {");
15571583

1558-
emit_actions("\t\t\t\t\t");
1584+
emit_actions("\t\t\t\t\t\t");
15591585

1560-
m_outstream.println("\t\t\t\t\tdefault:");
1586+
m_outstream.println("\t\t\t\t\t\tdefault:");
15611587
m_outstream.println("\t\t\t\t\t\t$this->yy_error(self::YY_E_INTERNAL,false);");
15621588
/*m_outstream.println("\t\t\t\t\t\treturn null;");*/
15631589
m_outstream.println("\t\t\t\t\tcase -1:");

Makefile

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11

2-
all: simple-lex.php
2+
all: JLexPHP.jar simple.lex.php c.lex.php
33

44
simple.lex.php: simple.lex JLexPHP.jar
55
java -cp JLexPHP.jar JLexPHP.Main simple.lex
6+
test -s simple.lex.php || rm simple.lex.php
7+
8+
c.lex.php: c.lex JLexPHP.jar
9+
java -cp JLexPHP.jar JLexPHP.Main c.lex
10+
test -s c.lex.php || rm c.lex.php
611

712
JLexPHP.jar: JLexPHP/Main.java
813
javac JLexPHP/Main.java

c.lex

+137
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
<?php # vim:ft=php
2+
include 'jlex.php';
3+
/* This is a lexer for the C language */
4+
5+
/**
 * Token-type constants that the lexer rules in this file pass to
 * createToken().
 *
 * NOTE(review): the rules also reference CParser::TK_* names that are
 * not declared in this class (for example TK_BREAK and TK_CONSTANT) --
 * confirm whether they are meant to be supplied elsewhere.
 */
class CParser {
6+
const TK_AUTO = 0;
7+
const TK_STAR = 1;
8+
const TK_IDENTIFIER = 2;
9+
const TK_PRAGMA = 3;
10+
const TK_TYPEDEF = 4;
11+
}
12+
13+
%%
14+
15+
%{
16+
/* blah */
17+
%}
18+
19+
%function nextToken
20+
%line
21+
%char
22+
%state COMMENT
23+
%class CLexer
24+
25+
D = [0-9]
26+
L = [a-zA-Z_]
27+
H = [a-fA-F0-9]
28+
E = [Ee][+-]?{D}+
29+
FS = (f|F|l|L)
30+
IS = (u|U|l|L)
31+
32+
%%
33+
34+
<YYINITIAL> "/*" { $this->yybegin(self::COMMENT); }
35+
<YYINITIAL> "//[^\r\n]*" { /* C++ comment */ }
36+
37+
<COMMENT> "*/" { $this->yybegin(self::YYINITIAL); }
38+
<COMMENT> [.\n] { }
39+
40+
<YYINITIAL> #[^\r\n]* { return $this->createToken(CParser::TK_PRAGMA); }
41+
42+
<YYINITIAL> "auto" { return $this->createToken(CParser::TK_AUTO); }
43+
<YYINITIAL> "break" { return $this->createToken(CParser::TK_BREAK); }
44+
<YYINITIAL> "case" { return $this->createToken(CParser::TK_CASE); }
45+
<YYINITIAL> "char" { return $this->createToken(CParser::TK_CHAR); }
46+
<YYINITIAL> "const" { return $this->createToken(CParser::TK_CONST); }
47+
<YYINITIAL> "continue" { return $this->createToken(CParser::TK_CONTINUE); }
48+
<YYINITIAL> "default" { return $this->createToken(CParser::TK_DEFAULT); }
49+
<YYINITIAL> "do" { return $this->createToken(CParser::TK_DO); }
50+
<YYINITIAL> "double" { return $this->createToken(CParser::TK_DOUBLE); }
51+
<YYINITIAL> "else" { return $this->createToken(CParser::TK_ELSE); }
52+
<YYINITIAL> "enum" { return $this->createToken(CParser::TK_ENUM); }
53+
<YYINITIAL> "extern" { return $this->createToken(CParser::TK_EXTERN); }
54+
<YYINITIAL> "float" { return $this->createToken(CParser::TK_FLOAT); }
55+
<YYINITIAL> "for" { return $this->createToken(CParser::TK_FOR); }
56+
<YYINITIAL> "goto" { return $this->createToken(CParser::TK_GOTO); }
57+
<YYINITIAL> "if" { return $this->createToken(CParser::TK_IF); }
58+
<YYINITIAL> "int" { return $this->createToken(CParser::TK_INT); }
59+
<YYINITIAL> "long" { return $this->createToken(CParser::TK_LONG); }
60+
<YYINITIAL> "register" { return $this->createToken(CParser::TK_REGISTER); }
61+
<YYINITIAL> "return" { return $this->createToken(CParser::TK_RETURN); }
62+
<YYINITIAL> "short" { return $this->createToken(CParser::TK_SHORT); }
63+
<YYINITIAL> "signed" { return $this->createToken(CParser::TK_SIGNED); }
64+
<YYINITIAL> "sizeof" { return $this->createToken(CParser::TK_SIZEOF); }
65+
<YYINITIAL> "static" { return $this->createToken(CParser::TK_STATIC); }
66+
<YYINITIAL> "struct" { return $this->createToken(CParser::TK_STRUCT); }
67+
<YYINITIAL> "switch" { return $this->createToken(CParser::TK_SWITCH); }
68+
<YYINITIAL> "typedef" { return $this->createToken(CParser::TK_TYPEDEF); }
69+
<YYINITIAL> "union" { return $this->createToken(CParser::TK_UNION); }
70+
<YYINITIAL> "unsigned" { return $this->createToken(CParser::TK_UNSIGNED); }
71+
<YYINITIAL> "void" { return $this->createToken(CParser::TK_VOID); }
72+
<YYINITIAL> "volatile" { return $this->createToken(CParser::TK_VOLATILE); }
73+
<YYINITIAL> "while" { return $this->createToken(CParser::TK_WHILE); }
74+
75+
<YYINITIAL> {L}({L}|{D})* { return $this->createToken(CParser::TK_IDENTIFIER); }
76+
77+
<YYINITIAL> 0[xX]{H}+{IS}? { return $this->createToken(CParser::TK_CONSTANT); }
78+
<YYINITIAL> 0{D}+{IS}? { return $this->createToken(CParser::TK_CONSTANT); }
79+
<YYINITIAL> {D}+{IS}? { return $this->createToken(CParser::TK_CONSTANT); }
80+
<YYINITIAL> L?\'(\\.|[^\\\'])+\' { return $this->createToken(CParser::TK_CONSTANT); }
81+
82+
<YYINITIAL> {D}+{E}{FS}? { return $this->createToken(CParser::TK_CONSTANT); }
83+
<YYINITIAL> {D}*"."{D}+({E})?{FS}? { return $this->createToken(CParser::TK_CONSTANT); }
84+
<YYINITIAL> {D}+"."{D}*({E})?{FS}? { return $this->createToken(CParser::TK_CONSTANT); }
85+
86+
<YYINITIAL> L?\"(\\.|[^\\\"])*\" { return $this->createToken(CParser::TK_STRING_LITERAL); }
87+
88+
<YYINITIAL> "..." { return $this->createToken(CParser::TK_ELLIPSIS); }
89+
<YYINITIAL> ">>=" { return $this->createToken(CParser::TK_RIGHT_ASSIGN); }
90+
<YYINITIAL> "<<=" { return $this->createToken(CParser::TK_LEFT_ASSIGN); }
91+
<YYINITIAL> "+=" { return $this->createToken(CParser::TK_ADD_ASSIGN); }
92+
<YYINITIAL> "-=" { return $this->createToken(CParser::TK_SUB_ASSIGN); }
93+
<YYINITIAL> "*=" { return $this->createToken(CParser::TK_MUL_ASSIGN); }
94+
<YYINITIAL> "/=" { return $this->createToken(CParser::TK_DIV_ASSIGN); }
95+
<YYINITIAL> "%=" { return $this->createToken(CParser::TK_MOD_ASSIGN); }
96+
<YYINITIAL> "&=" { return $this->createToken(CParser::TK_AND_ASSIGN); }
97+
<YYINITIAL> "^=" { return $this->createToken(CParser::TK_XOR_ASSIGN); }
98+
<YYINITIAL> "|=" { return $this->createToken(CParser::TK_OR_ASSIGN); }
99+
<YYINITIAL> ">>" { return $this->createToken(CParser::TK_RIGHT_OP); }
100+
<YYINITIAL> "<<" { return $this->createToken(CParser::TK_LEFT_OP); }
101+
<YYINITIAL> "++" { return $this->createToken(CParser::TK_INC_OP); }
102+
<YYINITIAL> "--" { return $this->createToken(CParser::TK_DEC_OP); }
103+
<YYINITIAL> "->" { return $this->createToken(CParser::TK_PTR_OP); }
104+
<YYINITIAL> "&&" { return $this->createToken(CParser::TK_AND_OP); }
105+
<YYINITIAL> "||" { return $this->createToken(CParser::TK_OR_OP); }
106+
<YYINITIAL> "<=" { return $this->createToken(CParser::TK_LE_OP); }
107+
<YYINITIAL> ">=" { return $this->createToken(CParser::TK_GE_OP); }
108+
<YYINITIAL> "==" { return $this->createToken(CParser::TK_EQ_OP); }
109+
<YYINITIAL> "!=" { return $this->createToken(CParser::TK_NE_OP); }
110+
<YYINITIAL> ";" { return $this->createToken(CParser::TK_SEMIC); }
111+
<YYINITIAL> ("{"|"<%") { return $this->createToken(CParser::TK_LCURLY); }
112+
<YYINITIAL> ("}"|"%>") { return $this->createToken(CParser::TK_RCURLY); }
113+
<YYINITIAL> "," { return $this->createToken(CParser::TK_COMMA); }
114+
<YYINITIAL> ":" { return $this->createToken(CParser::TK_COLON); }
115+
<YYINITIAL> "=" { return $this->createToken(CParser::TK_EQUALS); }
116+
<YYINITIAL> "(" { return $this->createToken(CParser::TK_LPAREN); }
117+
<YYINITIAL> ")" { return $this->createToken(CParser::TK_RPAREN); }
118+
<YYINITIAL> ("["|"<:") { return $this->createToken(CParser::TK_LSQUARE); }
119+
<YYINITIAL> ("]"|":>") { return $this->createToken(CParser::TK_RSQUARE); }
120+
<YYINITIAL> "." { return $this->createToken(CParser::TK_PERIOD); }
121+
<YYINITIAL> "&" { return $this->createToken(CParser::TK_AMP); }
122+
<YYINITIAL> "!" { return $this->createToken(CParser::TK_EXCLAM); }
123+
<YYINITIAL> "~" { return $this->createToken(CParser::TK_TILDE); }
124+
<YYINITIAL> "-" { return $this->createToken(CParser::TK_MINUS); }
125+
<YYINITIAL> "+" { return $this->createToken(CParser::TK_PLUS); }
126+
<YYINITIAL> "*" { return $this->createToken(CParser::TK_STAR); }
127+
<YYINITIAL> "/" { return $this->createToken(CParser::TK_SLASH); }
128+
<YYINITIAL> "%" { return $this->createToken(CParser::TK_PERCENT); }
129+
<YYINITIAL> "<" { return $this->createToken(CParser::TK_LANGLE); }
130+
<YYINITIAL> ">" { return $this->createToken(CParser::TK_RANGLE); }
131+
<YYINITIAL> "^" { return $this->createToken(CParser::TK_CARET); }
132+
<YYINITIAL> "|" { return $this->createToken(CParser::TK_PIPE); }
133+
<YYINITIAL> "?" { return $this->createToken(CParser::TK_QUESTION); }
134+
135+
<YYINITIAL> [ \t\v\n\f] { }
136+
. { /* ignore bad characters */ }
137+

jlex.php

+12-5
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,11 @@ class JLexBase {
3636
function __construct($stream) {
3737
$this->yy_reader = $stream;
3838
$meta = stream_get_meta_data($stream);
39-
$this->yyfilename = $meta['uri'];
39+
if (!isset($meta['uri'])) {
40+
$this->yyfilename = '<<input>>';
41+
} else {
42+
$this->yyfilename = $meta['uri'];
43+
}
4044

4145
$this->yy_buffer = "";
4246
$this->yy_buffer_read = 0;
@@ -46,17 +50,17 @@ function __construct($stream) {
4650
$this->yychar = 0;
4751
$this->yyline = 0;
4852
$this->yy_at_bol = true;
49-
50-
$this->yy_build_tables();
5153
}
5254

5355
/**
 * Records $state as the current lexical state.
 *
 * @param mixed $state value stored in $this->yy_lexical_state
 */
protected function yybegin($state) {
56+
// Debug trace, left disabled:
// echo "yybegin:", $state, "\n";
5457
$this->yy_lexical_state = $state;
5558
}
5659

5760
protected function yy_advance() {
5861
if ($this->yy_buffer_index < $this->yy_buffer_read) {
59-
return $this->yy_buffer[$this->yy_buffer_index++];
62+
# echo "yy_advance: idx=", $this->yy_buffer_index, " ", ord($this->yy_buffer[$this->yy_buffer_index]), " ", $this->yy_buffer[$this->yy_buffer_index], "\n";
63+
return ord($this->yy_buffer[$this->yy_buffer_index++]);
6064
}
6165
if ($this->yy_buffer_start != 0) {
6266
/* shunt */
@@ -74,10 +78,12 @@ protected function yy_advance() {
7478

7579
while ($this->yy_buffer_index >= $this->yy_buffer_read) {
7680
$data = fread($this->yy_reader, 8192);
81+
if ($data === false) break;
7782
$this->yy_buffer .= $data;
7883
$this->yy_buffer_read .= strlen($data);
7984
}
80-
return $this->yy_buffer[$this->yy_buffer_index++];
85+
# echo "yy_advance: idx=", $this->yy_buffer_index, " ", ord($this->yy_buffer[$this->yy_buffer_index]), " ", $this->yy_buffer[$this->yy_buffer_index], "\n";
86+
return ord($this->yy_buffer[$this->yy_buffer_index++]);
8187
}
8288

8389
protected function yy_move_end() {
@@ -116,6 +122,7 @@ protected function yy_mark_end() {
116122
}
117123

118124
protected function yy_to_mark() {
125+
# echo "yy_to_mark: setting buffer index to ", $this->yy_buffer_end, "\n";
119126
$this->yy_buffer_index = $this->yy_buffer_end;
120127
$this->yy_at_bol = ($this->yy_buffer_end > $this->yy_buffer_start) &&
121128
("\r" == $this->yy_buffer[$this->yy_buffer_end-1] ||

simple.lex

+3-1
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@ include 'jlex.php';
44
%%
55

66
%{
7+
//<YYINITIAL> L? \" (\\.|[^\\\"])* \" { $this->createToken(CParser::TK_STRING_LITERAL); }
78
/* blah */
89
%}
910

11+
%function nextToken
1012
%line
1113
%char
1214
%state COMMENTS
@@ -49,5 +51,5 @@ WHITE_SPACE=([\ \n\r\t\f])+
4951
$this->yybegin(self::YYINITIAL);
5052
}
5153
<YYINITIAL> . {
52-
return new Symbol(sym.error, null);
54+
throw new Exception("bah!");
5355
}

0 commit comments

Comments
 (0)