Skip to content

Commit c276409

Browse files
moreganwchristian
authored andcommitted
extend _cast_or_op
1 parent 8155ca7 commit c276409

File tree

3 files changed

+104
-67
lines changed

3 files changed

+104
-67
lines changed

Changes

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ Revision history for Perl extension PPI
3030
- fixed parsing of large numbers in Number::Exp on Solaris 80 (JMASLAK)
3131
- make remove_child actually return undef on failure to find child to
3232
remove
33+
- higher accuracy when deciding whether certain characters are operators
34+
or variable type casts (*&% etc.) (MOREGAN)
3335

3436
1.220 Tue 11 Nov 2014
3537
Summary:

lib/PPI/Token/Unknown.pm

Lines changed: 90 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -76,19 +76,14 @@ sub __TOKENIZER__on_char {
7676
}
7777
}
7878

79-
if ( $char eq '$' ) {
80-
my $_class = $self->_cast_or_op( $t );
81-
# Set class and rerun
82-
$t->{class} = $t->{token}->set_class( $_class );
83-
return $t->_finalize_token->__TOKENIZER__on_char( $t );
84-
}
85-
8679
if ( $char eq '*' || $char eq '=' ) {
8780
# Power operator '**' or mult-assign '*='
8881
$t->{class} = $t->{token}->set_class( 'Operator' );
8982
return 1;
9083
}
9184

85+
return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char);
86+
9287
$t->{class} = $t->{token}->set_class( 'Operator' );
9388
return $t->_finalize_token->__TOKENIZER__on_char( $t );
9489

@@ -176,18 +171,13 @@ sub __TOKENIZER__on_char {
176171
# Get rest of line
177172
pos $t->{line} = $t->{line_cursor} + 1;
178173
if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) {
179-
# control-character symbol (e.g. @{^_Foo})
174+
# control-character symbol (e.g. %{^_Foo})
180175
$t->{class} = $t->{token}->set_class( 'Magic' );
181176
return 1;
182177
}
183178
}
184179

185-
if ( $char =~ /[\$@%*{]/ ) {
186-
# It's a cast
187-
$t->{class} = $t->{token}->set_class( 'Cast' );
188-
return $t->_finalize_token->__TOKENIZER__on_char( $t );
189-
190-
}
180+
return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char);
191181

192182
# Probably the mod operator
193183
$t->{class} = $t->{token}->set_class( 'Operator' );
@@ -209,11 +199,7 @@ sub __TOKENIZER__on_char {
209199
return 1;
210200
}
211201

212-
if ( $char =~ /[\$@%{]/ ) {
213-
# The ampersand is a cast
214-
$t->{class} = $t->{token}->set_class( 'Cast' );
215-
return $t->_finalize_token->__TOKENIZER__on_char( $t );
216-
}
202+
return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char);
217203

218204
# Probably the binary and operator
219205
$t->{class} = $t->{token}->set_class( 'Operator' );
@@ -271,26 +257,95 @@ sub __TOKENIZER__on_char {
271257
PPI::Exception->throw('Unknown value in PPI::Token::Unknown token');
272258
}
273259

260+
sub _is_cast_or_op {
261+
my ( $self, $char ) = @_;
262+
return 1 if $char eq '$';
263+
return 1 if $char eq '@';
264+
return 1 if $char eq '%';
265+
return 1 if $char eq '*';
266+
return 1 if $char eq '{';
267+
return;
268+
}
269+
270+
sub _as_cast_or_op {
271+
my ( $self, $t ) = @_;
272+
my $class = _cast_or_op( $t );
273+
$t->{class} = $t->{token}->set_class( $class );
274+
return $t->_finalize_token->__TOKENIZER__on_char( $t );
275+
}
276+
277+
sub _prev_significant_w_cursor {
278+
my ( $tokens, $cursor, $extra_check ) = @_;
279+
while ( $cursor >= 0 ) {
280+
my $token = $tokens->[ $cursor-- ];
281+
next if !$token->significant;
282+
next if $extra_check and !$extra_check->($token);
283+
return ( $token, $cursor );
284+
}
285+
return ( undef, $cursor );
286+
}
287+
274288
# Operator/operand-sensitive, multiple or GLOB cast
275289
sub _cast_or_op {
276-
my ( undef, $t ) = @_;
277-
my ( $prev ) = @{ $t->_previous_significant_tokens(1) };
278-
return 'Cast' if !$prev;
279-
280-
return 'Operator' if
281-
$prev->isa('PPI::Token::Symbol')
282-
or
283-
$prev->isa('PPI::Token::Number')
284-
or
285-
(
286-
$prev->isa('PPI::Token::Structure')
287-
and
288-
$prev->content =~ /^(?:\)|\])$/
290+
my ( $t ) = @_;
291+
292+
my $tokens = $t->{tokens};
293+
my $cursor = scalar( @$tokens ) - 1;
294+
my $token;
295+
296+
( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor );
297+
return 'Cast' if !$token; # token was first in the document
298+
299+
if ( $token->isa( 'PPI::Token::Structure' ) and $token->content eq '}' ) {
300+
301+
# Scan the token stream backwards an arbitrarily long way,
302+
# looking for the matching opening curly brace.
303+
my $structure_depth = 1;
304+
( $token, $cursor ) = _prev_significant_w_cursor(
305+
$tokens, $cursor,
306+
sub {
307+
my ( $token ) = @_;
308+
return if !$token->isa( 'PPI::Token::Structure' );
309+
if ( $token eq '}' ) {
310+
$structure_depth++;
311+
return;
312+
}
313+
if ( $token eq '{' ) {
314+
$structure_depth--;
315+
return if $structure_depth;
316+
}
317+
return 1;
318+
}
289319
);
320+
return 'Operator' if !$token; # no matching '{', probably an unbalanced '}'
321+
322+
# Scan past any whitespace
323+
( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor );
324+
return 'Operator' if !$token; # Document began with what must be a hash constructor.
325+
return 'Operator' if $token->isa( 'PPI::Token::Symbol' ); # subscript
326+
327+
my %meth_or_subscript_end = map { $_ => 1 } qw@ -> } ] @;
328+
return 'Operator' if $meth_or_subscript_end{ $token->content }; # subscript
329+
330+
my $content = $token->content;
331+
my $produces_or_wants_value =
332+
( $token->isa( 'PPI::Token::Word' ) and ( $content eq 'do' or $content eq 'eval' ) );
333+
return $produces_or_wants_value ? 'Operator' : 'Cast';
334+
}
335+
336+
my %list_start_or_term_end = map { $_ => 1 } qw@ ; ( { [ @;
337+
return 'Cast'
338+
if $token->isa( 'PPI::Token::Structure' ) and $list_start_or_term_end{ $token->content }
339+
or $token->isa( 'PPI::Token::Cast' )
340+
or $token->isa( 'PPI::Token::Operator' )
341+
or $token->isa( 'PPI::Token::Label' );
342+
343+
return 'Operator' if !$token->isa( 'PPI::Token::Word' );
344+
345+
( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor );
346+
return 'Cast' if !$token || $token->content ne '->';
290347

291-
# This is pretty weak, there's room for a dozen more tests before going with
292-
# a default. Or even better, a proper operator/operand method :(
293-
return 'Cast';
348+
return 'Operator';
294349
}
295350

296351
# Are we at a location where a ':' would indicate a subroutine attribute

t/ppi_token_unknown.t

Lines changed: 12 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -100,36 +100,28 @@ OPERATOR_CAST: {
100100
test_varying_whitespace( @nothing, @asterisk_cast, @scalar );
101101
}
102102

103-
{
104-
local %known_bad_seps = map { $_ => 1 } qw( null );
105103
test_varying_whitespace( @number, @percent_op, @scalar );
106104
test_varying_whitespace( @number, @percent_op, @list );
107105
test_varying_whitespace( @number, @percent_op, @hash );
108106
test_varying_whitespace( @number, @percent_op, @glob );
109107
test_varying_whitespace( @number, @percent_op, @hashctor1 );
110108
test_varying_whitespace( @number, @percent_op, @hashctor2 );
111109
test_varying_whitespace( @number, @percent_op, @hashctor3 );
112-
}
113110
test_varying_whitespace( @number, @percenteq_op, @bareword );
114111
test_varying_whitespace( @number, @percenteq_op, @hashctor3 ); # doesn't compile, but make sure it's an operator
115112
{
116113
local %known_bad_seps = map { $_ => 1 } qw( space );
117114
test_varying_whitespace( @nothing, @percent_cast, @scalar );
118115
}
119116

120-
{
121-
local %known_bad_seps = map { $_ => 1 } qw( null );
122117
test_varying_whitespace( @number, @ampersand_op, @scalar );
123118
test_varying_whitespace( @number, @ampersand_op, @list );
124119
test_varying_whitespace( @number, @ampersand_op, @hash );
125-
}
120+
126121
test_varying_whitespace( @number, @ampersand_op, @glob );
127-
{
128-
local %known_bad_seps = map { $_ => 1 } qw( null );
129122
test_varying_whitespace( @number, @ampersand_op, @hashctor1 );
130123
test_varying_whitespace( @number, @ampersand_op, @hashctor2 );
131124
test_varying_whitespace( @number, @ampersand_op, @hashctor3 );
132-
}
133125
test_varying_whitespace( @number, @ampersandeq_op, @bareword );
134126
test_varying_whitespace( @number, @ampersandeq_op, @hashctor3 ); # doesn't compile, but make sure it's an operator
135127
{
@@ -156,27 +148,28 @@ OPERATOR_CAST: {
156148
}
157149

158150
my @single = ( "'3'", [ 'PPI::Token::Quote::Single' => "'3'", ] );
151+
test_varying_whitespace( @single, @asterisk_op, @scalar );
159152
{
160153
local %known_bad_seps = map { $_ => 1 } qw( null );
161-
test_varying_whitespace( @single, @asterisk_op, @scalar );
162154
test_varying_whitespace( @single, @asterisk_op, @hashctor3 );
155+
}
163156
test_varying_whitespace( @single, @percent_op, @scalar );
164157
test_varying_whitespace( @single, @percent_op, @hashctor3 );
165158
test_varying_whitespace( @single, @ampersand_op, @scalar );
166159
test_varying_whitespace( @single, @ampersand_op, @hashctor3 );
167160

168161
my @double = ( '"3"', [ 'PPI::Token::Quote::Double' => '"3"', ] );
169162
test_varying_whitespace( @double, @asterisk_op, @scalar );
163+
{
164+
local %known_bad_seps = map { $_ => 1 } qw( null );
170165
test_varying_whitespace( @double, @asterisk_op, @hashctor3 );
166+
}
171167
test_varying_whitespace( @double, @percent_op, @scalar );
172168
test_varying_whitespace( @double, @percent_op, @hashctor3 );
173169
test_varying_whitespace( @double, @ampersand_op, @scalar );
174170
test_varying_whitespace( @double, @ampersand_op, @hashctor3 );
175-
}
176171

177172
test_varying_whitespace( @scalar, @asterisk_op, @scalar );
178-
{
179-
local %known_bad_seps = map { $_ => 1 } qw( null );
180173
test_varying_whitespace( @scalar, @percent_op, @scalar );
181174
test_varying_whitespace( @scalar, @ampersand_op, @scalar );
182175

@@ -192,7 +185,7 @@ OPERATOR_CAST: {
192185
]
193186
);
194187
{
195-
local %known_bad_seps = ( %known_bad_seps, map { $_ => 1 } qw( space ) );
188+
local %known_bad_seps = map { $_ => 1 } qw( null space );
196189
test_varying_whitespace( @package, @asterisk_cast, @scalar, 1 );
197190
test_varying_whitespace( @package, @asterisk_cast, @hashctor3, 1 );
198191
test_varying_whitespace( @package, @percent_cast, @scalar, 1 );
@@ -201,7 +194,6 @@ OPERATOR_CAST: {
201194
test_varying_whitespace( @package, @ampersand_cast, @hashctor3, 1 );
202195
test_varying_whitespace( @package, @at_cast, @scalar, 1 );
203196
test_varying_whitespace( @package, @at_cast, @listctor, 1 );
204-
}
205197
}
206198

207199
my @sub = (
@@ -300,10 +292,11 @@ OPERATOR_CAST: {
300292
'PPI::Token::Structure' => '}',
301293
]
302294
);
295+
test_varying_whitespace( @evalblock, @asterisk_op, @scalar );
303296
{
304297
local %known_bad_seps = map { $_ => 1 } qw( null );
305-
test_varying_whitespace( @evalblock, @asterisk_op, @scalar );
306298
test_varying_whitespace( @evalblock, @asterisk_op, @hashctor3 );
299+
}
307300
test_varying_whitespace( @evalblock, @percent_op, @scalar );
308301
test_varying_whitespace( @evalblock, @percent_op, @hashctor3 );
309302
test_varying_whitespace( @evalblock, @ampersand_op, @scalar );
@@ -317,12 +310,14 @@ OPERATOR_CAST: {
317310
]
318311
);
319312
test_varying_whitespace( @evalstring, @asterisk_op, @scalar );
313+
{
314+
local %known_bad_seps = map { $_ => 1 } qw( null );
320315
test_varying_whitespace( @evalstring, @asterisk_op, @hashctor3 );
316+
}
321317
test_varying_whitespace( @evalstring, @percent_op, @scalar );
322318
test_varying_whitespace( @evalstring, @percent_op, @hashctor3 );
323319
test_varying_whitespace( @evalstring, @ampersand_op, @scalar );
324320
test_varying_whitespace( @evalstring, @ampersand_op, @hashctor3 );
325-
}
326321

327322
my @curly_subscript1 = (
328323
'$y->{x}',
@@ -383,8 +378,6 @@ OPERATOR_CAST: {
383378
]
384379
);
385380

386-
{
387-
local %known_bad_seps = map { $_ => 1 } qw( null );
388381
test_varying_whitespace( @curly_subscript1, @asterisk_op, @scalar );
389382
test_varying_whitespace( @curly_subscript1, @percent_op, @scalar );
390383
test_varying_whitespace( @curly_subscript1, @ampersand_op, @scalar );
@@ -394,13 +387,9 @@ OPERATOR_CAST: {
394387
test_varying_whitespace( @curly_subscript3, @asterisk_op, @scalar );
395388
test_varying_whitespace( @curly_subscript3, @percent_op, @scalar );
396389
test_varying_whitespace( @curly_subscript3, @ampersand_op, @scalar );
397-
}
398390
test_varying_whitespace( @square_subscript1, @asterisk_op, @scalar );
399-
{
400-
local %known_bad_seps = map { $_ => 1 } qw( null );
401391
test_varying_whitespace( @square_subscript1, @percent_op, @scalar );
402392
test_varying_whitespace( @square_subscript1, @ampersand_op, @scalar );
403-
}
404393

405394
{
406395
local %known_bad_seps = map { $_ => 1 } qw( space );
@@ -411,8 +400,6 @@ OPERATOR_CAST: {
411400
test_varying_whitespace( 'values', [ 'PPI::Token::Word' => 'values' ], @percent_cast, @hashctor3 );
412401
}
413402

414-
TODO: {
415-
local $TODO = "known bug";
416403
test_statement(
417404
'} *$a', # unbalanced '}' before '*', arbitrary decision
418405
[
@@ -423,7 +410,6 @@ TODO: {
423410
'PPI::Token::Symbol' => '$a',
424411
]
425412
);
426-
}
427413

428414
test_statement(
429415
'$bar = \%*$foo', # multiple consecutive casts
@@ -437,8 +423,6 @@ TODO: {
437423
]
438424
);
439425

440-
TODO: {
441-
local $TODO = "known bug";
442426
test_statement(
443427
'$#tmp*$#tmp2',
444428
[
@@ -447,7 +431,6 @@ TODO: {
447431
'PPI::Token::ArrayIndex' => '$#tmp2',
448432
]
449433
);
450-
}
451434

452435
test_statement(
453436
'[ %{$req->parameters} ]', # preceded by '['
@@ -484,8 +467,6 @@ TODO: {
484467
]
485468
);
486469

487-
TODO: {
488-
local $TODO = "known bug";
489470
test_statement(
490471
'++$i%$f', # '%' wrongly a cast through 1.220.
491472
[
@@ -496,7 +477,6 @@ TODO: {
496477
'PPI::Token::Symbol' => '$f',
497478
]
498479
);
499-
}
500480

501481
{ # these need to be fixed in PPI::Lexer->_statement, fixing these will break other tests that need to be changed
502482
local $TODO = "clarify type of statement in constructor";

0 commit comments

Comments
 (0)