diff --git a/lib/PPI/Lexer.pm b/lib/PPI/Lexer.pm index 94b1eb11..0d6e2cea 100644 --- a/lib/PPI/Lexer.pm +++ b/lib/PPI/Lexer.pm @@ -440,6 +440,28 @@ sub _statement { # Is it a token in our known classes list my $class = $STATEMENT_CLASSES{$Token->content}; + if ( $class ) { + # Is the next significant token a => + # Read ahead to the next significant token + my $Next; + while ( $Next = $self->_get_token ) { + if ( !$Next->significant ) { + push @{$self->{delayed}}, $Next; + next; + } + + last if + !$Next->isa( 'PPI::Token::Operator' ) or $Next->content ne '=>'; + + # Got the next token + # Is an ordinary expression + $self->_rollback( $Next ); + return 'PPI::Statement'; + } + + # Rollback and continue + $self->_rollback( $Next ); + } # Handle potential barewords for subscripts if ( $Parent->isa('PPI::Structure::Subscript') ) { @@ -533,8 +555,16 @@ sub _statement { } # Found the next significant token. + if ( + $Next->isa('PPI::Token::Operator') + and + $Next->content eq '=>' + ) { + # Is an ordinary expression + $self->_rollback( $Next ); + return 'PPI::Statement'; # Is it a v6 use? - if ( $Next->content eq 'v6' ) { + } elsif ( $Next->content eq 'v6' ) { $self->_rollback( $Next ); return 'PPI::Statement::Include::Perl6'; } else { diff --git a/lib/PPI/Token/Attribute.pm b/lib/PPI/Token/Attribute.pm index edda4d83..0a497ce0 100644 --- a/lib/PPI/Token/Attribute.pm +++ b/lib/PPI/Token/Attribute.pm @@ -79,7 +79,7 @@ Returns C if the attribute does not have parameters. sub parameters { my $self = shift; - $self->{content} =~ /\((.+)\)$/ ? $1 : undef; + $self->{content} =~ /\((.+)\)$/s ? $1 : undef; } diff --git a/lib/PPI/Token/HereDoc.pm b/lib/PPI/Token/HereDoc.pm index aed07d4e..3a255a85 100644 --- a/lib/PPI/Token/HereDoc.pm +++ b/lib/PPI/Token/HereDoc.pm @@ -201,14 +201,10 @@ sub __TOKENIZER__on_char { return undef; } - # Define $line outside of the loop, so that if we encounter the - # end of the file, we have access to the last line still. - my $line; - # Suck in the HEREDOC $token->{_heredoc} = \my @heredoc; my $terminator = $token->{_terminator} . "\n"; - while ( defined($line = $t->_get_line) ) { + while ( defined( my $line = $t->_get_line ) ) { if ( $line eq $terminator ) { # Keep the actual termination line for consistency # when we are re-assembling the file @@ -224,24 +220,25 @@ sub __TOKENIZER__on_char { # End of file. # Error: Didn't reach end of here-doc before end of file. - # $line might be undef if we get NO lines. - if ( defined $line and $line eq $token->{_terminator} ) { - # If the last line matches the terminator - # but is missing the newline, we want to allow - # it anyway (like perl itself does). In this case - # perl would normally throw a warning, but we will - # also ignore that as well. - pop @heredoc; - $token->{_terminator_line} = $line; - } else { - # The HereDoc was not properly terminated. - $token->{_terminator_line} = undef; - # Trim off the trailing whitespace - if ( defined $heredoc[-1] and $t->{source_eof_chop} ) { + # If the here-doc block is not empty, look at the last line to determine if + # the here-doc terminator is missing a newline (which Perl would fail to + # compile but is easy to detect) or if the here-doc block was just not + # terminated at all (which Perl would fail to compile as well). + $token->{_terminator_line} = undef; + if ( @heredoc and defined $heredoc[-1] ) { + # See PPI::Tokenizer, the algorithm there adds a space at the end of the + # document that we need to make sure we remove. + if ( $t->{source_eof_chop} ) { chop $heredoc[-1]; $t->{source_eof_chop} = ''; } + + # Check if the last line of the file matches the terminator without + # newline at the end. If so, remove it from the content and set it as + # the terminator line. + $token->{_terminator_line} = pop @heredoc + if $heredoc[-1] eq $token->{_terminator}; } # Set a hint for PPI::Document->serialize so it can diff --git a/lib/PPI/Token/QuoteLike/Words.pm b/lib/PPI/Token/QuoteLike/Words.pm index 1a69bc8a..fe634331 100644 --- a/lib/PPI/Token/QuoteLike/Words.pm +++ b/lib/PPI/Token/QuoteLike/Words.pm @@ -42,20 +42,27 @@ BEGIN { =head2 literal -Returns the words contained. Note that this method does not check the +Returns the words contained as a list. Note that this method does not check the context that the token is in; it always returns the list and not merely the last element if the token is in scalar context. =cut sub literal { - my $self = shift; - my $section = $self->{sections}->[0]; - return split ' ', substr( - $self->{content}, - $section->{position}, - $section->{size}, - ); + my ( $self ) = @_; + + my $content = $self->_section_content(0); + return if !defined $content; + + # Undo backslash escaping of '\', the left delimiter, + # and the right delimiter. The right delimiter will + # only exist with paired delimiters: qw() qw[] qw<> qw{}. + my ( $left, $right ) = ( $self->_delimiters, '', '' ); + $content =~ s/\\([\Q$left$right\\\E])/$1/g; + + my @words = split ' ', $content; + + return @words; } 1; diff --git a/lib/PPI/Token/Symbol.pm b/lib/PPI/Token/Symbol.pm index 7c2eb891..17733143 100644 --- a/lib/PPI/Token/Symbol.pm +++ b/lib/PPI/Token/Symbol.pm @@ -61,8 +61,8 @@ variations. sub canonical { my $symbol = shift->content; $symbol =~ s/\s+//; - $symbol =~ s/(?<=[\$\@\%\&\*])::/main::/; $symbol =~ s/\'/::/g; + $symbol =~ s/(?<=[\$\@\%\&\*])::/main::/; $symbol; } diff --git a/t/ppi_token_heredoc.t b/t/ppi_token_heredoc.t index 20755dac..309bb452 100644 --- a/t/ppi_token_heredoc.t +++ b/t/ppi_token_heredoc.t @@ -69,8 +69,6 @@ h { }, }; -TODO: { -local $TODO = "parsing bugs need to be fixed yet"; # Tests without a carriage return after the termination marker. h { name => 'Bareword terminator (no return).', @@ -122,7 +120,6 @@ h { _mode => 'literal', }, }; -} # Tests without a terminator. h { diff --git a/t/ppi_token_operator.t b/t/ppi_token_operator.t index b93a8d6f..fe8a26da 100644 --- a/t/ppi_token_operator.t +++ b/t/ppi_token_operator.t @@ -600,7 +600,6 @@ TODO: { OPERATOR_FAT_COMMA: { - my %known_bad = map { $_ => 1 } map { "$_=>2" } qw( default for foreach given goto if last local my next no our package redo require return state unless until use when while ); my @tests = ( { desc => 'integer with integer', @@ -682,15 +681,12 @@ OPERATOR_FAT_COMMA: { if ( $expected->[0] !~ /^PPI::Statement/ ) { unshift @$expected, 'PPI::Statement', $test->{code}; } -TODO: { - local $TODO = $known_bad{$test->{code}} ? "known bug" : undef; my $ok = is_deeply( $tokens, $expected, $test->{desc} ); if ( !$ok ) { diag "$test->{code} ($test->{desc})\n"; diag explain $tokens; diag explain $test->{expected}; } -} } } diff --git a/t/ppi_token_quotelike_words.t b/t/ppi_token_quotelike_words.t index 3d1674c0..85a52615 100644 --- a/t/ppi_token_quotelike_words.t +++ b/t/ppi_token_quotelike_words.t @@ -104,10 +104,7 @@ sub execute_test { my $found = $d->find( 'PPI::Token::QuoteLike::Words' ) || []; is( @$found, 1, "$msg - exactly one qw" ); is( $found->[0]->content, $code, "$msg content()" ); -TODO: { - local $TODO = $known_bad{$code} ? "known bug" : undef; is_deeply( [ $found->[0]->literal ], $expected, "literal()" ); # can't dump $msg, as it breaks TODO parsing -} return; } diff --git a/t/ppi_token_symbol.t b/t/ppi_token_symbol.t index d9062cfd..e9e985af 100644 --- a/t/ppi_token_symbol.t +++ b/t/ppi_token_symbol.t @@ -17,10 +17,7 @@ TOKEN_FROM_PARSE: { parse_and_test( '$x[0]', { content => '$x', canonical => '$x', raw_type => '$', symbol_type => '@', symbol => '@x' } ); parse_and_test( '$x{0}', { content => '$x', canonical => '$x', raw_type => '$', symbol_type => '%', symbol => '%x' } ); parse_and_test( '$::x', { content => '$::x', canonical => '$main::x', raw_type => '$', symbol_type => '$', symbol => '$main::x' } ); - { - local $ENV{TODO} = 'bug in canonical'; - parse_and_test( q{$'x}, { content => q{$'x}, canonical => '$main::x', raw_type => '$', symbol_type => '$', symbol => '$main::x' } ); - } + parse_and_test( q{$'x}, { content => q{$'x}, canonical => '$main::x', raw_type => '$', symbol_type => '$', symbol => '$main::x' } ); parse_and_test( '@x', { content => '@x', canonical => '@x', raw_type => '@', symbol_type => '@', symbol => '@x' } ); parse_and_test( '@x[0]', { content => '@x', canonical => '@x', raw_type => '@', symbol_type => '@', symbol => '@x' } );