Skip to content

Fix parsing here-docs without a carriage return after the terminator #72

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile.PL
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ requires 'Digest::MD5' => '2.35';
requires 'Storable' => '2.17';

# Test-time dependencies (bundle as many as we can)
test_requires 'Test::Deep';
test_requires 'Test::More' => '0.86';
test_requires 'Test::NoWarnings' => '0.084';
test_requires 'Test::Object' => '0.07';
Expand Down
36 changes: 17 additions & 19 deletions lib/PPI/Token/HereDoc.pm
Original file line number Diff line number Diff line change
Expand Up @@ -201,14 +201,10 @@ sub __TOKENIZER__on_char {
return undef;
}

# Define $line outside of the loop, so that if we encounter the
# end of the file, we have access to the last line still.
my $line;

# Suck in the HEREDOC
$token->{_heredoc} = [];
my $terminator = $token->{_terminator} . "\n";
while ( defined($line = $t->_get_line) ) {
while ( defined( my $line = $t->_get_line ) ) {
if ( $line eq $terminator ) {
# Keep the actual termination line for consistency
# when we are re-assembling the file
Expand All @@ -224,24 +220,26 @@ sub __TOKENIZER__on_char {

# End of file.
# Error: Didn't reach end of here-doc before end of file.
# $line might be undef if we get NO lines.
if ( defined $line and $line eq $token->{_terminator} ) {
# If the last line matches the terminator
# but is missing the newline, we want to allow
# it anyway (like perl itself does). In this case
# perl would normally throw a warning, but we will
# also ignore that as well.
pop @{$token->{_heredoc}};
$token->{_terminator_line} = $line;
} else {
# The HereDoc was not properly terminated.
$token->{_terminator_line} = undef;

# Trim off the trailing whitespace
if ( defined $token->{_heredoc}->[-1] and $t->{source_eof_chop} ) {
# If the here-doc block is not empty, look at the last line to determine if
# the here-doc terminator is missing a newline (which Perl would fail to
# compile but is easy to detect) or if the here-doc block was just not
# terminated at all (which Perl would fail to compile as well).
$token->{_terminator_line} = undef;
if ( scalar( @{$token->{_heredoc}} ) != 0 && defined( $token->{_heredoc}->[-1] ) )
{
# See PPI::Tokenizer, the algorithm there adds a space at the end of the
# document that we need to make sure we remove.
if ( $t->{source_eof_chop} ) {
chop $token->{_heredoc}->[-1];
$t->{source_eof_chop} = '';
}

# Check if the last line of the file matches the terminator without newline
# at the end. If so, remove it from the content and set it as the
# terminator line.
$token->{_terminator_line} = pop @{$token->{_heredoc}}
if $token->{_heredoc}->[-1] eq $token->{_terminator};
}

# Set a hint for PPI::Document->serialize so it can
Expand Down
172 changes: 172 additions & 0 deletions t/ppi_token_heredoc.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
#!/usr/bin/perl

# Unit testing for PPI::Token::HereDoc

use strict;
BEGIN {
$| = 1;
$^W = 1;
no warnings 'once';
$PPI::XS_DISABLE = 1;
$PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER};
}
use Test::Deep;
use Test::More tests => 12;
use Test::NoWarnings;
use PPI;


# List of tests to perform. Each test requires the following information:
# - 'name': the name of the test in the output.
# - 'content': the Perl string to parse using PPI.
# - 'expected': a hashref with the keys being property names on the
# PPI::Token::HereDoc object, and the values being the expected value of
# that property after the heredoc block has been parsed.
my $tests = [
# Tests with a carriage return after the termination marker.
{
name => 'Bareword terminator.',
content => "my \$heredoc = <<HERE;\nLine 1\nLine 2\nHERE\n",
expected => {
_terminator_line => "HERE\n",
_damaged => undef,
_terminator => 'HERE',
_mode => 'interpolate',
},
},
{
name => 'Single-quoted bareword terminator.',
content => "my \$heredoc = <<'HERE';\nLine 1\nLine 2\nHERE\n",
expected => {
_terminator_line => "HERE\n",
_damaged => undef,
_terminator => 'HERE',
_mode => 'literal',
},
},
{
name => 'Double-quoted bareword terminator.',
content => "my \$heredoc = <<\"HERE\";\nLine 1\nLine 2\nHERE\n",
expected => {
_terminator_line => "HERE\n",
_damaged => undef,
_terminator => 'HERE',
_mode => 'interpolate',
},
},
{
name => 'Command-quoted terminator.',
content => "my \$heredoc = <<`HERE`;\nLine 1\nLine 2\nHERE\n",
expected => {
_terminator_line => "HERE\n",
_damaged => undef,
_terminator => 'HERE',
_mode => 'command',
},
},
{
name => 'Legacy escaped bareword terminator.',
content => "my \$heredoc = <<\\HERE;\nLine 1\nLine 2\nHERE\n",
expected => {
_terminator_line => "HERE\n",
_damaged => undef,
_terminator => 'HERE',
_mode => 'literal',
},
},
# Tests without a carriage return after the termination marker.
{
name => 'Bareword terminator (no return).',
content => "my \$heredoc = <<HERE;\nLine 1\nLine 2\nHERE",
expected => {
_terminator_line => 'HERE',
_damaged => 1,
_terminator => 'HERE',
_mode => 'interpolate',
},
},
{
name => 'Single-quoted bareword terminator (no return).',
content => "my \$heredoc = <<'HERE';\nLine 1\nLine 2\nHERE",
expected => {
_terminator_line => "HERE",
_damaged => 1,
_terminator => 'HERE',
_mode => 'literal',
},
},
{
name => 'Double-quoted bareword terminator (no return).',
content => "my \$heredoc = <<\"HERE\";\nLine 1\nLine 2\nHERE",
expected => {
_terminator_line => 'HERE',
_damaged => 1,
_terminator => 'HERE',
_mode => 'interpolate',
},
},
{
name => 'Command-quoted terminator (no return).',
content => "my \$heredoc = <<`HERE`;\nLine 1\nLine 2\nHERE",
expected => {
_terminator_line => 'HERE',
_damaged => 1,
_terminator => 'HERE',
_mode => 'command',
},
},
{
name => 'Legacy escaped bareword terminator (no return).',
content => "my \$heredoc = <<\\HERE;\nLine 1\nLine 2\nHERE",
expected => {
_terminator_line => 'HERE',
_damaged => 1,
_terminator => 'HERE',
_mode => 'literal',
},
},
# Tests without a terminator.
{
name => 'Unterminated heredoc block.',
content => "my \$heredoc = <<HERE;\nLine 1\nLine 2\n",
expected => {
_terminator_line => undef,
_damaged => 1,
_terminator => 'HERE',
_mode => 'interpolate',
},
}
];

foreach my $test ( @$tests ) {
subtest(
$test->{'name'},
sub {
plan( tests => 6 + keys %{ $test->{'expected'} } );

my $document = PPI::Document->new( \$test->{'content'} );
isa_ok( $document, 'PPI::Document' );

my $heredocs = $document->find('Token::HereDoc');
is( ref($heredocs), 'ARRAY', 'Found heredocs.' );
is( scalar(@$heredocs), 1, 'Found 1 heredoc block.' );
my $heredoc = $heredocs->[0];
isa_ok( $heredoc, 'PPI::Token::HereDoc');
can_ok( $heredoc, 'heredoc' );

my @content = $heredoc->heredoc();
is_deeply(
\@content,
[
"Line 1\n",
"Line 2\n",
],
'The returned content does not include the heredoc terminator.',
) || diag( "heredoc() returned ", explain( \@content ) );

foreach my $key ( keys %{ $test->{'expected'} } ) {
is( $heredoc->{ $key }, $test->{'expected'}->{ $key }, "Verify value for property '$key'." );
}
}
);
}
14 changes: 0 additions & 14 deletions t/ppi_token_operator.t
Original file line number Diff line number Diff line change
Expand Up @@ -33,20 +33,6 @@ FIND_ONE_OP: {
}


HEREDOC: {
my $source = '$a = <<PERL_END;' . "\n" . 'PERL_END';
my $doc = PPI::Document->new( \$source );
isa_ok( $doc, 'PPI::Document', "parsed '$source'" );
my $ops = $doc->find( 'Token::HereDoc' );
is( ref $ops, 'ARRAY', "found heredoc" );
is( @$ops, 1, "heredoc found exactly once" );

$ops = $doc->find( 'Token::Operator' );
is( ref $ops, 'ARRAY', "operator = found operators in heredoc test" );
is( @$ops, 1, "operator = found exactly once in heredoc test" );
}


PARSE_ALL_OPERATORS: {
foreach my $op ( sort keys %PPI::Token::Operator::OPERATOR ) {
my $source = $op eq '<>' ? '<>;' : "\$foo $op 2;";
Expand Down