Skip to content

Commit

Permalink
regen/HeaderParser: correctly handle backslash line continuations
Browse files Browse the repository at this point in the history
The C standard (translation phase 2) says that backslash-newline pairs must be
deleted from the source before any tokenizing or comment parsing is done (like a source filter).

    #def\
    ine A\
    B /\
    * this is a comment *\
    / "\\
    n"

is thus equivalent to

    #define AB "\n"

(In particular, backslash-newline should never be replaced by a space.)
  • Loading branch information
mauke authored and demerphq committed Feb 12, 2025
1 parent 8419236 commit cbc3360
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 14 deletions.
19 changes: 10 additions & 9 deletions regen/HeaderParser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -361,8 +361,8 @@ sub _precedence {
sub parse_expr {
my ($self, $expr)= @_;
if (defined $expr) {
$expr =~ s/\s*\\\n\s*/ /g;
$expr =~ s/defined\s+(\w+)/defined($1)/g;
$expr =~ s/\\\n//g;
$expr =~ s/\bdefined\s+(\w+)/defined($1)/g;
$self->_tokenize_expr($expr);
}
my $ret= $self->_parse_expr();
Expand Down Expand Up @@ -558,20 +558,20 @@ sub parse_fh {
while (defined(my $line= readline($fh))) {
my $start_line_num= $line_num++;
$self->{orig_content} .= $line;
while ($line =~ /\\\n\z/ or $line =~ m</\*(?:(?!\*/).)*\s*\z>s) {
while ($line =~ /\\\n\z/ or $line =~ m</(?:\\\n)*\*(?:(?!\*(?:\\\n)*/).)*\s*\z>s) {
defined(my $read_line= readline($fh))
or last;
$self->{orig_content} .= $read_line;
$line_num++;
$line .= $read_line;
}
while ($line =~ m!/\*(.*?)(\*/|\z)!gs) {
while ($line =~ m!/(?:\\\n)*\*(.*?)(\*(?:\\\n)*/|\z)!gs) {
my ($inner, $tail)= ($1, $2);
if ($tail ne "*/") {
if ($tail eq "") {
confess
"Unterminated comment starting at line $start_line_num\n";
}
elsif ($inner =~ m!/\*!) {
elsif ($inner =~ m!/(?:\\\n)*\*!) {
confess
"Nested/broken comment starting at line $start_line_num\n";
}
Expand All @@ -583,7 +583,7 @@ sub parse_fh {
my $level= @cond;
my $do_pop= 0;
my $flat= $line;
$flat =~ s/\s*\\\n\s*/ /g;
$flat =~ s/\\\n//g;
$flat =~ s!/\*.*?\*/! !gs;
$flat =~ s/\s+/ /g;
$flat =~ s/\s+\z//;
Expand Down Expand Up @@ -1592,8 +1592,9 @@ C preprocessor files are a bit tricky to parse properly, especially with a
=item Line Continuations
Any line ending in "\\\n" (that is backslash newline) is considered to be part
of a longer string which continues on the next line. Processors should replace
the "\\\n" typically with a space when converting to a "real" line.
of a longer string which continues on the next line. Processors should delete
the "\\\n" early on when converting to a "real" line, before doing any further
parsing.
=item Comments Acting As A Line Continuation
Expand Down
73 changes: 68 additions & 5 deletions t/porting/header_parser.t
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,16 @@ $hp->parse_text(<<~'EOF');
line */
#define C /* this is
a hidden line continuation */ D
#de\
fine E\
F 42 /\
* a different kind of
hidden line continuation *\
/
#\
if defined E\
F
#endif
# /* null directive */
#error #undef
#error #pragma
Expand Down Expand Up @@ -217,6 +227,51 @@ is($lines_as_str,<<~'DUMP_EOF', "Simple data structure as expected") or show_tex
"sub_type" => "#define",
"type" => "content"
}, 'HeaderLine' ),
bless( {
"cond" => [],
"flat" => "#define EF 42",
"level" => 0,
"line" => "#de\\\nfine E\\\nF 42 /\\\n* a different kind of\nhidden line continuation *\\\n/\n",
"n_lines" => 6,
"raw" => "#de\\\nfine E\\\nF 42 /\\\n* a different kind of\nhidden line continuation *\\\n/\n",
"source" => "(buffer)",
"start_line_num" => 13,
"sub_type" => "#define",
"type" => "content"
}, 'HeaderLine' ),
bless( {
"cond" => [
[
"defined(EF)"
]
],
"flat" => "#if defined(EF)",
"level" => 0,
"line" => "#if defined(EF)\n",
"n_lines" => 3,
"raw" => "#\\\nif defined E\\\nF\n",
"source" => "(buffer)",
"start_line_num" => 19,
"sub_type" => "#if",
"type" => "cond"
}, 'HeaderLine' ),
bless( {
"cond" => [
[
"defined(EF)"
]
],
"flat" => "#endif",
"inner_lines" => 3,
"level" => 0,
"line" => "#endif\n",
"n_lines" => 1,
"raw" => "#endif\n",
"source" => "(buffer)",
"start_line_num" => 22,
"sub_type" => "#endif",
"type" => "cond"
}, 'HeaderLine' ),
bless( {
"cond" => [],
"flat" => "#",
Expand All @@ -225,7 +280,7 @@ is($lines_as_str,<<~'DUMP_EOF', "Simple data structure as expected") or show_tex
"n_lines" => 1,
"raw" => "# /* null directive */\n",
"source" => "(buffer)",
"start_line_num" => 13,
"start_line_num" => 23,
"sub_type" => "text",
"type" => "content"
}, 'HeaderLine' ),
Expand All @@ -237,7 +292,7 @@ is($lines_as_str,<<~'DUMP_EOF', "Simple data structure as expected") or show_tex
"n_lines" => 1,
"raw" => "#error #undef\n",
"source" => "(buffer)",
"start_line_num" => 14,
"start_line_num" => 24,
"sub_type" => "#error",
"type" => "content"
}, 'HeaderLine' ),
Expand All @@ -249,7 +304,7 @@ is($lines_as_str,<<~'DUMP_EOF', "Simple data structure as expected") or show_tex
"n_lines" => 1,
"raw" => "#error #pragma\n",
"source" => "(buffer)",
"start_line_num" => 15,
"start_line_num" => 25,
"sub_type" => "#error",
"type" => "content"
}, 'HeaderLine' ),
Expand All @@ -261,7 +316,7 @@ is($lines_as_str,<<~'DUMP_EOF', "Simple data structure as expected") or show_tex
"n_lines" => 1,
"raw" => "#error #include\n",
"source" => "(buffer)",
"start_line_num" => 16,
"start_line_num" => 26,
"sub_type" => "#error",
"type" => "content"
}, 'HeaderLine' ),
Expand All @@ -273,7 +328,7 @@ is($lines_as_str,<<~'DUMP_EOF', "Simple data structure as expected") or show_tex
"n_lines" => 1,
"raw" => "#error #define\n",
"source" => "(buffer)",
"start_line_num" => 17,
"start_line_num" => 27,
"sub_type" => "#error",
"type" => "content"
}, 'HeaderLine' )
Expand All @@ -293,6 +348,14 @@ is($normal,<<~'EOF',"Normalized text as expected");
line */
#define C /* this is
a hidden line continuation */ D
#de\
fine E\
F 42 /\
* a different kind of
hidden line continuation *\
/
#if defined(EF)
#endif
# /* null directive */
#error #undef
#error #pragma
Expand Down

0 comments on commit cbc3360

Please sign in to comment.