aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2017-05-28 11:57:41 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2017-05-28 11:57:41 +0200
commitb30107085c003b364fc11ba84b5e7130d0926940 (patch)
tree4defb98a3e707e1ab3137ccb555d85b186001382
parent2e19434e09b819105055ddc8e58f69db98ec8669 (diff)
Diagnose #include directives in C/C++ lexer
-rw-r--r--build2/cc/lexer.cxx48
-rw-r--r--build2/cc/lexer.hxx8
-rw-r--r--unit-tests/cc/lexer/preprocessor.test8
3 files changed, 41 insertions, 23 deletions
diff --git a/build2/cc/lexer.cxx b/build2/cc/lexer.cxx
index 40178bb..faf73c8 100644
--- a/build2/cc/lexer.cxx
+++ b/build2/cc/lexer.cxx
@@ -102,6 +102,8 @@ namespace build2
return;
}
+ const location l (&name_, c.line, c.column);
+
switch (c)
{
// Preprocessor lines.
@@ -110,12 +112,12 @@ namespace build2
{
// It is tempting to simply scan until the newline ignoring
// anything in between. However, these lines can start a
- // multi-line C-style comment. So we have to tokenize it. Note
- // that we assume there cannot be #include directives.
+ // multi-line C-style comment. So we have to tokenize them.
//
- // This may not work for things like #error that can contain
- // pretty much anything. Also note that lines that start with #
- // can contain # further down.
+ // Note that this may not work for things like #error that can
+ // contain pretty much anything. Also note that lines that start
+ // with '#' can contain '#' further down. In this case we need to
+ // be careful not to recurse (and consume multiple newlines).
//
// Finally, to support diagnostics properly we need to recognize
// #line directives.
@@ -140,10 +142,22 @@ namespace build2
// #line <integer> [<string literal>] ...
// # <integer> [<string literal>] ...
//
+ // Also diagnose #include while at it.
+ //
if (!(c >= '0' && c <= '9'))
{
next (t, c, false);
+ if (t.type == type::identifier)
+ {
+ if (t.value == "include")
+ fail (l) << "unexpected #include directive";
+ else if (t.value != "line")
+ continue;
+ }
+ else
+ continue;
+
if (t.type != type::identifier || t.value != "line")
continue;
@@ -505,15 +519,14 @@ namespace build2
void lexer::
char_literal (token& t, xchar c)
{
- uint64_t ln (c.line);
- uint64_t cn (c.column);
+ const location l (&name_, c.line, c.column);
for (char p (c);;) // Previous character (see below).
{
c = get ();
if (eos (c) || c == '\n')
- fail (location (&name_, ln, cn)) << "unterminated character literal";
+ fail (l) << "unterminated character literal";
if (c == '\'' && p != '\\')
break;
@@ -535,15 +548,14 @@ namespace build2
void lexer::
string_literal (token& t, xchar c)
{
- uint64_t ln (c.line);
- uint64_t cn (c.column);
+ const location l (&name_, c.line, c.column);
for (char p (c);;) // Previous character (see below).
{
c = get ();
if (eos (c) || c == '\n')
- fail (location (&name_, ln, cn)) << "unterminated string literal";
+ fail (l) << "unterminated string literal";
if (c == '\"' && p != '\\')
break;
@@ -576,8 +588,7 @@ namespace build2
// Note that the <raw_characters> are not processed in any way, not even
// for line continuations.
//
- uint64_t ln (c.line);
- uint64_t cn (c.column);
+ const location l (&name_, c.line, c.column);
// As a first step, parse the delimiter (including the openning paren).
//
@@ -588,7 +599,7 @@ namespace build2
c = get ();
if (eos (c) || c == '\"' || c == ')' || c == '\\' || c == ' ')
- fail (location (&name_, ln, cn)) << "invalid raw string literal";
+ fail (l) << "invalid raw string literal";
if (c == '(')
break;
@@ -606,7 +617,7 @@ namespace build2
c = get (false); // No newline escaping.
if (eos (c)) // Note: newline is ok.
- fail (location (&name_, ln, cn)) << "invalid raw string literal";
+ fail (l) << "invalid raw string literal";
if (c != d[i] && i != 0) // Restart from the beginning.
i = 0;
@@ -664,18 +675,17 @@ namespace build2
if (c == '\"')
{
+ const location l (&name_, c.line, c.column);
+
string s (move (log_file_).string ()); // Move string rep out.
s.clear ();
- uint64_t ln (c.line);
- uint64_t cn (c.column);
-
for (char p ('\0'); p != '\"'; ) // Previous character.
{
c = get ();
if (eos (c) || c == '\n')
- fail (location (&name_, ln, cn)) << "unterminated string literal";
+ fail (l) << "unterminated string literal";
// Handle escapes.
//
diff --git a/build2/cc/lexer.hxx b/build2/cc/lexer.hxx
index 8767606..c74a0a9 100644
--- a/build2/cc/lexer.hxx
+++ b/build2/cc/lexer.hxx
@@ -22,10 +22,10 @@ namespace build2
//
// The input is a (partially-)preprocessed translation unit that may still
// contain comments, line continuations, and preprocessor directives such
- // as #line, #pragma, etc., but not #include's. Currently all preprocessor
- // directives except #line are ignored and no values are saved from
- // literals. The #line directive (and its shorthand notation) is
- // recognized to provide the logical token location.
+ // as #line, #pragma, but not #include (which is diagnosed). Currently,
+ // all preprocessor directives except #line are ignored and no values are
+ // saved from literals. The #line directive (and its shorthand notation)
+ // is recognized to provide the logical token location.
//
enum class token_type
{
diff --git a/unit-tests/cc/lexer/preprocessor.test b/unit-tests/cc/lexer/preprocessor.test
index a3fab9f..13b8baf 100644
--- a/unit-tests/cc/lexer/preprocessor.test
+++ b/unit-tests/cc/lexer/preprocessor.test
@@ -55,6 +55,14 @@ EOI
';' test.cxx:5:1
EOO
+: include
+:
+$* <<EOI 2>>EOE != 0
+#include <foo/bar>
+EOI
+stdin:1:1: error: unexpected #include directive
+EOE
+
: nested
:
$* <<EOI >>EOO