From 427efeae13912b66e1c030c7645a2b1f322dff4d Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 25 May 2017 15:40:06 +0200 Subject: Fix few bugs in C++ lexer and parser --- build2/cc/lexer.cxx | 25 +++++++++++++++++-------- build2/cc/lexer.hxx | 6 +++--- build2/cc/parser.cxx | 30 +++++++++++++++++++++--------- 3 files changed, 41 insertions(+), 20 deletions(-) (limited to 'build2') diff --git a/build2/cc/lexer.cxx b/build2/cc/lexer.cxx index 3eb5d5b..05c734c 100644 --- a/build2/cc/lexer.cxx +++ b/build2/cc/lexer.cxx @@ -76,7 +76,7 @@ namespace build2 using type = token_type; void lexer:: - next (token& t, xchar c) + next (token& t, xchar c, bool ignore_pp) { for (;; c = skip_spaces ()) { @@ -101,18 +101,27 @@ namespace build2 // that we assume there cannot be #include directives. // // This may not work for things like #error that can contain - // pretty much anything. + // pretty much anything. Also note that lines that start with + // # can contain # further down. // - for (;;) + if (ignore_pp) { - c = skip_spaces (false); // Stop at newline. + for (;;) + { + c = skip_spaces (false); // Stop at newline. - if (eos (c) || c == '\n') - break; + if (eos (c) || c == '\n') + break; - next (t, c); // Keep using the passed token for buffers. + next (t, c, false); // Keep using the passed token for buffers. + } + break; + } + else + { + t.type = type::punctuation; + return; } - break; } // Single-letter punctuation. // diff --git a/build2/cc/lexer.hxx b/build2/cc/lexer.hxx index 146b579..7865a4e 100644 --- a/build2/cc/lexer.hxx +++ b/build2/cc/lexer.hxx @@ -85,7 +85,7 @@ namespace build2 next () { token t; - next (t, skip_spaces ()); + next (t, skip_spaces (), true); return t; } @@ -98,13 +98,13 @@ namespace build2 token_type next (token& t) { - next (t, skip_spaces ()); + next (t, skip_spaces (), true); return t.type; } private: void - next (token&, xchar); + next (token&, xchar, bool); void number_literal (token&, xchar); diff --git a/build2/cc/parser.cxx b/build2/cc/parser.cxx index e5079b7..b21e99f 100644 --- a/build2/cc/parser.cxx +++ b/build2/cc/parser.cxx @@ -28,17 +28,21 @@ namespace build2 // If the source has errors then we want the compiler to issues the // diagnostics. However, the errors could as likely be because we are - // mis-parsing things. As a middle ground, we are going to issue - // warnings. + // mis-parsing things. Initially, as a middle ground, we were going to + // issue warnings. But the problem with this approach is that they are + // easy to miss. So for now we fail. // size_t bb (0); // {}-balance. bool ex (false); // True if inside top-level export{} block. token t; - while (l_->next (t) != type::eos) + for (bool n (true); (n ? l_->next (t) : t.type) != type::eos; ) { - // Break to stop, continue to continue. + // Break to stop, continue to continue, set n to false if the + // next token already extracted. // + n = true; + switch (t.type) { case type::lcbrace: @@ -89,16 +93,20 @@ namespace build2 parse_module (t, true); else if (id == "import") parse_import (t); - - // Something else, for example, export namespace. + else + n = false; // Something else (e.g., export namespace). break; } - default: break; + default: n = false; break; } } else if (id == "extern") - l_->next (t); // Skip to make sure not recognized as module. + { + // Skip to make sure not recognized as module. + // + n = l_->next (t) == type::identifier && t.value == "module"; + } } else if (ex && bb == 1) { @@ -116,7 +124,7 @@ namespace build2 } if (bb != 0) - warn (t) << "{}-imbalance detected"; + /*warn*/ fail (t) << "{}-imbalance detected"; return u; } @@ -130,6 +138,8 @@ namespace build2 l_->next (t); // Start of name. string n (parse_module_name (t)); + // Should be {}-balanced. + // for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; if (t.type != type::semi) @@ -153,6 +163,8 @@ namespace build2 l_->next (t); // Start of name. string n (parse_module_name (t)); + // Should be {}-balanced. + // for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; if (t.type != type::semi) -- cgit v1.1