From 427efeae13912b66e1c030c7645a2b1f322dff4d Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 25 May 2017 15:40:06 +0200 Subject: Fix few bugs in C++ lexer and parser --- build2/cc/lexer.cxx | 25 +++++++++++++++++-------- build2/cc/lexer.hxx | 6 +++--- build2/cc/parser.cxx | 30 +++++++++++++++++++++--------- unit-tests/cc/lexer/preprocessor.test | 9 +++++++++ unit-tests/cc/parser/module.test | 8 ++++---- 5 files changed, 54 insertions(+), 24 deletions(-) diff --git a/build2/cc/lexer.cxx b/build2/cc/lexer.cxx index 3eb5d5b..05c734c 100644 --- a/build2/cc/lexer.cxx +++ b/build2/cc/lexer.cxx @@ -76,7 +76,7 @@ namespace build2 using type = token_type; void lexer:: - next (token& t, xchar c) + next (token& t, xchar c, bool ignore_pp) { for (;; c = skip_spaces ()) { @@ -101,18 +101,27 @@ namespace build2 // that we assume there cannot be #include directives. // // This may not work for things like #error that can contain - // pretty much anything. + // pretty much anything. Also note that lines that start with + // # can contain # further down. // - for (;;) + if (ignore_pp) { - c = skip_spaces (false); // Stop at newline. + for (;;) + { + c = skip_spaces (false); // Stop at newline. - if (eos (c) || c == '\n') - break; + if (eos (c) || c == '\n') + break; - next (t, c); // Keep using the passed token for buffers. + next (t, c, false); // Keep using the passed token for buffers. + } + break; + } + else + { + t.type = type::punctuation; + return; } - break; } // Single-letter punctuation. // diff --git a/build2/cc/lexer.hxx b/build2/cc/lexer.hxx index 146b579..7865a4e 100644 --- a/build2/cc/lexer.hxx +++ b/build2/cc/lexer.hxx @@ -85,7 +85,7 @@ namespace build2 next () { token t; - next (t, skip_spaces ()); + next (t, skip_spaces (), true); return t; } @@ -98,13 +98,13 @@ namespace build2 token_type next (token& t) { - next (t, skip_spaces ()); + next (t, skip_spaces (), true); return t.type; } private: void - next (token&, xchar); + next (token&, xchar, bool); void number_literal (token&, xchar); diff --git a/build2/cc/parser.cxx b/build2/cc/parser.cxx index e5079b7..b21e99f 100644 --- a/build2/cc/parser.cxx +++ b/build2/cc/parser.cxx @@ -28,17 +28,21 @@ namespace build2 // If the source has errors then we want the compiler to issues the // diagnostics. However, the errors could as likely be because we are - // mis-parsing things. As a middle ground, we are going to issue - // warnings. + // mis-parsing things. Initially, as a middle ground, we were going to + // issue warnings. But the problem with this approach is that they are + // easy to miss. So for now we fail. // size_t bb (0); // {}-balance. bool ex (false); // True if inside top-level export{} block. token t; - while (l_->next (t) != type::eos) + for (bool n (true); (n ? l_->next (t) : t.type) != type::eos; ) { - // Break to stop, continue to continue. + // Break to stop, continue to continue, set n to false if the + // next token already extracted. // + n = true; + switch (t.type) { case type::lcbrace: @@ -89,16 +93,20 @@ namespace build2 parse_module (t, true); else if (id == "import") parse_import (t); - - // Something else, for example, export namespace. + else + n = false; // Something else (e.g., export namespace). break; } - default: break; + default: n = false; break; } } else if (id == "extern") - l_->next (t); // Skip to make sure not recognized as module. + { + // Skip to make sure not recognized as module. + // + n = l_->next (t) == type::identifier && t.value == "module"; + } } else if (ex && bb == 1) { @@ -116,7 +124,7 @@ namespace build2 } if (bb != 0) - warn (t) << "{}-imbalance detected"; + /*warn*/ fail (t) << "{}-imbalance detected"; return u; } @@ -130,6 +138,8 @@ namespace build2 l_->next (t); // Start of name. string n (parse_module_name (t)); + // Should be {}-balanced. + // for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; if (t.type != type::semi) @@ -153,6 +163,8 @@ namespace build2 l_->next (t); // Start of name. string n (parse_module_name (t)); + // Should be {}-balanced. + // for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ; if (t.type != type::semi) diff --git a/unit-tests/cc/lexer/preprocessor.test b/unit-tests/cc/lexer/preprocessor.test index 2917649..e082062 100644 --- a/unit-tests/cc/lexer/preprocessor.test +++ b/unit-tests/cc/lexer/preprocessor.test @@ -36,3 +36,12 @@ $* <>EOO +#define FOO(x) #y +; +EOI +';' +EOO diff --git a/unit-tests/cc/parser/module.test b/unit-tests/cc/parser/module.test index f85c969..f2bbaf1 100644 --- a/unit-tests/cc/parser/module.test +++ b/unit-tests/cc/parser/module.test @@ -88,7 +88,7 @@ EOO : brace-missing : -$* <>EOE +$* <>EOE != 0 export { class foo @@ -97,12 +97,12 @@ export module foo; } EOI -stdin:8:1: warning: {}-imbalance detected +stdin:8:1: error: {}-imbalance detected EOE : brace-stray : -$* <>EOE +$* <>EOE != 0 export { class foo @@ -111,7 +111,7 @@ export } module foo; EOI -stdin:6:1: warning: {}-imbalance detected +stdin:6:1: error: {}-imbalance detected EOE : import-missing-name -- cgit v1.1