From 427efeae13912b66e1c030c7645a2b1f322dff4d Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Thu, 25 May 2017 15:40:06 +0200
Subject: Fix few bugs in C++ lexer and parser

---
 build2/cc/lexer.cxx  | 25 +++++++++++++++++--------
 build2/cc/lexer.hxx  |  6 +++---
 build2/cc/parser.cxx | 30 +++++++++++++++++++++---------
 3 files changed, 41 insertions(+), 20 deletions(-)

(limited to 'build2')

diff --git a/build2/cc/lexer.cxx b/build2/cc/lexer.cxx
index 3eb5d5b..05c734c 100644
--- a/build2/cc/lexer.cxx
+++ b/build2/cc/lexer.cxx
@@ -76,7 +76,7 @@ namespace build2
     using type = token_type;
 
     void lexer::
-    next (token& t, xchar c)
+    next (token& t, xchar c, bool ignore_pp)
     {
       for (;; c = skip_spaces ())
       {
@@ -101,18 +101,27 @@ namespace build2
             // that we assume there cannot be #include directives.
             //
             // This may not work for things like #error that can contain
-            // pretty much anything.
+            // pretty much anything. Also note that lines that start with
+            // # can contain # further down.
             //
-            for (;;)
+            if (ignore_pp)
             {
-              c = skip_spaces (false); // Stop at newline.
+              for (;;)
+              {
+                c = skip_spaces (false); // Stop at newline.
 
-              if (eos (c) || c == '\n')
-                break;
+                if (eos (c) || c == '\n')
+                  break;
 
-              next (t, c); // Keep using the passed token for buffers.
+                next (t, c, false); // Keep using the passed token for buffers.
+              }
+              break;
+            }
+            else
+            {
+              t.type = type::punctuation;
+              return;
             }
-            break;
           }
           // Single-letter punctuation.
           //
diff --git a/build2/cc/lexer.hxx b/build2/cc/lexer.hxx
index 146b579..7865a4e 100644
--- a/build2/cc/lexer.hxx
+++ b/build2/cc/lexer.hxx
@@ -85,7 +85,7 @@ namespace build2
       next ()
       {
         token t;
-        next (t, skip_spaces ());
+        next (t, skip_spaces (), true);
         return t;
       }
 
@@ -98,13 +98,13 @@ namespace build2
       token_type
       next (token& t)
       {
-        next (t, skip_spaces ());
+        next (t, skip_spaces (), true);
         return t.type;
       }
 
     private:
       void
-      next (token&, xchar);
+      next (token&, xchar, bool);
 
       void
       number_literal (token&, xchar);
diff --git a/build2/cc/parser.cxx b/build2/cc/parser.cxx
index e5079b7..b21e99f 100644
--- a/build2/cc/parser.cxx
+++ b/build2/cc/parser.cxx
@@ -28,17 +28,21 @@ namespace build2
 
       // If the source has errors then we want the compiler to issues the
       // diagnostics. However, the errors could as likely be because we are
-      // mis-parsing things. As a middle ground, we are going to issue
-      // warnings.
+      // mis-parsing things. Initially, as a middle ground, we were going to
+      // issue warnings. But the problem with this approach is that they are
+      // easy to miss. So for now we fail.
       //
       size_t bb (0);     // {}-balance.
       bool   ex (false); // True if inside top-level export{} block.
 
       token t;
-      while (l_->next (t) != type::eos)
+      for (bool n (true); (n ? l_->next (t) : t.type) != type::eos; )
       {
-        // Break to stop, continue to continue.
+        // Break to stop, continue to continue, set n to false if the
+        // next token has already been extracted.
         //
+        n = true;
+
         switch (t.type)
         {
         case type::lcbrace:
@@ -89,16 +93,20 @@ namespace build2
                       parse_module (t, true);
                     else if (id == "import")
                       parse_import (t);
-
-                    // Something else, for example, export namespace.
+                    else
+                      n = false; // Something else (e.g., export namespace).
 
                     break;
                   }
-                default: break;
+                default: n = false; break;
                 }
               }
               else if (id == "extern")
-                l_->next (t); // Skip to make sure not recognized as module.
+              {
+                // Skip extern module; otherwise reprocess the token just read.
+                //
+                n = l_->next (t) == type::identifier && t.value == "module";
+              }
             }
             else if (ex && bb == 1)
             {
@@ -116,7 +124,7 @@ namespace build2
       }
 
       if (bb != 0)
-        warn (t) << "{}-imbalance detected";
+        /*warn*/ fail (t) << "{}-imbalance detected";
 
       return u;
     }
@@ -130,6 +138,8 @@ namespace build2
       l_->next (t); // Start of name.
       string n (parse_module_name (t));
 
+      // Should be {}-balanced.
+      //
       for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ;
 
       if (t.type != type::semi)
@@ -153,6 +163,8 @@ namespace build2
       l_->next (t); // Start of name.
       string n (parse_module_name (t));
 
+      // Should be {}-balanced.
+      //
       for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ;
 
       if (t.type != type::semi)
-- 
cgit v1.1