From 4a9ee48613cf5c59e071400280b62358eb79987e Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Mon, 2 Mar 2015 09:52:59 +0200
Subject: Indicate whether token is separated from previous one by whitespaces

---
 build/lexer      |  32 ++++++-----
 build/lexer.cxx  | 159 ++++++++++++++++++++++++++++++-------------------------
 build/parser     |   2 +-
 build/parser.cxx |   2 +-
 build/token      |  14 +++--
 5 files changed, 117 insertions(+), 92 deletions(-)

(limited to 'build')

diff --git a/build/lexer b/build/lexer
index 1723ae0..67a94c8 100644
--- a/build/lexer
+++ b/build/lexer
@@ -20,11 +20,11 @@ namespace build
   public:
     lexer (std::istream& is, const std::string& name): is_ (is), fail (name) {}
 
+    // Scanner.
+    //
     token
     next ();
 
-    // Character interface.
-    //
   private:
     class xchar
     {
@@ -50,6 +50,22 @@ namespace build
       std::uint64_t c_;
     };
 
+    token
+    name (xchar, bool separated);
+
+    // Return true if we have seen any spaces. Skipped empty lines don't
+    // count. In other words, we are only interested in spaces that
+    // are on the same line as the following non-space character.
+    //
+    bool
+    skip_spaces ();
+
+    xchar
+    escape ();
+
+    // Character interface.
+    //
+  private:
     xchar
     peek ();
 
@@ -67,18 +83,6 @@ namespace build
       return c.value () == xchar::traits_type::eof ();
     }
 
-    // Scanner.
-    //
-  private:
-    xchar
-    escape ();
-
-    void
-    skip_spaces ();
-
-    token
-    name (xchar);
-
     // Diagnostics.
     //
   private:
diff --git a/build/lexer.cxx b/build/lexer.cxx
index 6836322..4e29b43 100644
--- a/build/lexer.cxx
+++ b/build/lexer.cxx
@@ -11,13 +11,13 @@ namespace build
   token lexer::
   next ()
   {
-    skip_spaces ();
+    bool sep (skip_spaces ());
 
     xchar c (get ());
     uint64_t ln (c.line ()), cn (c.column ());
 
     if (is_eos (c))
-      return token (token_type::eos, ln, cn);
+      return token (token_type::eos, sep, ln, cn);
 
     switch (c)
     {
@@ -30,15 +30,15 @@ namespace build
         if (mode_ == mode::value)
           mode_ = mode::normal;
 
-        return token (token_type::newline, ln, cn);
+        return token (token_type::newline, sep, ln, cn);
       }
     case '{':
       {
-        return token (token_type::lcbrace, ln, cn);
+        return token (token_type::lcbrace, sep, ln, cn);
       }
     case '}':
       {
-        return token (token_type::rcbrace, ln, cn);
+        return token (token_type::rcbrace, sep, ln, cn);
       }
     }
 
@@ -53,12 +53,12 @@ namespace build
       {
       case ':':
         {
-          return token (token_type::colon, ln, cn);
+          return token (token_type::colon, sep, ln, cn);
         }
       case '=':
         {
           mode_ = mode::value;
-          return token (token_type::equal, ln, cn);
+          return token (token_type::equal, sep, ln, cn);
         }
       case '+':
         {
@@ -66,79 +66,18 @@ namespace build
             fail (c) << "expected = after +";
 
           mode_ = mode::value;
-          return token (token_type::plus_equal, ln, cn);
+          return token (token_type::plus_equal, sep, ln, cn);
         }
       }
     }
 
     // Otherwise it is a name.
     //
-    return name (c);
-  }
-
-  lexer::xchar lexer::
-  escape ()
-  {
-    xchar c (get ());
-
-    if (is_eos (c))
-      fail (c) << "unterminated escape sequence";
-
-    return c;
-  }
-
-  void lexer::
-  skip_spaces ()
-  {
-    xchar c (peek ());
-    bool start (c.column () == 1);
-
-    for (; !is_eos (c); c = peek ())
-    {
-      switch (c)
-      {
-      case ' ':
-      case '\t':
-        break;
-      case '\n':
-        {
-          // Skip empty lines.
-          //
-          if (start)
-            break;
-
-          return;
-        }
-      case '#':
-        {
-          get ();
-
-          // Read until newline or eos.
-          //
-          for (c = peek (); !is_eos (c) && c != '\n'; c = peek ())
-            get ();
-          continue;
-        }
-      case '\\':
-        {
-          get ();
-
-          if (peek () == '\n')
-            break;
-
-          unget (c);
-          // Fall through.
-        }
-      default:
-        return; // Not a space.
-      }
-
-      get ();
-    }
+    return name (c, sep);
   }
 
   token lexer::
-  name (xchar c)
+  name (xchar c, bool sep)
   {
     uint64_t ln (c.line ()), cn (c.column ());
     string lexeme;
@@ -198,7 +137,83 @@ namespace build
         break;
     }
 
-    return token (lexeme, ln, cn);
+    return token (lexeme, sep, ln, cn);
+  }
+
+  bool lexer::
+  skip_spaces ()
+  {
+    bool r (false);
+
+    xchar c (peek ());
+    bool start (c.column () == 1);
+
+    for (; !is_eos (c); c = peek ())
+    {
+      switch (c)
+      {
+      case ' ':
+      case '\t':
+        {
+          r = true;
+          break;
+        }
+      case '\n':
+        {
+          // Skip empty lines.
+          //
+          if (start)
+          {
+            r = false;
+            break;
+          }
+
+          return r;
+        }
+      case '#':
+        {
+          get ();
+
+          // Read until newline or eos.
+          //
+          for (c = peek (); !is_eos (c) && c != '\n'; c = peek ())
+            get ();
+
+          r = true;
+          continue;
+        }
+      case '\\':
+        {
+          get ();
+
+          if (peek () == '\n')
+          {
+            r = true;
+            break;
+          }
+
+          unget (c);
+          // Fall through.
+        }
+      default:
+        return r; // Not a space.
+      }
+
+      get ();
+    }
+
+    return r;
+  }
+
+  lexer::xchar lexer::
+  escape ()
+  {
+    xchar c (get ());
+
+    if (is_eos (c))
+      fail (c) << "unterminated escape sequence";
+
+    return c;
   }
 
   lexer::xchar lexer::
diff --git a/build/parser b/build/parser
index 4f099fe..53fc83c 100644
--- a/build/parser
+++ b/build/parser
@@ -77,7 +77,7 @@ namespace build
     lexer* lexer_;
     scope* scope_;
 
-    token peek_ {token_type::eos, 0, 0};
+    token peek_ {token_type::eos, false, 0, 0};
     bool peeked_ {false};
 
     std::unordered_set<path> include_;
diff --git a/build/parser.cxx b/build/parser.cxx
index d887d5f..6eca0e6 100644
--- a/build/parser.cxx
+++ b/build/parser.cxx
@@ -62,7 +62,7 @@ namespace build
     lexer_ = &l;
     scope_ = &s;
 
-    token t (type::eos, 0, 0);
+    token t (type::eos, false, 0, 0);
     type tt;
     next (t, tt);
 
diff --git a/build/token b/build/token
index a071987..e6930ff 100644
--- a/build/token
+++ b/build/token
@@ -31,6 +31,11 @@ namespace build
     token_type
     type () const {return t_;}
 
+    // Token is whitespace-separated from the previous token.
+    //
+    bool
+    separated () const {return s_;}
+
     std::string const&
     name () const {assert (t_ == token_type::name); return n_;}
 
@@ -38,14 +43,15 @@ namespace build
     std::uint64_t column () const {return c_;}
 
   public:
-    token (token_type t, std::uint64_t l, std::uint64_t c)
-        : t_ (t), l_ (l), c_ (c) {}
+    token (token_type t, bool s, std::uint64_t l, std::uint64_t c)
+        : t_ (t), s_ (s), l_ (l), c_ (c) {}
 
-    token (std::string n, std::uint64_t l, std::uint64_t c)
-        : t_ (token_type::name), n_ (std::move (n)), l_ (l), c_ (c) {}
+    token (std::string n, bool s, std::uint64_t l, std::uint64_t c)
+        : t_ (token_type::name), s_ (s), n_ (std::move (n)), l_ (l), c_ (c) {}
 
   private:
     token_type t_;
+    bool s_;
     std::string n_;
 
     std::uint64_t l_;
-- 
cgit v1.1