Add support for lexing and parsing name pairs

We will need it for the buildspec and also if/when we support map variable types.
author: Boris Kolpackov <boris@codesynthesis.com> 2015-03-06 09:15:40 +0200
committer: Boris Kolpackov <boris@codesynthesis.com> 2015-03-06 09:15:40 +0200
commit: 897a0e4fdf9ca90ee8d236a38e138a8ae6bc3627 (patch)
tree: d4c00de32d028823906d342fcd984faee8d977ff
parent: 9ef25ab2f9da89ab48ecce3fe1b8cbb0bc5f1e09 (diff)
6 files changed, 218 insertions, 46 deletions
diff --git a/build/lexer b/build/lexer
index d6817f2..787ba72 100644
--- a/build/lexer
+++ b/build/lexer
@@ -15,11 +15,29 @@
 
 namespace build
 {
+  // Context-dependent lexing mode. In the value mode we don't treat
+  // certain characters (e.g., +, =) as special so that we can use
+  // them in the variable values, e.g., 'foo = g++'. In contrast,
+  // in the variable mode, we restrict certain characters (e.g., /)
+  // from appearing in the name. The pairs mode is just like value
+  // except that we split names separated by '='. The pairs mode must
+  // be set manually.
+  //
+  enum class lexer_mode {normal, value, variable, pairs};
+
   class lexer
   {
   public:
     lexer (std::istream& is, const std::string& name): is_ (is), fail (name) {}
 
+    // Note: sets mode for the next token.
+    //
+    void
+    mode (lexer_mode m) {next_mode_ = m;}
+
+    lexer_mode
+    mode () const {return mode_;}
+
     // Scanner.
     //
     token
@@ -108,15 +126,9 @@ namespace build
     xchar buf_ {0, 0, 0};
 
     bool eos_ {false};
-
-    // Context-dependent lexing mode. In the value mode we don't treat
-    // certain characters (e.g., +, =) as special so that we can use
-    // them in the variable values, e.g., 'foo = g++'. In contrast,
-    // in the variable mode, we restrict certain character (e.g., /)
-    // from appearing in the name.
-    //
-    enum class mode {normal, value, variable};
-    mode mode_ {mode::normal};
+    lexer_mode mode_ {lexer_mode::normal};
+    lexer_mode next_mode_; // Mode to switch to for the next token.
+    lexer_mode prev_mode_; // Mode to return to after this mode expires.
   };
 }
 
diff --git a/build/lexer.cxx b/build/lexer.cxx
index 5f394fc..9683567 100644
--- a/build/lexer.cxx
+++ b/build/lexer.cxx
@@ -11,6 +11,12 @@ namespace build
   token lexer::
   next ()
   {
+    if (mode_ != next_mode_)
+    {
+      prev_mode_ = mode_;
+      mode_ = next_mode_;
+    }
+
     bool sep (skip_spaces ());
 
     xchar c (get ());
@@ -27,8 +33,8 @@ namespace build
       {
         // Restore the normal mode at the end of the line.
         //
-        if (mode_ == mode::value)
-          mode_ = mode::normal;
+        if (mode_ == lexer_mode::value || mode_ == lexer_mode::pairs)
+          mode_ = next_mode_ = lexer_mode::normal;
 
         return token (token_type::newline, sep, ln, cn);
       }
@@ -42,7 +48,10 @@ namespace build
       }
     case '$':
       {
-        mode_ = mode::variable; // The next name is lexed in the var mode.
+        // The following name is lexed in the variable mode.
+        //
+        next_mode_ = lexer_mode::variable;
+
         return token (token_type::dollar, sep, ln, cn);
       }
     case '(':
@@ -56,9 +65,9 @@ namespace build
     }
 
     // The following characters are not treated as special in the
-    // value mode.
+    // value or pairs mode.
     //
-    if (mode_ != mode::value)
+    if (mode_ != lexer_mode::value && mode_ != lexer_mode::pairs)
     {
       // NOTE: remember to update name() if adding new punctuations.
       //
@@ -68,22 +77,39 @@ namespace build
         {
           return token (token_type::colon, sep, ln, cn);
         }
-      case '=':
-        {
-          mode_ = mode::value;
-          return token (token_type::equal, sep, ln, cn);
-        }
       case '+':
         {
           if (get () != '=')
             fail (c) << "expected = after +";
 
-          mode_ = mode::value;
+          next_mode_ = lexer_mode::value;
           return token (token_type::plus_equal, sep, ln, cn);
         }
       }
     }
 
+    // The following characters are not treated as special in the
+    // value mode.
+    //
+    if (mode_ != lexer_mode::value)
+    {
+      // NOTE: remember to update name() if adding new punctuations.
+      //
+      switch (c)
+      {
+      case '=':
+        {
+          // Unless we are already in the pairs mode, switch to the
+          // value mode.
+          //
+          if (next_mode_ != lexer_mode::pairs)
+            next_mode_ = lexer_mode::value;
+
+          return token (token_type::equal, sep, ln, cn);
+        }
+      }
+    }
+
     // Otherwise it is a name.
     //
     return name (c, sep);
@@ -101,14 +127,13 @@ namespace build
       bool done (false);
 
       // The following characters are not treated as special in the
-      // value mode.
+      // value or pairs mode.
       //
-      if (mode_ != mode::value)
+      if (mode_ != lexer_mode::value && mode_ != lexer_mode::pairs)
       {
         switch (c)
         {
         case ':':
-        case '=':
         case '+':
           {
             done = true;
@@ -120,10 +145,28 @@ namespace build
           break;
       }
 
+      // The following characters are not treated as special in the
+      // value mode.
+      //
+      if (mode_ != lexer_mode::value)
+      {
+        switch (c)
+        {
+        case '=':
+          {
+            done = true;
+            break;
+          }
+        }
+
+        if (done)
+          break;
+      }
+
       // While these extra characters are treated as the name end in
       // the variable mode.
       //
-      if (mode_ == mode::variable)
+      if (mode_ == lexer_mode::variable)
       {
         switch (c)
         {
@@ -171,8 +214,8 @@ namespace build
         break;
     }
 
-    if (mode_ == mode::variable)
-      mode_ = mode::normal;
+    if (mode_ == lexer_mode::variable)
+      next_mode_ = prev_mode_;
 
     return token (lexeme, sep, ln, cn);
   }
diff --git a/build/name b/build/name
index 00f0c00..fc89f44 100644
--- a/build/name
+++ b/build/name
@@ -19,6 +19,9 @@ namespace build
   // without a type and directory can be used to represent any text.
   // A name with directory and empty value represents a directory.
   //
+  // If pair is true, then this name and the next in the list form
+  // a pair.
+  //
   struct name
   {
     explicit
@@ -33,6 +36,7 @@ namespace build
     std::string type;
     path dir;
     std::string value;
+    bool pair {false};
   };
 
   typedef std::vector<name> names;
diff --git a/build/name.cxx b/build/name.cxx
index 6280676..46e2440 100644
--- a/build/name.cxx
+++ b/build/name.cxx
@@ -17,6 +17,7 @@ namespace build
   {
     bool ht (!n.type.empty ());
     bool hv (!n.value.empty ());
+    bool hd (false);
 
     if (ht)
       os << n.type << '{';
@@ -37,6 +38,8 @@ namespace build
         //
         if (s.back () != path::traits::directory_separator && (hv || !ht))
           os << path::traits::directory_separator;
+
+        hd = true;
       }
     }
 
@@ -45,14 +48,21 @@ namespace build
     if (ht)
       os << '}';
 
+    if (!ht && !hv && !hd)
+      os << "{}"; // Nothing got printed.
+
     return os;
   }
 
   ostream&
   operator<< (ostream& os, const names& ns)
   {
-    for (auto b (ns.begin ()), i (b), e (ns.end ()); i != e; ++i)
-      os << (i != b ? " " : "") << *i;
+    for (auto i (ns.begin ()), e (ns.end ()); i != e; )
+    {
+      const name& n (*i);
+      ++i;
+      os << n << (n.pair ? "=" : (i != e ? " " : ""));
+    }
 
     return os;
   }
diff --git a/build/parser b/build/parser
index 6e0be34..4375b27 100644
--- a/build/parser
+++ b/build/parser
@@ -51,12 +51,13 @@ namespace build
     names (token& t, token_type& tt)
     {
       names_type ns;
-      names (t, tt, ns, nullptr, nullptr);
+      names (t, tt, ns, 0, nullptr, nullptr);
       return ns;
     }
 
     void
-    names (token&, token_type&, names_type&,
+    names (token&, token_type&,
+           names_type&, std::size_t pair,
            const path* dir, const std::string* type);
 
     // Utilities.
diff --git a/build/parser.cxx b/build/parser.cxx
index 8f4f81d..56a61c5 100644
--- a/build/parser.cxx
+++ b/build/parser.cxx
@@ -191,6 +191,12 @@ namespace build
 
       if (tt == type::colon)
       {
+        // While '{}:' means an empty name, '{$x}:' where x is an empty
+        // list means an empty list.
+        //
+        if (ns.empty ())
+          fail (t) << "target expected before :";
+
         next (t, tt);
 
         if (tt == type::newline)
@@ -625,8 +631,18 @@ namespace build
   }
 
   void parser::
-  names (token& t, type& tt, names_type& ns, const path* dp, const string* tp)
+  names (token& t,
+         type& tt,
+         names_type& ns,
+         size_t pair,
+         const path* dp,
+         const string* tp)
   {
+    // If pair is not 0, then it is the index + 1 of the first half of
+    // the pair for which we are parsing the second halves, e.g.,
+    // a={b c d{e f} {}}.
+    //
+
     // Buffer that is used to collect the complete name in case of an
     // unseparated variable expansion, e.g., 'foo$bar$(baz)fox'. The
     // idea is to concatenate all the individual parts in this buffer
@@ -634,6 +650,12 @@ namespace build
     //
     string concat;
 
+    // Number of names in the last group. This is used to detect when
+    // we need to add an empty first pair element (e.g., {=y}) or when
+    // we have a (for now) unsupported multi-name LHS (e.g., {x y}=z).
+    //
+    size_t count (0);
+
     for (bool first (true);; first = false)
     {
       // If the accumulating buffer is not empty, then we have two options:
@@ -714,7 +736,14 @@ namespace build
           }
 
           next (t, tt);
-          names (t, tt, ns, dp1, tp1);
+          count = ns.size ();
+          names (t, tt,
+                 ns,
+                 (pair != 0
+                  ? pair
+                  : (ns.empty () || !ns.back ().pair ? 0 : ns.size ())),
+                 dp1, tp1);
+          count = ns.size () - count;
 
           if (tt != type::rcbrace)
             fail (t) << "expected } instead of " << t;
@@ -723,6 +752,12 @@ namespace build
           continue;
         }
 
+        // If we are a second half of a pair, add another first half
+        // unless this is the first instance.
+        //
+        if (pair != 0 && pair != ns.size ())
+          ns.push_back (ns[pair - 1]);
+
         // If it ends with a directory separator, then it is a directory.
         // Note that at this stage we don't treat '.' and '..' as special
         // (unless they are specified with a directory separator) because
@@ -753,20 +788,7 @@ namespace build
                            (dp != nullptr ? *dp : path ()),
                            move (name));
 
-        continue;
-      }
-
-      // Untyped name group without a directory prefix, e.g., '{foo bar}'.
-      //
-      if (tt == type::lcbrace)
-      {
-        next (t, tt);
-        names (t, tt, ns, dp, tp);
-
-        if (tt != type::rcbrace)
-          fail (t) << "expected } instead of " << t;
-
-        tt = peek ();
+        count = 1;
         continue;
       }
 
@@ -887,20 +909,91 @@ namespace build
                          << "expansion";
             }
 
+            // If we are a second half of a pair.
+            //
+            if (pair != 0)
+            {
+              // Check that there are no nested pairs.
+              //
+              if (n.pair)
+                fail (t) << "nested pair in variable expansion";
+
+              // And add another first half unless this is the first instance.
+              //
+              if (pair != ns.size ())
+                ns.push_back (ns[pair - 1]);
+            }
+
             ns.emplace_back ((tp1 != nullptr ? *tp1 : string ()),
                              (dp1 != nullptr ? *dp1 : path ()),
                              n.value);
           }
+
+          count = lv.data.size ();
         }
 
         continue;
       }
 
+      // Untyped name group without a directory prefix, e.g., '{foo bar}'.
+      //
+      if (tt == type::lcbrace)
+      {
+        next (t, tt);
+        count = ns.size ();
+        names (t, tt,
+               ns,
+               (pair != 0
+                ? pair
+                : (ns.empty () || !ns.back ().pair ? 0 : ns.size ())),
+               dp, tp);
+        count = ns.size () - count;
+
+        if (tt != type::rcbrace)
+          fail (t) << "expected } instead of " << t;
+
+        tt = peek ();
+        continue;
+      }
+
+      // A pair separator (only in the pair mode).
+      //
+      if (tt == type::equal && lexer_->mode () == lexer_mode::pairs)
+      {
+        if (pair != 0)
+          fail (t) << "nested pair on the right hand side of a pair";
+
+        if (count > 1)
+          fail (t) << "multiple names on the left hand side of a pair";
+
+        if (count == 0)
+        {
+          // Empty LHS (e.g., {=y}): create an empty name.
+          //
+          ns.emplace_back ((tp != nullptr ? *tp : string ()),
+                           (dp != nullptr ? *dp : path ()),
+                           "");
+          count = 1;
+        }
+
+        ns.back ().pair = true;
+        tt = peek ();
+        continue;
+      }
+
       if (!first)
         break;
 
+      // Our caller expected this to be a name.
+      //
       if (tt == type::rcbrace) // Empty name, e.g., dir{}.
       {
+        // If we are a second half of a pair, add another first half
+        // unless this is the first instance.
+        //
+        if (pair != 0 && pair != ns.size ())
+          ns.push_back (ns[pair - 1]);
+
         ns.emplace_back ((tp != nullptr ? *tp : string ()),
                          (dp != nullptr ? *dp : path ()),
                          "");
@@ -909,6 +1002,15 @@ namespace build
       else
         fail (t) << "expected name instead of " << t;
     }
+
+    // Handle the empty RHS in a pair (e.g., {y=}).
+    //
+    if (!ns.empty () && ns.back ().pair)
+    {
+      ns.emplace_back ((tp != nullptr ? *tp : string ()),
+                       (dp != nullptr ? *dp : path ()),
+                       "");
+    }
   }
 
   void parser::
author	Boris Kolpackov <boris@codesynthesis.com>	2015-03-06 09:15:40 +0200
committer	Boris Kolpackov <boris@codesynthesis.com>	2015-03-06 09:15:40 +0200
commit	897a0e4fdf9ca90ee8d236a38e138a8ae6bc3627 (patch)
tree	d4c00de32d028823906d342fcd984faee8d977ff
parent	9ef25ab2f9da89ab48ecce3fe1b8cbb0bc5f1e09 (diff)