From 4bf42322fdd5dd7e01a3f61272bccc4a66a5585f Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Sat, 2 Apr 2016 12:28:56 +0200
Subject: Add attribute syntax infrastructure

---
 build2/lexer      |  26 +++++----
 build2/lexer.cxx  |  31 ++++++----
 build2/parser     |  10 ++++
 build2/parser.cxx | 166 ++++++++++++++++++++++++++++++++++++++++++++++--------
 build2/token      |   6 +-
 build2/token.cxx  |   2 +
 6 files changed, 192 insertions(+), 49 deletions(-)

(limited to 'build2')
diff --git a/build2/lexer b/build2/lexer
index 001d52c..c856344 100644
--- a/build2/lexer
+++ b/build2/lexer
@@ -42,32 +42,31 @@ namespace build2
            void (*processor) (token&, const lexer&) = nullptr)
         : char_scanner (is), fail (name), processor_ (processor), sep_ (false)
     {
-      mode_.push (lexer_mode::normal);
+      mode (lexer_mode::normal);
     }
 
     const path&
     name () const {return fail.name_;}
 
-    // Note: sets mode for the next token. If mode is pairs, then
-    // the second argument specifies the separator character.
+    // Note: sets mode for the next token. If mode is pairs, then the second
+    // argument specifies the separator character.
     //
     void
-    mode (lexer_mode m, char pair_separator = '=')
+    mode (lexer_mode m, char pair_separator = '\0')
     {
-      mode_.push (m);
-      pair_separator_ = pair_separator;
+      state_.push (state{m, pair_separator});
     }
 
     // Expire the current mode early.
     //
     void
-    expire_mode () {mode_.pop ();}
+    expire_mode () {state_.pop ();}
 
     lexer_mode
-    mode () const {return mode_.top ();}
+    mode () const {return state_.top ().mode;}
 
     char
-    pair_separator () const {return pair_separator_;}
+    pair_separator () const {return state_.top ().pair_separator;}
 
     // Scanner. Note that it is ok to call next() again after getting eos.
     //
@@ -123,8 +122,13 @@ namespace build2
 
     void (*processor_) (token&, const lexer&);
 
-    std::stack<lexer_mode> mode_;
-    char pair_separator_;
+    struct state // A lexer mode together with its pair separator.
+    {
+      lexer_mode mode;
+      char pair_separator; // Separator for the pairs mode ('\0' if unspecified).
+    };
+    std::stack<state> state_; // Top is current; see mode() and expire_mode().
+
     bool sep_; // True if we skipped spaces in peek().
   };
 }
diff --git a/build2/lexer.cxx b/build2/lexer.cxx
index 795387a..ba43839 100644
--- a/build2/lexer.cxx
+++ b/build2/lexer.cxx
@@ -24,7 +24,7 @@ namespace build2
   {
     // In the quoted mode we don't skip spaces.
     //
-    sep_ = mode_.top () != lexer_mode::quoted && skip_spaces ();
+    sep_ = state_.top ().mode != lexer_mode::quoted && skip_spaces ();
     xchar c (peek ());
     return make_pair (eos (c) ? '\0' : char (c), sep_);
   }
@@ -32,7 +32,8 @@ namespace build2
   token lexer::
   next_impl ()
   {
-    lexer_mode m (mode_.top ());
+    lexer_mode m (state_.top ().mode);
+    char ps (state_.top ().pair_separator);
 
     // For some modes we have dedicated imlementations of next().
     //
@@ -61,12 +62,14 @@ namespace build2
         // Expire value/pairs mode at the end of the line.
         //
         if (m == lexer_mode::value || m == lexer_mode::pairs)
-          mode_.pop ();
+          state_.pop ();
 
         return token (type::newline, sep, ln, cn);
       }
     case '{': return token (type::lcbrace, sep, ln, cn);
     case '}': return token (type::rcbrace, sep, ln, cn);
+    case '[': return token (type::lsbrace, sep, ln, cn);
+    case ']': return token (type::rsbrace, sep, ln, cn);
     case '$': return token (type::dollar, sep, ln, cn);
     case '(': return token (type::lparen, sep, ln, cn);
     case ')': return token (type::rparen, sep, ln, cn);
@@ -74,7 +77,7 @@ namespace build2
 
     // Handle pair separator.
     //
-    if (m == lexer_mode::pairs && c == pair_separator_)
+    if (m == lexer_mode::pairs && c == ps)
       return token (type::pair_separator, sep, ln, cn);
 
     // The following characters are not treated as special in the
@@ -129,6 +132,9 @@ namespace build2
     // This mode is quite a bit like the value mode when it comes to special
     // characters, except that we have some of our own.
     //
+    // Note: we don't treat [ and ] as special here. Maybe we can use them for
+    // something later.
+    //
     switch (c)
     {
       // NOTE: remember to update name() if adding new special characters.
@@ -140,7 +146,7 @@ namespace build2
     case '(': return token (type::lparen, sep, ln, cn);
     case ')':
       {
-        mode_.pop (); // Expire eval mode.
+        state_.pop (); // Expire eval mode.
         return token (type::rparen, sep, ln, cn);
       }
     case '=':
@@ -191,7 +197,8 @@ namespace build2
     uint64_t ln (c.line), cn (c.column);
     string lexeme;
 
-    lexer_mode m (mode_.top ());
+    lexer_mode m (state_.top ().mode);
+    char ps (state_.top ().pair_separator);
     bool quoted (m == lexer_mode::quoted);
 
     for (; !eos (c); c = peek ())
@@ -200,7 +207,7 @@ namespace build2
 
       // Handle pair separator.
       //
-      if (m == lexer_mode::pairs && c == pair_separator_)
+      if (m == lexer_mode::pairs && c == ps)
         break;
 
       // The following characters are only special in the normal and
@@ -297,6 +304,8 @@ namespace build2
         case '#':
         case '{':
         case '}':
+        case '[':
+        case ']':
         case ')':
           {
             done = true;
@@ -355,14 +364,14 @@ namespace build2
             get ();
 
             if (m == lexer_mode::quoted)
-              mode_.pop ();
+              state_.pop ();
             else
             {
-              mode_.push (lexer_mode::quoted);
+              mode (lexer_mode::quoted);
               quoted = true;
             }
 
-            m = mode_.top ();
+            m = state_.top ().mode;
             continue;
           }
         }
@@ -384,7 +393,7 @@ namespace build2
     // Expire variable mode at the end of the name.
     //
     if (m == lexer_mode::variable)
-      mode_.pop ();
+      state_.pop ();
 
     return token (lexeme, sep, quoted, ln, cn);
   }
diff --git a/build2/parser b/build2/parser
index 0e15e4c..a4b5dc5 100644
--- a/build2/parser
+++ b/build2/parser
@@ -96,6 +96,13 @@ namespace build2
     void
     eval_trailer (token&, token_type&, names_type&);
 
+    // If the current token is [, parse the attribute sequence until ] storing
+    // it in attrs_ (which is always cleared first), get the next token, verify
+    // it is not a newline or eos, and return true. Otherwise return false.
+    //
+    bool
+    attributes (token&, token_type&);
+
     // If chunk is true, then parse the smallest but complete, name-wise,
     // chunk of input. Note that in this case you may still end up with
     // multiple names, for example, {foo bar}.
@@ -290,6 +297,9 @@ namespace build2
     target* target_; // Current target, if any.
     scope* scope_;   // Current base scope (out_base).
     scope* root_;    // Current root scope (out_root).
+
+    vector<pair<string, string>> attrs_; // Current attributes, if any.
+
     target* default_target_;
     names_type export_value_;
 
diff --git a/build2/parser.cxx b/build2/parser.cxx
index 4c5bd4e..663ca0c 100644
--- a/build2/parser.cxx
+++ b/build2/parser.cxx
@@ -95,6 +95,11 @@ namespace build2
     //
     while (tt != type::eos)
     {
+      // Extract attributes if any.
+      //
+      location al (get_location (t, &path_));
+      bool ha (attributes (t, tt));
+
       // We always start with one or more names.
       //
       if (tt != type::name    &&
@@ -109,51 +114,44 @@ namespace build2
       if (tt == type::name && keyword (t))
       {
         const string& n (t.value);
+        void (parser::*f) (token&, token_type&) = nullptr;
 
         if (n == "print")
         {
           // @@ Is this the only place where it is valid? Probably also
           // in var namespace.
           //
-          print (t, tt);
-          continue;
+          f = &parser::print;
         }
         else if (n == "source")
         {
-          source (t, tt);
-          continue;
+          f = &parser::source;
         }
         else if (n == "include")
         {
-          include (t, tt);
-          continue;
+          f = &parser::include;
         }
         else if (n == "import")
         {
-          import (t, tt);
-          continue;
+          f = &parser::import;
         }
         else if (n == "export")
         {
-          export_ (t, tt);
-          continue;
+          f = &parser::export_;
         }
         else if (n == "using" ||
                  n == "using?")
         {
-          using_ (t, tt);
-          continue;
+          f = &parser::using_;
         }
         else if (n == "define")
         {
-          define (t, tt);
-          continue;
+          f = &parser::define;
         }
         else if (n == "if" ||
                  n == "if!")
         {
-          if_else (t, tt);
-          continue;
+          f = &parser::if_else;
         }
         else if (n == "else" ||
                  n == "elif" ||
@@ -163,10 +161,21 @@ namespace build2
           //
           fail (t) << n << " without if";
         }
+
+        if (f != nullptr)
+        {
+          if (ha)
+            fail (al) << "attributes before " << n;
+
+          (this->*f) (t, tt);
+          continue;
+        }
       }
 
       // ': foo' is equvalent to '{}: foo' and to 'dir{}: foo'.
       //
+      // @@ I think we should make ': foo' invalid.
+      //
       const location nloc (get_location (t, &path_));
       names_type ns (tt != type::colon
                      ? names (t, tt)
@@ -232,8 +241,6 @@ namespace build2
             bool dir (false);
             for (const auto& n: ns)
             {
-              // A name represents directory as an empty value.
-              //
               if (n.directory ())
               {
                 if (ns.size () != 1)
@@ -251,7 +258,12 @@ namespace build2
 
             if (dir)
             {
-              // Directory scope. Can contain anything that a top level can.
+              // Directory scope.
+              //
+              if (ha)
+                fail (al) << "attributes before directory scope";
+
+              // Can contain anything that a top level can.
               //
               enter_scope (move (ns[0].dir)); // Steal.
               clause (t, tt);
@@ -259,6 +271,9 @@ namespace build2
             }
             else
             {
+              if (ha)
+                fail (al) << "attributes before target scope";
+
               // @@ TODO: target scope.
             }
 
@@ -276,13 +291,23 @@ namespace build2
           }
 
           // If this is not a scope, then it is a target without any
-          // prerequisites.
+          // prerequisites. Fall through.
           //
         }
 
         // Dependency declaration or scope/target-specific variable
         // assignment.
         //
+
+        // We will have to stash them if we later support attributes on
+        // target/scope.
+        //
+        if (ha)
+          fail (al) << "attributes before target/scope";
+
+        al = get_location (t, &path_);
+        ha = attributes (t, tt);
+
         if (tt == type::name    ||
             tt == type::lcbrace ||
             tt == type::dollar  ||
@@ -334,6 +359,8 @@ namespace build2
               var_pool.find (
                 variable_name (move (pns), ploc)));
 
+            // @@ TODO: handle attrs.
+
             // If we have multiple targets/scopes, then we save the value
             // tokens when parsing the first one and then replay them for
             // the subsequent. We have to do it this way because the value
@@ -349,13 +376,15 @@ namespace build2
 
               if (n.directory ())
               {
+                // Scope variable.
+                //
                 enter_scope (move (n.dir));
                 variable (t, tt, var, att);
                 leave_scope ();
               }
               else
               {
-                // Figure out if this is a target or type/pattern specific
+                // Figure out if this is a target or type/pattern-specific
                 // variable.
                 //
                 size_t p (n.value.find ('*'));
@@ -418,6 +447,9 @@ namespace build2
           //
           else
           {
+            if (ha)
+              fail (al) << "attributes before prerequisites";
+
             // Prepare the prerequisite list.
             //
             target::prerequisites_type ps;
@@ -484,6 +516,8 @@ namespace build2
       //
       if (tt == type::assign || tt == type::prepend || tt == type::append)
       {
+        // @@ TODO: handle attrs.
+
         variable (t, tt, var_pool.find (variable_name (move (ns), nloc)), tt);
 
         if (tt == type::newline)
@@ -498,6 +532,9 @@ namespace build2
       //
       if (tt == type::newline && ns.empty ())
       {
+        if (ha)
+          fail (al) << "standalone attributes";
+
         next (t, tt);
         continue;
       }
@@ -755,6 +792,12 @@ namespace build2
     mode (lexer_mode::pairs, '@');
     next (t, tt);
 
+    // Get attributes, if any (note that here we will go into a nested pairs
+    // mode).
+    //
+    location al (get_location (t, &path_));
+    bool ha (attributes (t, tt));
+
     if (tt == type::name)
     {
       // Split the token into the variable name and value at position (p) of
@@ -818,12 +861,18 @@ namespace build2
           split (p);    // Returned name should be empty.
         }
       }
+    }
 
-      if (var != nullptr)
-        val = at == type::assign
-          ? &scope_->assign (*var)
-          : &scope_->append (*var);
+    if (var != nullptr)
+    {
+      // @@ TODO: handle attrs.
+
+      val = at == type::assign
+        ? &scope_->assign (*var)
+        : &scope_->append (*var);
     }
+    else if (ha)
+      fail (al) << "attributes without variable";
 
     // The rest should be a list of projects and/or targets. Parse
     // them as names to get variable expansion and directory prefixes.
@@ -1271,6 +1320,73 @@ namespace build2
     }
   }
 
+  bool parser::
+  attributes (token& t, token_type& tt)
+  {
+    attrs_.clear (); // Always start afresh, even if there are no attributes.
+
+    if (tt != type::lsbrace)
+      return false;
+
+    // Using '@' for key-value pairs would be just too ugly. Seeing that we
+    // control what goes into keys/values, let's use a much nicer '='.
+    //
+    mode (lexer_mode::pairs, '=');
+    next (t, tt);
+
+    if (tt != type::rsbrace && tt != type::newline && tt != type::eos)
+    {
+      const location l (get_location (t, &path_));
+      names_type ns (names (t, tt));
+
+      text << '[' << ns << ']'; // @@ Presumably temporary diagnostics; confirm.
+
+      for (auto i (ns.begin ()); i != ns.end (); ++i)
+      {
+        string k, v; // Key and value (value stays empty if not a pair).
+
+        try
+        {
+          k = convert<string> (move (*i));
+        }
+        catch (const invalid_argument&)
+        {
+          fail (l) << "invalid attribute key '" << *i << "'";
+        }
+
+        if (i->pair) // NOTE(review): reads *i after the move above; confirm.
+        {
+          try
+          {
+            v = convert<string> (move (*++i)); // Value is the next name.
+          }
+          catch (const invalid_argument&)
+          {
+            fail (l) << "invalid attribute value '" << *i << "'";
+          }
+        }
+
+        attrs_.emplace_back (move (k), move (v));
+      }
+    }
+
+    // Manually expire the pairs mode if we haven't reached newline/eos (where
+    // it expires automatically).
+    //
+    if (lexer_->mode () == lexer_mode::pairs)
+      lexer_->expire_mode ();
+
+    if (tt != type::rsbrace)
+      fail (t) << "expected ']' instead of " << t;
+
+    next (t, tt); // Get the token that follows ].
+
+    if (tt == type::newline || tt == type::eos)
+      fail (t) << "standalone attributes";
+
+    return true;
+  }
+
   // Parse names inside {} and handle the following "crosses" (i.e.,
   // {a b}{x y}) if any. Return the number of names added to the list.
   //
diff --git a/build2/token b/build2/token
index 6202f44..6695010 100644
--- a/build2/token
+++ b/build2/token
@@ -17,8 +17,10 @@ namespace build2
     newline,
     pair_separator,
     colon,
-    lcbrace,
-    rcbrace,
+    lcbrace,   // {
+    rcbrace,   // }
+    lsbrace,   // [
+    rsbrace,   // ]
     assign,    // =
     prepend,   // =+
     append,    // +=
diff --git a/build2/token.cxx b/build2/token.cxx
index 7a36a2d..90aeff5 100644
--- a/build2/token.cxx
+++ b/build2/token.cxx
@@ -19,6 +19,8 @@ namespace build2
     case token_type::colon:          os << ":"; break;
     case token_type::lcbrace:        os << "{"; break;
     case token_type::rcbrace:        os << "}"; break;
+    case token_type::lsbrace:        os << "["; break;
+    case token_type::rsbrace:        os << "]"; break;
     case token_type::assign:         os << "="; break;
     case token_type::prepend:        os << "=+"; break;
     case token_type::append:         os << "+="; break;
-- 
cgit v1.1