Tighten up attribute recognition during parsing

Now it should be possible to use `[]` for wildcard patterns, for example: `foo = foo.[hit]xx`. Note that a leading bracket expression will still be recognized as attributes, and escaping or quoting it will inhibit pattern matching. To resolve this case we need to specify an empty attribute list: `foo = [] [abc]-foo.cxx`
author: Boris Kolpackov <boris@codesynthesis.com> 2019-11-14 12:55:54 +0200
committer: Boris Kolpackov <boris@codesynthesis.com> 2019-11-14 13:20:08 +0200
commit: 5ec57d68a5205173a02c34a24d7129347d43196c (patch)
tree: 303de46753bcde9f9ccff094d6591b6bb7583931 /libbuild2
parent: 62a688e3fd7d1fdb8ce5590ebe9cb99e90cbe5d7 (diff)
9 files changed, 286 insertions, 190 deletions
diff --git a/libbuild2/lexer+eval.test.testscript b/libbuild2/lexer+eval.test.testscript
index 963f3d0..46452a7 100644
--- a/libbuild2/lexer+eval.test.testscript
+++ b/libbuild2/lexer+eval.test.testscript
@@ -6,7 +6,7 @@ test.arguments = eval
 
 : punctuation
 :
-$* <:'x:x{x}x[x]x$x?x,x(x)' >>EOO
+$* <:'x:x{x}x$x?x,x(x)' >>EOO
 'x'
 :
 'x'
@@ -14,10 +14,6 @@ $* <:'x:x{x}x[x]x$x?x,x(x)' >>EOO
 'x'
 }
 'x'
-[
-'x'
-]
-'x'
 $
 'x'
 ?
diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx
index 61d7fbf..b405929 100644
--- a/libbuild2/lexer.cxx
+++ b/libbuild2/lexer.cxx
@@ -23,11 +23,15 @@ namespace build2
   void lexer::
   mode (lexer_mode m, char ps, optional<const char*> esc)
   {
+    bool a (false); // attributes
+
     const char* s1 (nullptr);
     const char* s2 (nullptr);
-    bool s (true);
-    bool n (true);
-    bool q (true);
+
+    bool s (true); // space
+    bool n (true); // newline
+    bool q (true); // quotes
+
 
     if (!esc)
     {
@@ -39,35 +43,39 @@ namespace build2
     {
     case lexer_mode::normal:
       {
-        s1 = ":<>=+ $(){}[]#\t\n";
-        s2 = "    =           ";
+        a  = true;
+        s1 = ":<>=+ $(){}#\t\n";
+        s2 = "    =         ";
         break;
       }
     case lexer_mode::value:
       {
-        s1 = " $(){}[]#\t\n";
-        s2 = "           ";
+        s1 = " $(){}#\t\n";
+        s2 = "         ";
         break;
       }
     case lexer_mode::values:
       {
-        s1 = " $(){}[],#\t\n";
-        s2 = "            ";
+        // a: beginning and after `,`?
+        s1 = " $(){},#\t\n";
+        s2 = "          ";
         break;
       }
     case lexer_mode::switch_expressions:
       {
-        s1 = " $(){}[],:#\t\n";
-        s2 = "             ";
+        // a: beginning and after `,`?
+        s1 = " $(){},:#\t\n";
+        s2 = "           ";
         break;
       }
     case lexer_mode::case_patterns:
       {
-        s1 = " $(){}[],|:#\t\n";
-        s2 = "              ";
+        // a: beginning and after `,` & `|`?
+        s1 = " $(){},|:#\t\n";
+        s2 = "            ";
         break;
       }
-    case lexer_mode::attribute:
+    case lexer_mode::attributes:
       {
         s1 = " $(]#\t\n";
         s2 = "       ";
@@ -75,8 +83,8 @@ namespace build2
       }
     case lexer_mode::eval:
       {
-        s1 = ":<>=!&|?, $(){}[]#\t\n";
-        s2 = "   = &|             ";
+        s1 = ":<>=!&|?, $(){}#\t\n";
+        s2 = "   = &|           ";
         break;
       }
     case lexer_mode::buildspec:
@@ -91,8 +99,10 @@ namespace build2
         //
         // 3. Treat newline as an ordinary space.
         //
-        s1 = " $(){}[],\t\n";
-        s2 = "           ";
+        // Also note that we don't have buildspec attributes.
+        //
+        s1 = " $(){},\t\n";
+        s2 = "         ";
         n = false;
         break;
       }
@@ -109,13 +119,13 @@ namespace build2
     default: assert (false); // Unhandled custom mode.
     }
 
-    state_.push (state {m, ps, s, n, q, *esc, s1, s2});
+    state_.push (state {m, a, ps, s, n, q, *esc, s1, s2});
   }
 
   token lexer::
   next ()
   {
-    const state& st (state_.top ());
+    state& st (state_.top ());
     lexer_mode m (st.mode);
 
     // For some modes we have dedicated imlementations of next().
@@ -127,7 +137,7 @@ namespace build2
     case lexer_mode::values:
     case lexer_mode::switch_expressions:
     case lexer_mode::case_patterns:
-    case lexer_mode::attribute:
+    case lexer_mode::attributes:
     case lexer_mode::variable:
     case lexer_mode::buildspec:     break;
     case lexer_mode::eval:          return next_eval ();
@@ -147,6 +157,17 @@ namespace build2
                     ln, cn, token_printer);
     };
 
+    // Handle attributes (do it first to make sure the flag is cleared
+    // regardless of what we return).
+    //
+    if (st.attributes)
+    {
+      st.attributes = false;
+
+      if (c == '[')
+        return make_token (type::lsbrace);
+    }
+
     if (eos (c))
       return make_token (type::eos);
 
@@ -155,11 +176,11 @@ namespace build2
     if (c == st.sep_pair)
       return make_token (type::pair_separator, string (1, c));
 
+    // NOTE: remember to update mode(), next_eval() if adding any new special
+    // characters.
+
     switch (c)
     {
-      // NOTE: remember to update mode(), next_eval() if adding new special
-      // characters.
-      //
     case '\n':
       {
         // Expire value/values modes at the end of the line.
@@ -170,20 +191,13 @@ namespace build2
             m == lexer_mode::case_patterns)
           state_.pop ();
 
-        sep = true; // Treat newline as always separated.
-        return make_token (type::newline);
-      }
-    case '{': return make_token (type::lcbrace);
-    case '}': return make_token (type::rcbrace);
-    case '[': return make_token (type::lsbrace);
-    case ']':
-      {
-        // Expire attribute mode after closing ']'.
+        // Re-enable attributes in the normal mode.
         //
-        if (m == lexer_mode::attribute)
-          state_.pop ();
+        if (state_.top ().mode == lexer_mode::normal)
+          state_.top ().attributes = true;
 
-        return make_token (type::rsbrace);
+        sep = true; // Treat newline as always separated.
+        return make_token (type::newline);
       }
     case '$': return make_token (type::dollar);
     case ')': return make_token (type::rparen);
@@ -198,6 +212,31 @@ namespace build2
       }
     }
 
+    // The following characters are special in all modes except attributes.
+    //
+    if (m != lexer_mode::attributes)
+    {
+      switch (c)
+      {
+      case '{': return make_token (type::lcbrace);
+      case '}': return make_token (type::rcbrace);
+      }
+    }
+
+    // The following characters are special in the attributes mode.
+    //
+    if (m == lexer_mode::attributes)
+    {
+      switch (c)
+      {
+      case ']':
+        {
+          state_.pop (); // Expire the attributes mode after closing `]`.
+          return make_token (type::rsbrace);
+        }
+      }
+    }
+
     // The following characters are special in the normal, variable, and
     // switch_expressions modes.
     //
@@ -208,9 +247,6 @@ namespace build2
     {
       switch (c)
       {
-        // NOTE: remember to update mode(), next_eval() if adding new special
-        // characters.
-        //
       case ':': return make_token (type::colon);
       }
     }
@@ -221,9 +257,6 @@ namespace build2
     {
       switch (c)
       {
-        // NOTE: remember to update mode(), next_eval() if adding new special
-        // characters.
-        //
       case '=':
         {
           if (peek () == '+')
@@ -249,8 +282,6 @@ namespace build2
     //
     if (m == lexer_mode::normal)
     {
-      // NOTE: remember to update mode() if adding new special characters.
-      //
       switch (c)
       {
       case '<': return make_token (type::labrace);
@@ -265,8 +296,6 @@ namespace build2
         m == lexer_mode::switch_expressions ||
         m == lexer_mode::case_patterns)
     {
-      // NOTE: remember to update mode() if adding new special characters.
-      //
       switch (c)
       {
       case ',': return make_token (type::comma);
@@ -277,8 +306,6 @@ namespace build2
     //
     if (m == lexer_mode::case_patterns)
     {
-      // NOTE: remember to update mode() if adding new special characters.
-      //
       switch (c)
       {
       case '|': return make_token (type::bit_or);
@@ -294,13 +321,16 @@ namespace build2
   token lexer::
   next_eval ()
   {
+    // This mode is quite a bit like the value mode when it comes to special
+    // characters, except that we have some of our own.
+
     bool sep (skip_spaces ());
     xchar c (get ());
 
     if (eos (c))
       fail (c) << "unterminated evaluation context";
 
-    const state& st (state_.top ());
+    state& st (state_.top ());
 
     uint64_t ln (c.line), cn (c.column);
 
@@ -311,28 +341,30 @@ namespace build2
                     ln, cn, token_printer);
     };
 
-    // This mode is quite a bit like the value mode when it comes to special
-    // characters, except that we have some of our own.
+    // Handle attributes (do it first to make sure the flag is cleared
+    // regardless of what we return).
     //
+    if (st.attributes)
+    {
+      st.attributes = false;
+
+      if (c == '[')
+        return make_token (type::lsbrace);
+    }
 
     // Handle pair separator.
     //
     if (c == st.sep_pair)
       return make_token (type::pair_separator, string (1, c));
 
-    // Note: we don't treat [ and ] as special here. Maybe can use them for
-    // something later.
-    //
+    // NOTE: remember to update mode() if adding any new special characters.
+
     switch (c)
     {
-      // NOTE: remember to update mode() if adding new special characters.
-      //
     case '\n': fail (c) << "newline in evaluation context" << endf;
     case ':': return make_token (type::colon);
     case '{': return make_token (type::lcbrace);
     case '}': return make_token (type::rcbrace);
-    case '[': return make_token (type::lsbrace);
-    case ']': return make_token (type::rsbrace);
     case '$': return make_token (type::dollar);
     case '?': return make_token (type::question);
     case ',': return make_token (type::comma);
diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx
index 59debc4..715926c 100644
--- a/libbuild2/lexer.hxx
+++ b/libbuild2/lexer.hxx
@@ -21,28 +21,29 @@ namespace build2
 {
   // Context-dependent lexing mode. Quoted modes are internal and should not
   // be set explicitly. In the value mode we don't treat certain characters
-  // (e.g., '+', '=') as special so that we can use them in the variable
-  // values, e.g., 'foo = g++'. In contrast, in the variable mode, we restrict
-  // certain character (e.g., '/') from appearing in the name. The values mode
-  // is like value but recogizes ',' as special (used in contexts where we
-  // need to list multiple values). The attribute mode is also like value
-  // except it doesn't treat '{' and '}' as special (so we cannot have name
-  // groups in attributes). The eval mode is used in the evaluation context.
+  // (e.g., `+`, `=`) as special so that we can use them in the variable
+  // values, e.g., `foo = g++`. In contrast, in the variable mode, we restrict
+  // certain character (e.g., `/`) from appearing in the name. The values mode
+  // is like value but recognizes `,` as special (used in contexts where we
+  // need to list multiple values). The attributes mode is also like value
+  // except it doesn't treat `{` and `}` as special (so we cannot have name
+  // groups in attributes) and recognizes the closing `]`. The eval mode is
+  // used in the evaluation context.
   //
   // A number of modes are "derived" from the value/values mode by recognizing
   // a few extra characters:
   //
   //   switch_expressions  values plus `:`
-  //   case_patterns       values plus '|' and ':'
+  //   case_patterns       values plus `|` and `:`
   //
   // Note that the normal, value/values and derived, as well as eval modes
   // split words separated by the pair character (to disable pairs one can
-  // pass '\0' as a pair character).
+  // pass `\0` as a pair character).
   //
   // The alternative modes must be set manually. The value/values and derived
   // modes automatically expires after the end of the line. The attribute mode
-  // expires after the closing ']'. The variable mode expires after the word
-  // token. And the eval mode expires after the closing ')'.
+  // expires after the closing `]`. The variable mode expires after the word
+  // token. And the eval mode expires after the closing `)`.
   //
   // Note that normally it is only safe to switch mode when the current token
   // is not quoted (or, more generally, when you are not in the double-quoted
@@ -50,6 +51,15 @@ namespace build2
   // variable name mode). Failed that your mode (which now will be the top of
   // the mode stack) will prevent proper recognition of the closing quote.
   //
+  // Finally, attributes recognition (the `[` token) cuts across most of the
+  // modes and is handled with a flag. In the normal mode it is automatically
+  // set at the beginning and after each newline. In all other modes it must
+  // be explicitly set at points where attributes are recognized. In all the
+  // cases it is automatically reset after lexing the next token (whether `[`
+  // or not).
+  //
+  // @@ Maybe also enable at the beginning of value?
+  //
 
   // Extendable/inheritable enum-like class.
   //
@@ -65,7 +75,7 @@ namespace build2
       values,
       case_patterns,
       switch_expressions,
-      attribute,
+      attributes,
       eval,
       single_quoted,
       double_quoted,
@@ -97,15 +107,20 @@ namespace build2
     name () const {return name_;}
 
     // Note: sets mode for the next token. The second argument can be used to
-    // specifythe pair separator character (if the mode supports pairs). If
-    // escapes not specified, then inherit the current mode's (thought a mode
-    // can also override it).
+    // specify the pair separator character (if the mode supports pairs). If
+    // escapes is not specified, then inherit the current mode's (though a
+    // mode can also override it).
     //
     virtual void
     mode (lexer_mode,
           char pair_separator = '\0',
           optional<const char*> escapes = nullopt);
 
+    // Enable attributes recognition for the next token.
+    //
+    void
+    enable_attributes () {state_.top ().attributes = true;}
+
     // Expire the current mode early.
     //
     void
@@ -136,6 +151,7 @@ namespace build2
     struct state
     {
       lexer_mode mode;
+      bool       attributes;
 
       char sep_pair;
       bool sep_space;    // Are whitespaces separators (see skip_spaces())?
diff --git a/libbuild2/lexer.test.cxx b/libbuild2/lexer.test.cxx
index 32151db..eeed532 100644
--- a/libbuild2/lexer.test.cxx
+++ b/libbuild2/lexer.test.cxx
@@ -31,12 +31,12 @@ namespace build2
         quote = true;
       else
       {
-        if      (a == "normal")    m = lexer_mode::normal;
-        else if (a == "variable")  m = lexer_mode::variable;
-        else if (a == "value")     m = lexer_mode::value;
-        else if (a == "attribute") m = lexer_mode::attribute;
-        else if (a == "eval")      m = lexer_mode::eval;
-        else if (a == "buildspec") m = lexer_mode::buildspec;
+        if      (a == "normal")     m = lexer_mode::normal;
+        else if (a == "variable")   m = lexer_mode::variable;
+        else if (a == "value")      m = lexer_mode::value;
+        else if (a == "attributes") m = lexer_mode::attributes;
+        else if (a == "eval")       m = lexer_mode::eval;
+        else if (a == "buildspec")  m = lexer_mode::buildspec;
         else                       assert (false);
         break;
       }
diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx
index 9e586e0..be1ba0b 100644
--- a/libbuild2/parser.cxx
+++ b/libbuild2/parser.cxx
@@ -287,11 +287,15 @@ namespace build2
   {
     tracer trace ("parser::parse_clause", &path_);
 
-    // parse_clause() should always stop at a token that is at the beginning
-    // of the line (except for eof). That is, if something is called to parse
-    // a line, it should parse it until newline (or fail). This is important
-    // for if-else blocks, directory scopes, etc., that assume the '}' token
-    // they see is on the new line.
+    // This function should be called in the normal lexing mode with the first
+    // token of a line or alternative arrangements may have to be made to
+    // recognize the attributes.
+    //
+    // It should also always stop at a token that is at the beginning of the
+    // line (except for eof). That is, if something is called to parse a line,
+    // it should parse it until newline (or fail). This is important for
+    // if-else blocks, directory scopes, etc., that assume the '}' token they
+    // see is on the new line.
     //
     bool parsed (false);
 
@@ -302,8 +306,7 @@ namespace build2
       assert (attributes_.empty ());
       auto at (attributes_push (t, tt));
 
-      // We should always start with one or more names, potentially
-      // <>-grouped.
+      // We always start with one or more names, potentially <>-grouped.
       //
       if (!(start_names (tt) || tt == type::labrace))
       {
@@ -454,6 +457,13 @@ namespace build2
         {
           // Parse target names inside < >.
           //
+          // We "reserve" the right to have attributes inside <> though what
+          // exactly that would mean is unclear. One potentially useful
+          // semantics would be the ability to specify attributes for ad hoc
+          // members though the fact that the primary target is listed first
+          // would make it rather unintuitive.
+          //
+          enable_attributes ();
           next (t, tt);
 
           auto at (attributes_push (t, tt));
@@ -621,7 +631,10 @@ namespace build2
           }
         };
 
-        if (next (t, tt) == type::newline)
+        enable_attributes (); // Recognize attributes after `:`.
+        next (t, tt);
+
+        if (tt == type::newline)
         {
           // See if this is a target block.
           //
@@ -734,85 +747,70 @@ namespace build2
       //
       // This can take any of the following forms:
       //
-      //              x = y
-      // foo/         x = y   (ns will have two elements)
-      // foo/ [attrs] x = y   (tt will be '[')
+      //        x = y
+      //   foo/ x = y   (ns will have two elements)
+      //
+      // And in the future we may also want to support:
+      //
+      //   foo/ bar/ x = y
       //
-      // In the future we may also want to support:
+      // Note that we don't support this:
       //
-      // foo/ bar/ x = y
+      //   foo/ [attrs] x = y
       //
-      if (tt == type::assign || tt == type::prepend || tt == type::append ||
-          tt == type::lsbrace)
+      // Because the meaning of `[attrs]` would be ambiguous (it could also be
+      // a name). Note that the above semantics can be easily achieved with an
+      // explicit directory scope:
+      //
+      //   foo/
+      //   {
+      //     [attrs] x = y
+      //   }
+      //
+      if (tt == type::assign || tt == type::prepend || tt == type::append)
       {
         // Detect and handle the directory scope. If things look off, then we
         // let parse_variable_name() complain.
         //
         dir_path d;
-
-        if ((ns.size () == 2 && ns[0].directory ())                      ||
-            (ns.size () == 1 && ns[0].directory () && tt == type::lsbrace))
+        if (ns.size () == 2 && ns[0].directory ())
         {
           if (at.first)
             fail (at.second) << "attributes before scope directory";
 
-          if (tt == type::lsbrace)
-          {
-            attributes_pop ();
-            attributes_push (t, tt);
-
-            d = move (ns[0].dir);
-            nloc = get_location (t);
-            ns = parse_names (t, tt, pattern_mode::ignore);
+          d = move (ns[0].dir);
+          ns.erase (ns.begin ());
 
-            // It got to be a variable assignment.
-            //
-            if (tt != type::assign  &&
-                tt != type::prepend &&
-                tt != type::append)
-              fail (t) << "expected variable assignment instead of " << t;
-          }
-          else
-          {
-            d = move (ns[0].dir);
-            ns.erase (ns.begin ());
-          }
+          // Make sure it's not a pattern (see also the target case above and
+          // scope below).
+          //
+          if (path_pattern (d))
+            fail (nloc) << "pattern in directory " << d.representation ();
         }
 
-        // Make sure not a pattern (see also the target case above and scope
-        // below).
-        //
-        if (path_pattern (d))
-          fail (nloc) << "pattern in directory " << d.representation ();
+        const variable& var (parse_variable_name (move (ns), nloc));
+        apply_variable_attributes (var);
 
-        if (tt != type::lsbrace)
+        if (var.visibility >= variable_visibility::target)
         {
-          const variable& var (parse_variable_name (move (ns), nloc));
-          apply_variable_attributes (var);
+          diag_record dr (fail (nloc));
 
-          if (var.visibility >= variable_visibility::target)
-          {
-            diag_record dr (fail (nloc));
+          dr << "variable " << var << " has " << var.visibility
+             << " visibility but is assigned on a scope";
 
-            dr << "variable " << var << " has " << var.visibility
-               << " visibility but is assigned on a scope";
-
-            if (var.visibility == variable_visibility::target)
-              dr << info << "consider changing it to '*: " << var << "'";
-          }
-
-          {
-            enter_scope sg (d.empty ()
-                            ? enter_scope ()
-                            : enter_scope (*this, move (d)));
-            parse_variable (t, tt, var, tt);
-          }
+          if (var.visibility == variable_visibility::target)
+            dr << info << "consider changing it to '*: " << var << "'";
+        }
 
-          next_after_newline (t, tt);
-          continue;
+        {
+          enter_scope sg (d.empty ()
+                          ? enter_scope ()
+                          : enter_scope (*this, move (d)));
+          parse_variable (t, tt, var, tt);
         }
 
-        // Not "our" attribute, see if anyone else likes it.
+        next_after_newline (t, tt);
+        continue;
       }
 
       // See if this is a directory scope.
@@ -873,7 +871,7 @@ namespace build2
     // Parse a target or prerequisite-specific variable block. If type is not
     // NULL, then this is a target type/pattern-specific block.
     //
-    // enter: first token of first line in the block
+    // enter: first token of first line in the block (normal lexer mode)
     // leave: rcbrace
     //
     // This is a more restricted variant of parse_clause() that only allows
@@ -1216,7 +1214,9 @@ namespace build2
       fail (ploc) << "no prerequisites in dependency chain or prerequisite-"
                   << "specific variable assignment";
 
+    enable_attributes (); // Recognize attributes after `:`.
     next (t, tt);
+
     auto at (attributes_push (t, tt));
 
     // @@ PAT: currently we pattern-expand prerequisite-specific vars.
@@ -1670,6 +1670,7 @@ namespace build2
     // manually looking for =/=+/+=.
     //
     mode (lexer_mode::value, '@');
+    enable_attributes (); // @@ VAL.
     next (t, tt);
 
     // Get variable attributes, if any (note that here we will go into a
@@ -1820,6 +1821,7 @@ namespace build2
     // being able to type them or to return NULL.
     //
     mode (lexer_mode::value, '@');
+    enable_attributes (); // @@ VAL.
     next (t, tt);
 
     auto at (attributes_push (t, tt));
@@ -1971,6 +1973,8 @@ namespace build2
     for (;;)
     {
       string k (move (t.value));
+
+      enable_attributes (); // Recognize attributes before value.
       next (t, tt);
 
       bool take (false); // Take this branch?
@@ -2131,7 +2135,9 @@ namespace build2
 
     do
     {
+      enable_attributes (); // Recognize attributes before value.
       next (t, tt);
+
       if (tt == type::newline || tt == type::eos)
         fail (t) << "expected switch expression instead of " << t;
 
@@ -2237,7 +2243,7 @@ namespace build2
           //
           mode (lexer_mode::case_patterns); // Recognize `|` and `,`.
 
-          auto parse_pattern = [this] (token& t, type& tt)
+          auto parse_pattern_with_attributes = [this] (token& t, type& tt)
           {
             return parse_value_with_attributes (
               t, tt, pattern_mode::ignore, "pattern", nullptr);
@@ -2245,7 +2251,9 @@ namespace build2
 
           for (size_t i (0);; ++i)
           {
+            enable_attributes (); // Recognize attributes before pattern.
             next (t, tt);
+
             if (tt == type::newline || tt == type::eos)
               fail (t) << "expected case pattern instead of " << t;
 
@@ -2254,10 +2262,10 @@ namespace build2
 
             // Handle pattern alternatives (<pattern>|<pattern>).
             //
-            for (;; next (t, tt))
+            for (;;)
             {
               const location l (get_location (t));
-              value p (parse_pattern (t, tt));
+              value p (parse_pattern_with_attributes (t, tt));
               expr& e (exprs[i]); // Note: value might be modified (typified).
 
               if (e.func)
@@ -2300,14 +2308,18 @@ namespace build2
                 pre_parse_ = true;
                 do
                 {
+                  enable_attributes (); // Recognize attributes before pattern.
                   next (t, tt); // Skip `|`.
-                  parse_pattern (t, tt);
+                  parse_pattern_with_attributes (t, tt);
                 }
                 while (tt == type::bit_or);
                 pre_parse_ = false;
 
                 break;
               }
+
+              enable_attributes (); // Recognize attributes before pattern.
+              next (t, tt);
             }
 
             if (!take)
@@ -2421,6 +2433,7 @@ namespace build2
     // First take care of the variable name. There is no reason not to
     // support variable attributes.
     //
+    enable_attributes ();
     next (t, tt);
     attributes_push (t, tt);
 
@@ -2445,6 +2458,7 @@ namespace build2
     // value on the RHS of an assignment (expansion, attributes).
     //
     mode (lexer_mode::value, '@');
+    enable_attributes (); // @@ VAL
     next (t, tt);
 
     value val (parse_value_with_attributes (t, tt, pattern_mode::expand));
@@ -2573,6 +2587,7 @@ namespace build2
     // condition) for the same reason as in if-else (see parse_if_else()).
     //
     mode (lexer_mode::value);
+    enable_attributes (); // @@ VAL
     next (t, tt);
 
     const location el (get_location (t));
@@ -2627,6 +2642,7 @@ namespace build2
     // (expansion, attributes).
     //
     mode (lexer_mode::value, '@');
+    enable_attributes (); // @@ VAL
     next (t, tt);
 
     if (value v = parse_value_with_attributes (t, tt, pattern_mode::expand))
@@ -2660,6 +2676,7 @@ namespace build2
     // (expansion, attributes).
     //
     mode (lexer_mode::value, '@');
+    enable_attributes (); // @@ VAL
     next (t, tt);
 
     if (value v = parse_value_with_attributes (t, tt, pattern_mode::expand))
@@ -2862,6 +2879,7 @@ namespace build2
   parse_variable_value (token& t, type& tt)
   {
     mode (lexer_mode::value, '@');
+    enable_attributes (); // @@ VAL.
     next (t, tt);
 
     // Parse value attributes if any. Note that it's ok not to have anything
@@ -3121,6 +3139,7 @@ namespace build2
     // leave: rparen
 
     mode (lexer_mode::eval, '@'); // Auto-expires at rparen.
+    enable_attributes (); // @@ VAL (eval)
     next (t, tt);
 
     if (tt == type::rparen)
@@ -3137,7 +3156,7 @@ namespace build2
   values parser::
   parse_eval_comma (token& t, type& tt, pattern_mode pmode, bool first)
   {
-    // enter: first token of LHS
+    // enter: first token of LHS (lexed with enabled attributes)
     // leave: next token after last RHS
 
     // Left-associative: parse in a loop for as long as we can.
@@ -3150,7 +3169,9 @@ namespace build2
 
     while (tt == type::comma)
     {
+      enable_attributes (); // Recognize attributes before value.
       next (t, tt);
+
       value rhs (parse_eval_ternary (t, tt, pmode));
 
       if (!pre_parse_)
@@ -3163,7 +3184,7 @@ namespace build2
   value parser::
   parse_eval_ternary (token& t, type& tt, pattern_mode pmode, bool first)
   {
-    // enter: first token of LHS
+    // enter: first token of LHS (lexed with enabled attributes)
     // leave: next token after last RHS
 
     // Right-associative (kind of): we parse what's between ?: without
@@ -3196,7 +3217,9 @@ namespace build2
     if (!pp)
       pre_parse_ = !q; // Short-circuit middle?
 
+    enable_attributes (); // Recognize attributes before value.
     next (t, tt);
+
     value mhs (parse_eval_ternary (t, tt, pmode));
 
     if (tt != type::colon)
@@ -3205,7 +3228,9 @@ namespace build2
     if (!pp)
       pre_parse_ = q; // Short-circuit right?
 
+    enable_attributes (); // Recognize attributes before value.
     next (t, tt);
+
     value rhs (parse_eval_ternary (t, tt, pmode));
 
     pre_parse_ = pp;
@@ -3215,7 +3240,7 @@ namespace build2
   value parser::
   parse_eval_or (token& t, type& tt, pattern_mode pmode, bool first)
   {
-    // enter: first token of LHS
+    // enter: first token of LHS (lexed with enabled attributes)
     // leave: next token after last RHS
 
     // Left-associative: parse in a loop for as long as we can.
@@ -3234,7 +3259,9 @@ namespace build2
         if (!pre_parse_ && convert<bool> (move (lhs)))
           pre_parse_ = true;
 
+        enable_attributes (); // Recognize attributes before value.
         next (t, tt);
+
         l = get_location (t);
         value rhs (parse_eval_and (t, tt, pmode));
 
@@ -3255,7 +3282,7 @@ namespace build2
   value parser::
   parse_eval_and (token& t, type& tt, pattern_mode pmode, bool first)
   {
-    // enter: first token of LHS
+    // enter: first token of LHS (lexed with enabled attributes)
     // leave: next token after last RHS
 
     // Left-associative: parse in a loop for as long as we can.
@@ -3274,7 +3301,9 @@ namespace build2
         if (!pre_parse_ && !convert<bool> (move (lhs)))
           pre_parse_ = true;
 
+        enable_attributes (); // Recognize attributes before value.
         next (t, tt);
+
         l = get_location (t);
         value rhs (parse_eval_comp (t, tt, pmode));
 
@@ -3295,7 +3324,7 @@ namespace build2
   value parser::
   parse_eval_comp (token& t, type& tt, pattern_mode pmode, bool first)
   {
-    // enter: first token of LHS
+    // enter: first token of LHS (lexed with enabled attributes)
     // leave: next token after last RHS
 
     // Left-associative: parse in a loop for as long as we can.
@@ -3312,7 +3341,9 @@ namespace build2
       type op (tt);
       location l (get_location (t));
 
+      enable_attributes (); // Recognize attributes before value.
       next (t, tt);
+
       value rhs (parse_eval_value (t, tt, pmode));
 
       if (pre_parse_)
@@ -3329,7 +3360,7 @@ namespace build2
   value parser::
   parse_eval_value (token& t, type& tt, pattern_mode pmode, bool first)
   {
-    // enter: first token of value
+    // enter: first token of value (lexed with enabled attributes)
     // leave: next token after value
 
     // Parse value attributes if any. Note that it's ok not to have anything
@@ -3344,7 +3375,9 @@ namespace build2
     {
     case type::log_not:
       {
+        enable_attributes (); // Recognize attributes before value.
         next (t, tt);
+
         v = parse_eval_value (t, tt, pmode);
 
         if (pre_parse_)
@@ -3498,7 +3531,7 @@ namespace build2
     // Using '@' for attribute key-value pairs would be just too ugly. Seeing
     // that we control what goes into keys/values, let's use a much nicer '='.
     //
-    mode (lexer_mode::attribute, '=');
+    mode (lexer_mode::attributes, '=');
     next (t, tt);
 
     has = (tt != type::rsbrace);
@@ -5372,7 +5405,7 @@ namespace build2
   // In fact, because this is only done in the buildspec mode, we can still
   // use eval contexts provided that we quote them: '"cle(an)"'. Note that
   // function calls also need quoting (since a separated '(' is not treated as
-  // function call): '"$identity(update)"'.
+  // a function call): '"$identity(update)"'.
   //
   // This poses a problem, though: if it's quoted then it is a concatenated
   // expansion and therefore cannot contain multiple values, for example,
diff --git a/libbuild2/parser.hxx b/libbuild2/parser.hxx
index d82496d..b07936c 100644
--- a/libbuild2/parser.hxx
+++ b/libbuild2/parser.hxx
@@ -535,8 +535,8 @@ namespace build2
         lexer_->mode (m, ps);
       else
         // As a sanity check, make sure the mode matches the next token. Note
-        // that we don't check the pair separator since it can be overriden by
-        // the lexer's mode() implementation.
+        // that we don't check the attributes flags or the pair separator
+        // since they can be overridden by the lexer's mode() implementation.
         //
         assert (replay_i_ != replay_data_.size () &&
                 replay_data_[replay_i_].mode == m);
@@ -555,6 +555,13 @@ namespace build2
     }
 
     void
+    enable_attributes ()
+    {
+      if (replay_ != replay::play) // Only touch the lexer when not replaying.
+        lexer_->enable_attributes ();
+    }
+
+    void
     expire_mode ()
     {
       if (replay_ != replay::play)
diff --git a/libbuild2/test/script/lexer.cxx b/libbuild2/test/script/lexer.cxx
index 75c04c8..a65eb25 100644
--- a/libbuild2/test/script/lexer.cxx
+++ b/libbuild2/test/script/lexer.cxx
@@ -19,11 +19,14 @@ namespace build2
       void lexer::
       mode (base_mode m, char ps, optional<const char*> esc)
       {
+        bool a (false); // attributes
+
         const char* s1 (nullptr);
         const char* s2 (nullptr);
-        bool s (true);
-        bool n (true);
-        bool q (true);
+
+        bool s (true); // space
+        bool n (true); // newline
+        bool q (true); // quotes
 
         if (!esc)
         {
@@ -71,8 +74,8 @@ namespace build2
             // Note that we don't recognize ':' since having a trailing
             // variable assignment is illegal.
             //
-            s1 = "; $([]#\t\n";
-            s2 = "         ";
+            s1 = "; $(#\t\n";
+            s2 = "       ";
             break;
           }
 
@@ -128,7 +131,7 @@ namespace build2
             //
             assert (ps == '\0' ||
                     m == lexer_mode::eval ||
-                    m == lexer_mode::attribute);
+                    m == lexer_mode::attributes);
 
             base_lexer::mode (m, ps, esc);
             return;
@@ -136,7 +139,7 @@ namespace build2
         }
 
         assert (ps == '\0');
-        state_.push (state {m, ps, s, n, q, *esc, s1, s2});
+        state_.push (state {m, a, ps, s, n, q, *esc, s1, s2});
       }
 
       token lexer::
@@ -177,9 +180,6 @@ namespace build2
         xchar c (get ());
         uint64_t ln (c.line), cn (c.column);
 
-        if (eos (c))
-          return token (type::eos, sep, ln, cn, token_printer);
-
         state st (state_.top ()); // Make copy (see first/second_token).
         lexer_mode m (st.mode);
 
@@ -217,6 +217,22 @@ namespace build2
           return make_token (t, move (v));
         };
 
+        // Handle attributes (do it first to make sure the flag is cleared
+        // regardless of what we return).
+        //
+        if (st.attributes)
+        {
+          assert (m == lexer_mode::variable_line);
+
+          state_.top ().attributes = false;
+
+          if (c == '[')
+            return make_token (type::lsbrace);
+        }
+
+        if (eos (c))
+          return make_token (type::eos);
+
         // Expire certain modes at the end of the token. Do it early in case
         // we push any new mode (e.g., double quote).
         //
@@ -253,18 +269,6 @@ namespace build2
           }
         }
 
-
-        if (m == lexer_mode::variable_line)
-        {
-          switch (c)
-          {
-            // Attributes.
-            //
-          case '[': return make_token (type::lsbrace);
-          case ']': return make_token (type::rsbrace);
-          }
-        }
-
         // Line separators.
         //
         if (m == lexer_mode::command_line ||
diff --git a/libbuild2/test/script/parser.cxx b/libbuild2/test/script/parser.cxx
index f3f6ffa..aa78437 100644
--- a/libbuild2/test/script/parser.cxx
+++ b/libbuild2/test/script/parser.cxx
@@ -1288,10 +1288,11 @@ namespace build2
         // enter: assignment
         // leave: newline or semi
 
-        // We cannot reuse the value mode since it will recognize { which we
+        // We cannot reuse the value mode since it will recognize `{` which we
         // want to treat as a literal.
         //
         mode (lexer_mode::variable_line);
+        enable_attributes (); // @@ VAL
         next (t, tt);
 
         // Parse value attributes if any. Note that it's ok not to have
@@ -3446,11 +3447,13 @@ namespace build2
         path_ = &name;
 
         istringstream is (attributes);
-        lexer l (is, name, lexer_mode::attribute);
+        lexer l (is, name, lexer_mode::attributes);
         set_lexer (&l);
 
         token t;
         type tt;
+
+        enable_attributes (); // Enable `[` recognition.
         next (t, tt);
 
         if (tt != type::lsbrace && tt != type::eos)
diff --git a/libbuild2/token.hxx b/libbuild2/token.hxx
index 2370f8d..e420aa8 100644
--- a/libbuild2/token.hxx
+++ b/libbuild2/token.hxx
@@ -14,7 +14,8 @@
 
 namespace build2
 {
-  // Extendable/inheritable enum-like class.
+
+  // Token type.
   //
   // A line consists of a sequence of words separated by separators and
   // terminated with the newline. If whitespace is a separator, then it is
@@ -22,6 +23,8 @@ namespace build2
   //
   struct token_type
   {
+    // Extendable/inheritable enum-like class.
+    //
     enum
     {
       // NOTE: remember to update token_printer()!
@@ -145,10 +148,12 @@ namespace build2
   inline ostream&
   operator<< (ostream& o, const token& t) {t.printer (o, t, true); return o;}
 
-  // Extendable/inheritable enum-like class.
+  // Context-dependent lexing (see lexer_mode for details).
   //
   struct lexer_mode_base
   {
+    // Extendable/inheritable enum-like class.
+    //
     enum { value_next };
 
     using value_type = uint16_t;
author	Boris Kolpackov <boris@codesynthesis.com>	2019-11-14 12:55:54 +0200
committer	Boris Kolpackov <boris@codesynthesis.com>	2019-11-14 13:20:08 +0200
commit	5ec57d68a5205173a02c34a24d7129347d43196c (patch)
tree	303de46753bcde9f9ccff094d6591b6bb7583931 /libbuild2
parent	62a688e3fd7d1fdb8ce5590ebe9cb99e90cbe5d7 (diff)