From 5ec57d68a5205173a02c34a24d7129347d43196c Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 14 Nov 2019 12:55:54 +0200 Subject: Tighten up attribute recognition during parsing Now it should be possible to use `[]` for wildcard patterns, for example: foo = foo.[hit]xx Note that a leading bracket expression will still be recognized as attributes and escaping or quoting it will inhibit pattern matching. To resolve this case we need to specify an empty attribute list: foo = [] [abc]-foo.cxx --- libbuild2/lexer+eval.test.testscript | 6 +- libbuild2/lexer.cxx | 148 ++++++++++++++---------- libbuild2/lexer.hxx | 46 +++++--- libbuild2/lexer.test.cxx | 12 +- libbuild2/parser.cxx | 189 ++++++++++++++++++------------- libbuild2/parser.hxx | 11 +- libbuild2/test/script/lexer.cxx | 48 ++++---- libbuild2/test/script/parser.cxx | 7 +- libbuild2/token.hxx | 9 +- old-tests/attribute/buildfile | 4 +- old-tests/variable/override/test.sh | 3 +- tests/variable/scope-specific/testscript | 17 ++- 12 files changed, 300 insertions(+), 200 deletions(-) diff --git a/libbuild2/lexer+eval.test.testscript b/libbuild2/lexer+eval.test.testscript index 963f3d0..46452a7 100644 --- a/libbuild2/lexer+eval.test.testscript +++ b/libbuild2/lexer+eval.test.testscript @@ -6,7 +6,7 @@ test.arguments = eval : punctuation : -$* <:'x:x{x}x[x]x$x?x,x(x)' >>EOO +$* <:'x:x{x}x$x?x,x(x)' >>EOO 'x' : 'x' @@ -14,10 +14,6 @@ $* <:'x:x{x}x[x]x$x?x,x(x)' >>EOO 'x' } 'x' -[ -'x' -] -'x' $ 'x' ? diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx index 61d7fbf..b405929 100644 --- a/libbuild2/lexer.cxx +++ b/libbuild2/lexer.cxx @@ -23,11 +23,15 @@ namespace build2 void lexer:: mode (lexer_mode m, char ps, optional esc) { + bool a (false); // attributes + const char* s1 (nullptr); const char* s2 (nullptr); - bool s (true); - bool n (true); - bool q (true); + + bool s (true); // space + bool n (true); // newline + bool q (true); // quotes + if (!esc) { @@ -39,35 +43,39 @@ namespace build2 { case lexer_mode::normal: { - s1 = ":<>=+ $(){}[]#\t\n"; - s2 = " = "; + a = true; + s1 = ":<>=+ $(){}#\t\n"; + s2 = " = "; break; } case lexer_mode::value: { - s1 = " $(){}[]#\t\n"; - s2 = " "; + s1 = " $(){}#\t\n"; + s2 = " "; break; } case lexer_mode::values: { - s1 = " $(){}[],#\t\n"; - s2 = " "; + // a: beginning and after `,`? + s1 = " $(){},#\t\n"; + s2 = " "; break; } case lexer_mode::switch_expressions: { - s1 = " $(){}[],:#\t\n"; - s2 = " "; + // a: beginning and after `,`? + s1 = " $(){},:#\t\n"; + s2 = " "; break; } case lexer_mode::case_patterns: { - s1 = " $(){}[],|:#\t\n"; - s2 = " "; + // a: beginning and after `,` & `|`? + s1 = " $(){},|:#\t\n"; + s2 = " "; break; } - case lexer_mode::attribute: + case lexer_mode::attributes: { s1 = " $(]#\t\n"; s2 = " "; @@ -75,8 +83,8 @@ namespace build2 } case lexer_mode::eval: { - s1 = ":<>=!&|?, $(){}[]#\t\n"; - s2 = " = &| "; + s1 = ":<>=!&|?, $(){}#\t\n"; + s2 = " = &| "; break; } case lexer_mode::buildspec: @@ -91,8 +99,10 @@ namespace build2 // // 3. Treat newline as an ordinary space. // - s1 = " $(){}[],\t\n"; - s2 = " "; + // Also note that we don't have buildspec attributes. + // + s1 = " $(){},\t\n"; + s2 = " "; n = false; break; } @@ -109,13 +119,13 @@ namespace build2 default: assert (false); // Unhandled custom mode. } - state_.push (state {m, ps, s, n, q, *esc, s1, s2}); + state_.push (state {m, a, ps, s, n, q, *esc, s1, s2}); } token lexer:: next () { - const state& st (state_.top ()); + state& st (state_.top ()); lexer_mode m (st.mode); // For some modes we have dedicated imlementations of next(). @@ -127,7 +137,7 @@ namespace build2 case lexer_mode::values: case lexer_mode::switch_expressions: case lexer_mode::case_patterns: - case lexer_mode::attribute: + case lexer_mode::attributes: case lexer_mode::variable: case lexer_mode::buildspec: break; case lexer_mode::eval: return next_eval (); @@ -147,6 +157,17 @@ namespace build2 ln, cn, token_printer); }; + // Handle attributes (do it first to make sure the flag is cleared + // regardless of what we return). + // + if (st.attributes) + { + st.attributes = false; + + if (c == '[') + return make_token (type::lsbrace); + } + if (eos (c)) return make_token (type::eos); @@ -155,11 +176,11 @@ namespace build2 if (c == st.sep_pair) return make_token (type::pair_separator, string (1, c)); + // NOTE: remember to update mode(), next_eval() if adding any new special + // characters. + switch (c) { - // NOTE: remember to update mode(), next_eval() if adding new special - // characters. - // case '\n': { // Expire value/values modes at the end of the line. @@ -170,20 +191,13 @@ namespace build2 m == lexer_mode::case_patterns) state_.pop (); - sep = true; // Treat newline as always separated. - return make_token (type::newline); - } - case '{': return make_token (type::lcbrace); - case '}': return make_token (type::rcbrace); - case '[': return make_token (type::lsbrace); - case ']': - { - // Expire attribute mode after closing ']'. + // Re-enable attributes in the normal mode. // - if (m == lexer_mode::attribute) - state_.pop (); + if (state_.top ().mode == lexer_mode::normal) + state_.top ().attributes = true; - return make_token (type::rsbrace); + sep = true; // Treat newline as always separated. + return make_token (type::newline); } case '$': return make_token (type::dollar); case ')': return make_token (type::rparen); @@ -198,6 +212,31 @@ namespace build2 } } + // The following characters are special in all modes except attributes. + // + if (m != lexer_mode::attributes) + { + switch (c) + { + case '{': return make_token (type::lcbrace); + case '}': return make_token (type::rcbrace); + } + } + + // The following characters are special in the attributes modes. + // + if (m == lexer_mode::attributes) + { + switch (c) + { + case ']': + { + state_.pop (); // Expire the attributes mode after closing `]`. + return make_token (type::rsbrace); + } + } + } + // The following characters are special in the normal, variable, and // switch_expressions modes. // @@ -208,9 +247,6 @@ namespace build2 { switch (c) { - // NOTE: remember to update mode(), next_eval() if adding new special - // characters. - // case ':': return make_token (type::colon); } } @@ -221,9 +257,6 @@ namespace build2 { switch (c) { - // NOTE: remember to update mode(), next_eval() if adding new special - // characters. - // case '=': { if (peek () == '+') @@ -249,8 +282,6 @@ namespace build2 // if (m == lexer_mode::normal) { - // NOTE: remember to update mode() if adding new special characters. - // switch (c) { case '<': return make_token (type::labrace); @@ -265,8 +296,6 @@ namespace build2 m == lexer_mode::switch_expressions || m == lexer_mode::case_patterns) { - // NOTE: remember to update mode() if adding new special characters. - // switch (c) { case ',': return make_token (type::comma); @@ -277,8 +306,6 @@ namespace build2 // if (m == lexer_mode::case_patterns) { - // NOTE: remember to update mode() if adding new special characters. - // switch (c) { case '|': return make_token (type::bit_or); @@ -294,13 +321,16 @@ namespace build2 token lexer:: next_eval () { + // This mode is quite a bit like the value mode when it comes to special + // characters, except that we have some of our own. + bool sep (skip_spaces ()); xchar c (get ()); if (eos (c)) fail (c) << "unterminated evaluation context"; - const state& st (state_.top ()); + state& st (state_.top ()); uint64_t ln (c.line), cn (c.column); @@ -311,28 +341,30 @@ namespace build2 ln, cn, token_printer); }; - // This mode is quite a bit like the value mode when it comes to special - // characters, except that we have some of our own. + // Handle attributes (do it first to make sure the flag is cleared + // regardless of what we return). // + if (st.attributes) + { + st.attributes = false; + + if (c == '[') + return make_token (type::lsbrace); + } // Handle pair separator. // if (c == st.sep_pair) return make_token (type::pair_separator, string (1, c)); - // Note: we don't treat [ and ] as special here. Maybe can use them for - // something later. - // + // NOTE: remember to update mode() if adding any new special characters. + switch (c) { - // NOTE: remember to update mode() if adding new special characters. - // case '\n': fail (c) << "newline in evaluation context" << endf; case ':': return make_token (type::colon); case '{': return make_token (type::lcbrace); case '}': return make_token (type::rcbrace); - case '[': return make_token (type::lsbrace); - case ']': return make_token (type::rsbrace); case '$': return make_token (type::dollar); case '?': return make_token (type::question); case ',': return make_token (type::comma); diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx index 59debc4..715926c 100644 --- a/libbuild2/lexer.hxx +++ b/libbuild2/lexer.hxx @@ -21,28 +21,29 @@ namespace build2 { // Context-dependent lexing mode. Quoted modes are internal and should not // be set explicitly. In the value mode we don't treat certain characters - // (e.g., '+', '=') as special so that we can use them in the variable - // values, e.g., 'foo = g++'. In contrast, in the variable mode, we restrict - // certain character (e.g., '/') from appearing in the name. The values mode - // is like value but recogizes ',' as special (used in contexts where we - // need to list multiple values). The attribute mode is also like value - // except it doesn't treat '{' and '}' as special (so we cannot have name - // groups in attributes). The eval mode is used in the evaluation context. + // (e.g., `+`, `=`) as special so that we can use them in the variable + // values, e.g., `foo = g++`. In contrast, in the variable mode, we restrict + // certain character (e.g., `/`) from appearing in the name. The values mode + // is like value but recogizes `,` as special (used in contexts where we + // need to list multiple values). The attributes mode is also like value + // except it doesn't treat `{` and `}` as special (so we cannot have name + // groups in attributes) and recognizes the closing `]`. The eval mode is + // used in the evaluation context. // // A number of modes are "derived" from the value/values mode by recognizing // a few extra characters: // // switch_expressions values plus `:` - // case_patterns values plus '|' and ':' + // case_patterns values plus `|` and `:` // // Note that the normal, value/values and derived, as well as eval modes // split words separated by the pair character (to disable pairs one can - // pass '\0' as a pair character). + // pass `\0` as a pair character). // // The alternative modes must be set manually. The value/values and derived // modes automatically expires after the end of the line. The attribute mode - // expires after the closing ']'. The variable mode expires after the word - // token. And the eval mode expires after the closing ')'. + // expires after the closing `]`. The variable mode expires after the word + // token. And the eval mode expires after the closing `)`. // // Note that normally it is only safe to switch mode when the current token // is not quoted (or, more generally, when you are not in the double-quoted @@ -50,6 +51,15 @@ namespace build2 // variable name mode). Failed that your mode (which now will be the top of // the mode stack) will prevent proper recognition of the closing quote. // + // Finally, attributes recognition (the `[` token) cuts across most of the + // modes and is handled with a flag. In the normal mode it is automatically + // set at the beginning and after each newline. In all other modes it must + // be explicitly set at points where attributes are recognized. In all the + // cases it is automatically reset after lexing the next token (whether `[` + // or not). + // + // @@ Maybe also enable at the beginning of value? + // // Extendable/inheritable enum-like class. // @@ -65,7 +75,7 @@ namespace build2 values, case_patterns, switch_expressions, - attribute, + attributes, eval, single_quoted, double_quoted, @@ -97,15 +107,20 @@ namespace build2 name () const {return name_;} // Note: sets mode for the next token. The second argument can be used to - // specifythe pair separator character (if the mode supports pairs). If - // escapes not specified, then inherit the current mode's (thought a mode - // can also override it). + // specify the pair separator character (if the mode supports pairs). If + // escapes is not specified, then inherit the current mode's (though a + // mode can also override it). // virtual void mode (lexer_mode, char pair_separator = '\0', optional escapes = nullopt); + // Enable attributes recognition for the next token. + // + void + enable_attributes () {state_.top ().attributes = true;} + // Expire the current mode early. // void @@ -136,6 +151,7 @@ namespace build2 struct state { lexer_mode mode; + bool attributes; char sep_pair; bool sep_space; // Are whitespaces separators (see skip_spaces())? diff --git a/libbuild2/lexer.test.cxx b/libbuild2/lexer.test.cxx index 32151db..eeed532 100644 --- a/libbuild2/lexer.test.cxx +++ b/libbuild2/lexer.test.cxx @@ -31,12 +31,12 @@ namespace build2 quote = true; else { - if (a == "normal") m = lexer_mode::normal; - else if (a == "variable") m = lexer_mode::variable; - else if (a == "value") m = lexer_mode::value; - else if (a == "attribute") m = lexer_mode::attribute; - else if (a == "eval") m = lexer_mode::eval; - else if (a == "buildspec") m = lexer_mode::buildspec; + if (a == "normal") m = lexer_mode::normal; + else if (a == "variable") m = lexer_mode::variable; + else if (a == "value") m = lexer_mode::value; + else if (a == "attributes") m = lexer_mode::attributes; + else if (a == "eval") m = lexer_mode::eval; + else if (a == "buildspec") m = lexer_mode::buildspec; else assert (false); break; } diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx index 9e586e0..be1ba0b 100644 --- a/libbuild2/parser.cxx +++ b/libbuild2/parser.cxx @@ -287,11 +287,15 @@ namespace build2 { tracer trace ("parser::parse_clause", &path_); - // parse_clause() should always stop at a token that is at the beginning - // of the line (except for eof). That is, if something is called to parse - // a line, it should parse it until newline (or fail). This is important - // for if-else blocks, directory scopes, etc., that assume the '}' token - // they see is on the new line. + // This function should be called in the normal lexing mode with the first + // token of a line or an alternative arrangements may have to be made to + // recognize the attributes. + // + // It should also always stop at a token that is at the beginning of the + // line (except for eof). That is, if something is called to parse a line, + // it should parse it until newline (or fail). This is important for + // if-else blocks, directory scopes, etc., that assume the '}' token they + // see is on the new line. // bool parsed (false); @@ -302,8 +306,7 @@ namespace build2 assert (attributes_.empty ()); auto at (attributes_push (t, tt)); - // We should always start with one or more names, potentially - // <>-grouped. + // We always start with one or more names, potentially <>-grouped. // if (!(start_names (tt) || tt == type::labrace)) { @@ -454,6 +457,13 @@ namespace build2 { // Parse target names inside < >. // + // We "reserve" the right to have attributes inside <> though what + // exactly that would mean is unclear. One potentially useful + // semantics would be the ability to specify attributes for ad hoc + // members though the fact that the primary target is listed first + // would make it rather unintuitive. + // + enable_attributes (); next (t, tt); auto at (attributes_push (t, tt)); @@ -621,7 +631,10 @@ namespace build2 } }; - if (next (t, tt) == type::newline) + enable_attributes (); // Recognize attributes after `:`. + next (t, tt); + + if (tt == type::newline) { // See if this is a target block. // @@ -734,85 +747,70 @@ namespace build2 // // This can take any of the following forms: // - // x = y - // foo/ x = y (ns will have two elements) - // foo/ [attrs] x = y (tt will be '[') + // x = y + // foo/ x = y (ns will have two elements) + // + // And in the future we may also want to support: + // + // foo/ bar/ x = y // - // In the future we may also want to support: + // Note that we don't support this: // - // foo/ bar/ x = y + // foo/ [attrs] x = y // - if (tt == type::assign || tt == type::prepend || tt == type::append || - tt == type::lsbrace) + // Because the meaning of `[attrs]` would be ambiguous (it could also be + // a name). Note that the above semantics can be easily achieved with an + // explicit directory scope: + // + // foo/ + // { + // [attrs] x = y + // } + // + if (tt == type::assign || tt == type::prepend || tt == type::append) { // Detect and handle the directory scope. If things look off, then we // let parse_variable_name() complain. // dir_path d; - - if ((ns.size () == 2 && ns[0].directory ()) || - (ns.size () == 1 && ns[0].directory () && tt == type::lsbrace)) + if (ns.size () == 2 && ns[0].directory ()) { if (at.first) fail (at.second) << "attributes before scope directory"; - if (tt == type::lsbrace) - { - attributes_pop (); - attributes_push (t, tt); - - d = move (ns[0].dir); - nloc = get_location (t); - ns = parse_names (t, tt, pattern_mode::ignore); + d = move (ns[0].dir); + ns.erase (ns.begin ()); - // It got to be a variable assignment. - // - if (tt != type::assign && - tt != type::prepend && - tt != type::append) - fail (t) << "expected variable assignment instead of " << t; - } - else - { - d = move (ns[0].dir); - ns.erase (ns.begin ()); - } + // Make sure it's not a pattern (see also the target case above and + // scope below). + // + if (path_pattern (d)) + fail (nloc) << "pattern in directory " << d.representation (); } - // Make sure not a pattern (see also the target case above and scope - // below). - // - if (path_pattern (d)) - fail (nloc) << "pattern in directory " << d.representation (); + const variable& var (parse_variable_name (move (ns), nloc)); + apply_variable_attributes (var); - if (tt != type::lsbrace) + if (var.visibility >= variable_visibility::target) { - const variable& var (parse_variable_name (move (ns), nloc)); - apply_variable_attributes (var); + diag_record dr (fail (nloc)); - if (var.visibility >= variable_visibility::target) - { - diag_record dr (fail (nloc)); + dr << "variable " << var << " has " << var.visibility + << " visibility but is assigned on a scope"; - dr << "variable " << var << " has " << var.visibility - << " visibility but is assigned on a scope"; - - if (var.visibility == variable_visibility::target) - dr << info << "consider changing it to '*: " << var << "'"; - } - - { - enter_scope sg (d.empty () - ? enter_scope () - : enter_scope (*this, move (d))); - parse_variable (t, tt, var, tt); - } + if (var.visibility == variable_visibility::target) + dr << info << "consider changing it to '*: " << var << "'"; + } - next_after_newline (t, tt); - continue; + { + enter_scope sg (d.empty () + ? enter_scope () + : enter_scope (*this, move (d))); + parse_variable (t, tt, var, tt); } - // Not "our" attribute, see if anyone else likes it. + next_after_newline (t, tt); + continue; } // See if this is a directory scope. @@ -873,7 +871,7 @@ namespace build2 // Parse a target or prerequisite-specific variable block. If type is not // NULL, then this is a target type/pattern-specific block. // - // enter: first token of first line in the block + // enter: first token of first line in the block (normal lexer mode) // leave: rcbrace // // This is a more restricted variant of parse_clause() that only allows @@ -1216,7 +1214,9 @@ namespace build2 fail (ploc) << "no prerequisites in dependency chain or prerequisite-" << "specific variable assignment"; + enable_attributes (); // Recognize attributes after `:`. next (t, tt); + auto at (attributes_push (t, tt)); // @@ PAT: currently we pattern-expand prerequisite-specific vars. @@ -1670,6 +1670,7 @@ namespace build2 // manually looking for =/=+/+=. // mode (lexer_mode::value, '@'); + enable_attributes (); // @@ VAL. next (t, tt); // Get variable attributes, if any (note that here we will go into a @@ -1820,6 +1821,7 @@ namespace build2 // being able to type them or to return NULL. // mode (lexer_mode::value, '@'); + enable_attributes (); // @@ VAL. next (t, tt); auto at (attributes_push (t, tt)); @@ -1971,6 +1973,8 @@ namespace build2 for (;;) { string k (move (t.value)); + + enable_attributes (); // Recognize attributes before value. next (t, tt); bool take (false); // Take this branch? @@ -2131,7 +2135,9 @@ namespace build2 do { + enable_attributes (); // Recognize attributes before value. next (t, tt); + if (tt == type::newline || tt == type::eos) fail (t) << "expected switch expression instead of " << t; @@ -2237,7 +2243,7 @@ namespace build2 // mode (lexer_mode::case_patterns); // Recognize `|` and `,`. - auto parse_pattern = [this] (token& t, type& tt) + auto parse_pattern_with_attributes = [this] (token& t, type& tt) { return parse_value_with_attributes ( t, tt, pattern_mode::ignore, "pattern", nullptr); @@ -2245,7 +2251,9 @@ namespace build2 for (size_t i (0);; ++i) { + enable_attributes (); // Recognize attributes before pattern. next (t, tt); + if (tt == type::newline || tt == type::eos) fail (t) << "expected case pattern instead of " << t; @@ -2254,10 +2262,10 @@ namespace build2 // Handle pattern alternatives (|). // - for (;; next (t, tt)) + for (;;) { const location l (get_location (t)); - value p (parse_pattern (t, tt)); + value p (parse_pattern_with_attributes (t, tt)); expr& e (exprs[i]); // Note: value might be modified (typified). if (e.func) @@ -2300,14 +2308,18 @@ namespace build2 pre_parse_ = true; do { + enable_attributes (); // Recognize attributes before pattern. next (t, tt); // Skip `|`. - parse_pattern (t, tt); + parse_pattern_with_attributes (t, tt); } while (tt == type::bit_or); pre_parse_ = false; break; } + + enable_attributes (); // Recognize attributes before pattern. + next (t, tt); } if (!take) @@ -2421,6 +2433,7 @@ namespace build2 // First take care of the variable name. There is no reason not to // support variable attributes. // + enable_attributes (); next (t, tt); attributes_push (t, tt); @@ -2445,6 +2458,7 @@ namespace build2 // value on the RHS of an assignment (expansion, attributes). // mode (lexer_mode::value, '@'); + enable_attributes (); // @@ VAL next (t, tt); value val (parse_value_with_attributes (t, tt, pattern_mode::expand)); @@ -2573,6 +2587,7 @@ namespace build2 // condition) for the same reason as in if-else (see parse_if_else()). // mode (lexer_mode::value); + enable_attributes (); // @@ VAL next (t, tt); const location el (get_location (t)); @@ -2627,6 +2642,7 @@ namespace build2 // (expansion, attributes). // mode (lexer_mode::value, '@'); + enable_attributes (); // @@ VAL next (t, tt); if (value v = parse_value_with_attributes (t, tt, pattern_mode::expand)) @@ -2660,6 +2676,7 @@ namespace build2 // (expansion, attributes). // mode (lexer_mode::value, '@'); + enable_attributes (); // @@ VAL next (t, tt); if (value v = parse_value_with_attributes (t, tt, pattern_mode::expand)) @@ -2862,6 +2879,7 @@ namespace build2 parse_variable_value (token& t, type& tt) { mode (lexer_mode::value, '@'); + enable_attributes (); // @@ VAL. next (t, tt); // Parse value attributes if any. Note that it's ok not to have anything @@ -3121,6 +3139,7 @@ namespace build2 // leave: rparen mode (lexer_mode::eval, '@'); // Auto-expires at rparen. + enable_attributes (); // @@ VAL (eval) next (t, tt); if (tt == type::rparen) @@ -3137,7 +3156,7 @@ namespace build2 values parser:: parse_eval_comma (token& t, type& tt, pattern_mode pmode, bool first) { - // enter: first token of LHS + // enter: first token of LHS (lexed with enabled attributes) // leave: next token after last RHS // Left-associative: parse in a loop for as long as we can. @@ -3150,7 +3169,9 @@ namespace build2 while (tt == type::comma) { + enable_attributes (); // Recognize attributes before value. next (t, tt); + value rhs (parse_eval_ternary (t, tt, pmode)); if (!pre_parse_) @@ -3163,7 +3184,7 @@ namespace build2 value parser:: parse_eval_ternary (token& t, type& tt, pattern_mode pmode, bool first) { - // enter: first token of LHS + // enter: first token of LHS (lexed with enabled attributes) // leave: next token after last RHS // Right-associative (kind of): we parse what's between ?: without @@ -3196,7 +3217,9 @@ namespace build2 if (!pp) pre_parse_ = !q; // Short-circuit middle? + enable_attributes (); // Recognize attributes before value. next (t, tt); + value mhs (parse_eval_ternary (t, tt, pmode)); if (tt != type::colon) @@ -3205,7 +3228,9 @@ namespace build2 if (!pp) pre_parse_ = q; // Short-circuit right? + enable_attributes (); // Recognize attributes before value. next (t, tt); + value rhs (parse_eval_ternary (t, tt, pmode)); pre_parse_ = pp; @@ -3215,7 +3240,7 @@ namespace build2 value parser:: parse_eval_or (token& t, type& tt, pattern_mode pmode, bool first) { - // enter: first token of LHS + // enter: first token of LHS (lexed with enabled attributes) // leave: next token after last RHS // Left-associative: parse in a loop for as long as we can. @@ -3234,7 +3259,9 @@ namespace build2 if (!pre_parse_ && convert (move (lhs))) pre_parse_ = true; + enable_attributes (); // Recognize attributes before value. next (t, tt); + l = get_location (t); value rhs (parse_eval_and (t, tt, pmode)); @@ -3255,7 +3282,7 @@ namespace build2 value parser:: parse_eval_and (token& t, type& tt, pattern_mode pmode, bool first) { - // enter: first token of LHS + // enter: first token of LHS (lexed with enabled attributes) // leave: next token after last RHS // Left-associative: parse in a loop for as long as we can. @@ -3274,7 +3301,9 @@ namespace build2 if (!pre_parse_ && !convert (move (lhs))) pre_parse_ = true; + enable_attributes (); // Recognize attributes before value. next (t, tt); + l = get_location (t); value rhs (parse_eval_comp (t, tt, pmode)); @@ -3295,7 +3324,7 @@ namespace build2 value parser:: parse_eval_comp (token& t, type& tt, pattern_mode pmode, bool first) { - // enter: first token of LHS + // enter: first token of LHS (lexed with enabled attributes) // leave: next token after last RHS // Left-associative: parse in a loop for as long as we can. @@ -3312,7 +3341,9 @@ namespace build2 type op (tt); location l (get_location (t)); + enable_attributes (); // Recognize attributes before value. next (t, tt); + value rhs (parse_eval_value (t, tt, pmode)); if (pre_parse_) @@ -3329,7 +3360,7 @@ namespace build2 value parser:: parse_eval_value (token& t, type& tt, pattern_mode pmode, bool first) { - // enter: first token of value + // enter: first token of value (lexed with enabled attributes) // leave: next token after value // Parse value attributes if any. Note that it's ok not to have anything @@ -3344,7 +3375,9 @@ namespace build2 { case type::log_not: { + enable_attributes (); // Recognize attributes before value. next (t, tt); + v = parse_eval_value (t, tt, pmode); if (pre_parse_) @@ -3498,7 +3531,7 @@ namespace build2 // Using '@' for attribute key-value pairs would be just too ugly. Seeing // that we control what goes into keys/values, let's use a much nicer '='. // - mode (lexer_mode::attribute, '='); + mode (lexer_mode::attributes, '='); next (t, tt); has = (tt != type::rsbrace); @@ -5372,7 +5405,7 @@ namespace build2 // In fact, because this is only done in the buildspec mode, we can still // use eval contexts provided that we quote them: '"cle(an)"'. Note that // function calls also need quoting (since a separated '(' is not treated as - // function call): '"$identity(update)"'. + // a function call): '"$identity(update)"'. // // This poses a problem, though: if it's quoted then it is a concatenated // expansion and therefore cannot contain multiple values, for example, diff --git a/libbuild2/parser.hxx b/libbuild2/parser.hxx index d82496d..b07936c 100644 --- a/libbuild2/parser.hxx +++ b/libbuild2/parser.hxx @@ -535,8 +535,8 @@ namespace build2 lexer_->mode (m, ps); else // As a sanity check, make sure the mode matches the next token. Note - // that we don't check the pair separator since it can be overriden by - // the lexer's mode() implementation. + // that we don't check the attributes flags or the pair separator + // since they can be overridden by the lexer's mode() implementation. // assert (replay_i_ != replay_data_.size () && replay_data_[replay_i_].mode == m); @@ -555,6 +555,13 @@ namespace build2 } void + enable_attributes () + { + if (replay_ != replay::play) + lexer_->enable_attributes (); + } + + void expire_mode () { if (replay_ != replay::play) diff --git a/libbuild2/test/script/lexer.cxx b/libbuild2/test/script/lexer.cxx index 75c04c8..a65eb25 100644 --- a/libbuild2/test/script/lexer.cxx +++ b/libbuild2/test/script/lexer.cxx @@ -19,11 +19,14 @@ namespace build2 void lexer:: mode (base_mode m, char ps, optional esc) { + bool a (false); // attributes + const char* s1 (nullptr); const char* s2 (nullptr); - bool s (true); - bool n (true); - bool q (true); + + bool s (true); // space + bool n (true); // newline + bool q (true); // quotes if (!esc) { @@ -71,8 +74,8 @@ namespace build2 // Note that we don't recognize ':' since having a trailing // variable assignment is illegal. // - s1 = "; $([]#\t\n"; - s2 = " "; + s1 = "; $(#\t\n"; + s2 = " "; break; } @@ -128,7 +131,7 @@ namespace build2 // assert (ps == '\0' || m == lexer_mode::eval || - m == lexer_mode::attribute); + m == lexer_mode::attributes); base_lexer::mode (m, ps, esc); return; @@ -136,7 +139,7 @@ namespace build2 } assert (ps == '\0'); - state_.push (state {m, ps, s, n, q, *esc, s1, s2}); + state_.push (state {m, a, ps, s, n, q, *esc, s1, s2}); } token lexer:: @@ -177,9 +180,6 @@ namespace build2 xchar c (get ()); uint64_t ln (c.line), cn (c.column); - if (eos (c)) - return token (type::eos, sep, ln, cn, token_printer); - state st (state_.top ()); // Make copy (see first/second_token). lexer_mode m (st.mode); @@ -217,6 +217,22 @@ namespace build2 return make_token (t, move (v)); }; + // Handle attributes (do it first to make sure the flag is cleared + // regardless of what we return). + // + if (st.attributes) + { + assert (m == lexer_mode::variable_line); + + state_.top ().attributes = false; + + if (c == '[') + return make_token (type::lsbrace); + } + + if (eos (c)) + return make_token (type::eos); + // Expire certain modes at the end of the token. Do it early in case // we push any new mode (e.g., double quote). // @@ -253,18 +269,6 @@ namespace build2 } } - - if (m == lexer_mode::variable_line) - { - switch (c) - { - // Attributes. - // - case '[': return make_token (type::lsbrace); - case ']': return make_token (type::rsbrace); - } - } - // Line separators. // if (m == lexer_mode::command_line || diff --git a/libbuild2/test/script/parser.cxx b/libbuild2/test/script/parser.cxx index f3f6ffa..aa78437 100644 --- a/libbuild2/test/script/parser.cxx +++ b/libbuild2/test/script/parser.cxx @@ -1288,10 +1288,11 @@ namespace build2 // enter: assignment // leave: newline or semi - // We cannot reuse the value mode since it will recognize { which we + // We cannot reuse the value mode since it will recognize `{` which we // want to treat as a literal. // mode (lexer_mode::variable_line); + enable_attributes (); // @@ VAL next (t, tt); // Parse value attributes if any. Note that it's ok not to have @@ -3446,11 +3447,13 @@ namespace build2 path_ = &name; istringstream is (attributes); - lexer l (is, name, lexer_mode::attribute); + lexer l (is, name, lexer_mode::attributes); set_lexer (&l); token t; type tt; + + enable_attributes (); // Enable `[` recognition. next (t, tt); if (tt != type::lsbrace && tt != type::eos) diff --git a/libbuild2/token.hxx b/libbuild2/token.hxx index 2370f8d..e420aa8 100644 --- a/libbuild2/token.hxx +++ b/libbuild2/token.hxx @@ -14,7 +14,8 @@ namespace build2 { - // Extendable/inheritable enum-like class. + + // Token type. // // A line consists of a sequence of words separated by separators and // terminated with the newline. If whitespace is a separator, then it is @@ -22,6 +23,8 @@ namespace build2 // struct token_type { + // Extendable/inheritable enum-like class. + // enum { // NOTE: remember to update token_printer()! @@ -145,10 +148,12 @@ namespace build2 inline ostream& operator<< (ostream& o, const token& t) {t.printer (o, t, true); return o;} - // Extendable/inheritable enum-like class. + // Context-dependent lexing (see lexer_mode for details). // struct lexer_mode_base { + // Extendable/inheritable enum-like class. + // enum { value_next }; using value_type = uint16_t; diff --git a/old-tests/attribute/buildfile b/old-tests/attribute/buildfile index 7338641..a9abf24 100644 --- a/old-tests/attribute/buildfile +++ b/old-tests/attribute/buildfile @@ -6,10 +6,10 @@ #[foo=dir/file{bar}] # error: invalid attribute key #[foo] print hello # error: attributes before print -#[foo]./ # error: attributes before directory scope +#[foo] ./ # error: attributes before directory scope #{ #} -#[foo]./: # error: attributes before target scope +#[foo] ./: # error: attributes before target scope #./: [foo] buildfile # error: attributes before prerequisites #import [foo] libz # error: attributes without variable diff --git a/old-tests/variable/override/test.sh b/old-tests/variable/override/test.sh index 94ed61f..4675b7e 100755 --- a/old-tests/variable/override/test.sh +++ b/old-tests/variable/override/test.sh @@ -53,8 +53,7 @@ function test () fi } -fail foo=bar[] # error: unexpected [ in variable assignment 'foo=bar[]' -fail foo=[string]bar # error: typed override of variable foo +fail "foo= [string] bar" # error: typed override of variable foo #fail "!foo=bar" "!foo=BAR" # error: multiple global overrides of variable foo #fail "foo=bar" "foo=BAR" # error: multiple project overrides of variable foo #fail "%foo=bar" "%foo=BAR" # error: multiple project overrides of variable foo diff --git a/tests/variable/scope-specific/testscript b/tests/variable/scope-specific/testscript index 27210f9..111bbd9 100644 --- a/tests/variable/scope-specific/testscript +++ b/tests/variable/scope-specific/testscript @@ -9,14 +9,11 @@ $* <>EOO x = x foo/ x = X -foo/ [uint64] y=00 print $x print $(foo/ x) -print $(foo/ y) EOI x X -0 EOO : basic-block @@ -37,12 +34,20 @@ x 0 EOO -: expect-assignment +: unexpected-newline +: +$* <>EOE != 0 +foo/ y +EOI +:1:7: error: unexpected after foo/ y +EOE + +: expected-varname : $* <>EOE != 0 -foo/ [uint64] y +foo/ [uint64] y = 0 EOI -:1:16: error: expected variable assignment instead of +:1:1: error: expected variable name instead of foo/ [uint64] y EOE : unexpected-attribute -- cgit v1.1