From 4881a227779a78db1de2a7723e2a86f2b61453b3 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Thu, 20 Oct 2022 19:39:57 +0300 Subject: Change attribute syntax in script to come after variable in set and for (set x [...], for x [...]) --- libbuild2/build/script/parser+for.test.testscript | 20 ++++---- libbuild2/build/script/parser.cxx | 62 ++++++++++++++--------- libbuild2/parser.cxx | 46 +++++++++-------- libbuild2/parser.hxx | 20 ++++++-- libbuild2/script/parser.cxx | 57 ++++++++++++--------- libbuild2/script/run.cxx | 32 +++++++----- libbuild2/test/script/parser+for.test.testscript | 20 ++++---- libbuild2/test/script/parser.cxx | 35 ++++++++----- 8 files changed, 174 insertions(+), 118 deletions(-) (limited to 'libbuild2') diff --git a/libbuild2/build/script/parser+for.test.testscript b/libbuild2/build/script/parser+for.test.testscript index 2a9f169..847b253 100644 --- a/libbuild2/build/script/parser+for.test.testscript +++ b/libbuild2/build/script/parser+for.test.testscript @@ -69,10 +69,10 @@ %cmd (b/|'b\\')% EOO - : typed-var-value + : typed-elem : $* <>~%EOO% - for [dir_path] x: a b + for x [dir_path]: a b cmd $x end EOI @@ -80,10 +80,10 @@ %cmd (b/|'b\\')% EOO - : typed-values-var-value + : typed-elem-value : $* <>~%EOO% - for [dir_path] x: [strings] a b + for x [dir_path]: [strings] a b cmd $x end EOI @@ -341,14 +341,14 @@ echo a b | for x EOO - : typed-var-value + : typed-elem : $* <>EOO - echo 'a b' | for -w [dir_paths] x + echo 'a b' | for -w x [dir_path] cmd $x end EOI - echo 'a b' | for -w [dir_paths] x + echo 'a b' | for -w x [dir_path] EOO } @@ -579,14 +579,14 @@ for x b >EOO - for -w [dir_path] x <'a b' + for -w x [dir_path] <'a b' cmd $x end EOI - for -w [dir_path] x <'a b' + for -w x [dir_path] <'a b' EOO } diff --git a/libbuild2/build/script/parser.cxx b/libbuild2/build/script/parser.cxx index 77fdfee..cb1fbef 100644 --- a/libbuild2/build/script/parser.cxx +++ b/libbuild2/build/script/parser.cxx @@ -213,8 +213,7 @@ namespace build2 // 
Determine the line type/start token. // - line_type lt ( - pre_parse_line_start (t, tt, lexer_mode::second_token)); + line_type lt (pre_parse_line_start (t, tt, lexer_mode::second_token)); line ln; @@ -258,17 +257,16 @@ namespace build2 // or the third (x <...) one. Note that the second form (... | for // x) is handled separately. // - // @@ Do we diagnose `... | for x: ...`? - // - // If the next token doesn't introduce a variable (doesn't start - // attributes and doesn't look like a variable name), then this is - // the third form. Otherwise, if colon follows the variable name, - // then this is the first form and the third form otherwise. + // If the next token doesn't look like a variable name, then this + // is the third form. Otherwise, if colon follows the variable + // name, potentially after the attributes, then this is the first + // form and the third form otherwise. // // Note that for the third form we will need to pass the 'for' // token as a program name to the command expression parsing // function since it will be gone from the token stream by that - // time. Thus, we save it. + // time. Thus, we save it. We also need to make sure the sensing + // always leaves the variable name token in t/tt. // // Note also that in this model it won't be possible to support // options in the first form. @@ -277,7 +275,7 @@ namespace build2 assert (pt.type == type::word && pt.value == "for"); mode (lexer_mode::for_loop); - next_with_attributes (t, tt); + next (t, tt); // Note that we also consider special variable names (those that // don't clash with the command line elements like redirects, etc) @@ -285,30 +283,48 @@ namespace build2 // string& n (t.value); - if (tt == type::lsbrace || // Attributes. - (tt == type::word && // Variable name. - t.qtype == quote_type::unquoted && - (n[0] == '_' || alpha (n[0]) || n == "~"))) + if (tt == type::word && t.qtype == quote_type::unquoted && + (n[0] == '_' || alpha (n[0]) || // Variable. + n == "~")) // Special variable. 
{ - attributes_push (t, tt); - - if (tt != type::word || t.qtype != quote_type::unquoted) - fail (t) << "expected variable name instead of " << t; + // Detect patterns analogous to parse_variable_name() (so we + // diagnose `for x[string]: ...`). + // + if (n.find_first_of ("[*?") != string::npos) + fail (t) << "expected variable name instead of " << n; if (special_variable (n)) fail (t) << "attempt to set '" << n << "' special variable"; + // Parse out the element attributes, if present. + // + if (lexer_->peek_char ().first == '[') + { + // Save the variable name token before the attributes parsing + // and restore it afterwards. Also make sure that the token + // which follows the attributes stays in the stream. + // + token vt (move (t)); + next_with_attributes (t, tt); + + attributes_push (t, tt, + true /* standalone */, + false /* next_token */); + + t = move (vt); + tt = t.type; + } + if (lexer_->peek_char ().first == ':') lt = line_type::cmd_for_args; } if (lt == line_type::cmd_for_stream) // for x <... { - // At this point `t` contains the token that follows the `for` - // token and, potentially, the attributes. Now pre-parse the - // command expression in the command_line lexer mode starting - // from this position and also passing the 'for' token as a - // program name. + // At this point t/tt contains the variable name token. Now + // pre-parse the command expression in the command_line lexer + // mode starting from this position and also passing the 'for' + // token as a program name. // // Note that the fact that the potential attributes are already // parsed doesn't affect the command expression pre-parsing. 
diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx index bc7b79b..d5514b7 100644 --- a/libbuild2/parser.cxx +++ b/libbuild2/parser.cxx @@ -5635,8 +5635,13 @@ namespace build2 } pair<bool, location> parser:: - attributes_push (token& t, type& tt, bool standalone) + attributes_push (token& t, type& tt, bool standalone, bool next_token) { + // To make sure that the attributes are not standalone we need to read the + // token which follows ']'. + // + assert (standalone || next_token); + location l (get_location (t)); bool has (tt == type::lsbrace); @@ -5710,28 +5715,27 @@ namespace build2 if (tt != type::rsbrace) fail (t) << "expected ']' instead of " << t; - next (t, tt); - - if (tt == type::newline || tt == type::eos) + if (next_token) { - if (!standalone) - fail (t) << "standalone attributes"; + next (t, tt); + + if (tt == type::newline || tt == type::eos) + { + if (!standalone) + fail (t) << "standalone attributes"; + } + // + // Verify that the attributes are separated from the following word or + // "word-producing" token. + // + else if (!t.separated && (tt == type::word || + tt == type::dollar || + tt == type::lparen || + tt == type::lcbrace)) + fail (t) << "whitespace required after attributes" << + info (l) << "use the '\\[' escape sequence if this is a wildcard " + << "pattern"; } - // - // We require attributes to be separated from the following word or - // "word-producing" tokens (`$` for variable expansions/function calls, - // `(` for eval contexts, and `{` for name generation) to reduce the - // possibility of confusing them with wildcard patterns. 
Consider: - // - // ./: [abc]-foo.txt - // - else if (!t.separated && (tt == type::word || - tt == type::dollar || - tt == type::lparen || - tt == type::lcbrace)) - fail (t) << "whitespace required after attributes" << - info (l) << "use the '\\[' escape sequence if this is a wildcard " - << "pattern"; return make_pair (has, l); } diff --git a/libbuild2/parser.hxx b/libbuild2/parser.hxx index 0d7e900..b3a5395 100644 --- a/libbuild2/parser.hxx +++ b/libbuild2/parser.hxx @@ -344,15 +344,25 @@ namespace build2 // Push a new entry into the attributes_ stack. If the next token is `[` // then parse the attribute sequence until ']' storing the result in the - // new stack entry. Then get the next token and, if standalone is false, - // verify it is not newline/eos (i.e., there is something after it). - // Return the indication of whether we have seen any attributes (note - // that the `[]` empty list does not count) and the location of `[`. + // new stack entry. Then, if next_token is true, get the next token and, + // if standalone is false, verify it is not newline/eos (i.e., there is + // something after it). If the next token is read and it is a word or a + // "word-producing" token (`$` for variable expansions/function calls, `(` + // for eval contexts, and `{` for name generation), then verify that it is + // separated to reduce the possibility of confusing it with a wildcard + // pattern. Consider: + // + // ./: [abc]-foo.txt + // + // Return the indication of whether we have seen any attributes (note that + // the `[]` empty list does not count) and the location of `[`. // // Note that during pre-parsing nothing is pushed into the stack. 
// pair<bool, location> - attributes_push (token&, token_type&, bool standalone = false); + attributes_push (token&, token_type&, + bool standalone = false, + bool next_token = true); attributes attributes_pop () diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx index 87a51d8..2a213ab 100644 --- a/libbuild2/script/parser.cxx +++ b/libbuild2/script/parser.cxx @@ -2497,28 +2497,36 @@ namespace build2 if (!scan.more ()) fail (ll) << "for: missing variable name"; - // Either attributes or variable name. - // - string a (scan.next ()); - const string* ats (!scan.more () ? nullptr : &a); - string vname (!scan.more () ? move (a) : scan.next ()); - - if (scan.more ()) - fail (ll) << "for: unexpected argument '" - << scan.next () << "'"; - - if (ats != nullptr && ats->empty ()) - fail (ll) << "for: empty variable attributes"; - + string vname (scan.next ()); if (vname.empty ()) fail (ll) << "for: empty variable name"; + // Detect patterns analogous to parse_variable_name() (so + // we diagnose `for x[string]`). + // + if (vname.find_first_of ("[*?") != string::npos) + fail (ll) << "for: expected variable name instead of " + << vname; + // Let's also diagnose the `... | for x:...` misuse which // can probably be quite common. // if (vname.find (':') != string::npos) fail (ll) << "for: ':' after variable name"; + string attrs; + if (scan.more ()) + { + attrs = scan.next (); + + if (attrs.empty ()) + fail (ll) << "for: empty variable attributes"; + + if (scan.more ()) + fail (ll) << "for: unexpected argument '" + << scan.next () << "'"; + } + stream_reader sr ( move (in), pipe, !ops.newline (), ops.newline (), ops.exact (), @@ -2544,7 +2552,7 @@ namespace build2 // env.set_variable (vname, names {name (move (*s))}, - ats != nullptr ? *ats : empty_string, + attrs, ll); // Find the construct end, if it is not found yet. @@ -2582,15 +2590,9 @@ namespace build2 } case line_type::cmd_for_args: { - // Parse the variable name with the potential attributes. 
+ // Parse the variable name. // - next_with_attributes (t, tt); - attributes_push (t, tt); - - // @@ TMP Currently we assume that these are the value (rather - // than the variable) attributes. - // - attributes val_attrs (attributes_pop ()); + next (t, tt); assert (tt == type::word && t.qtype == quote_type::unquoted); @@ -2609,9 +2611,18 @@ namespace build2 var = &var_pool->insert (move (vn)); } - next (t, tt); // Skip the colon. + // Parse the potential element attributes and skip the colon. + // + next_with_attributes (t, tt); + attributes_push (t, tt); + assert (tt == type::colon); + // Save element attributes so that we can inject them on each + // iteration. + // + attributes val_attrs (attributes_pop ()); + // Parse the value with the potential attributes. // // Note that we don't really need to change the mode since we diff --git a/libbuild2/script/run.cxx b/libbuild2/script/run.cxx index b7f3314..ca04443 100644 --- a/libbuild2/script/run.cxx +++ b/libbuild2/script/run.cxx @@ -1171,7 +1171,7 @@ namespace build2 // The set pseudo-builtin: set variable from the stdin input. // - // set [-e|--exact] [(-n|--newline)|(-w|--whitespace)] [] + // set [-e|--exact] [(-n|--newline)|(-w|--whitespace)] [] // static void set_builtin (environment& env, @@ -1195,18 +1195,27 @@ namespace build2 if (!scan.more ()) fail (ll) << "set: missing variable name"; - string a (scan.next ()); // Either attributes or variable name. - const string* ats (!scan.more () ? nullptr : &a); - string vname (!scan.more () ? move (a) : scan.next ()); + string vname (scan.next ()); + if (vname.empty ()) + fail (ll) << "set: empty variable name"; + // Detect patterns analogous to parser::parse_variable_name() (so we + // diagnose `set x[string]`). 
+ // + if (vname.find_first_of ("[*?") != string::npos) + fail (ll) << "set: expected variable name instead of " << vname; + + string attrs; if (scan.more ()) - fail (ll) << "set: unexpected argument '" << scan.next () << "'"; + { + attrs = scan.next (); - if (ats != nullptr && ats->empty ()) - fail (ll) << "set: empty variable attributes"; + if (attrs.empty ()) + fail (ll) << "set: empty variable attributes"; - if (vname.empty ()) - fail (ll) << "set: empty variable name"; + if (scan.more ()) + fail (ll) << "set: unexpected argument '" << scan.next () << "'"; + } stream_reader sr (move (in), pipe, ops.whitespace (), ops.newline (), ops.exact (), @@ -1220,10 +1229,7 @@ namespace build2 for (optional s; (s = sr.next ()); ) ns.emplace_back (move (*s)); - env.set_variable (move (vname), - move (ns), - ats != nullptr ? *ats : empty_string, - ll); + env.set_variable (move (vname), move (ns), attrs, ll); } catch (const io_error& e) { diff --git a/libbuild2/test/script/parser+for.test.testscript b/libbuild2/test/script/parser+for.test.testscript index 5350f28..985f9c9 100644 --- a/libbuild2/test/script/parser+for.test.testscript +++ b/libbuild2/test/script/parser+for.test.testscript @@ -69,10 +69,10 @@ %cmd (b/|'b\\')% EOO - : typed-var-value + : typed-elem : $* <>~%EOO% - for [dir_path] x: a b + for x [dir_path]: a b cmd $x end EOI @@ -80,10 +80,10 @@ %cmd (b/|'b\\')% EOO - : typed-values-var-value + : typed-elem-value : $* <>~%EOO% - for [dir_path] x: [strings] a b + for x [dir_path]: [strings] a b cmd $x end EOI @@ -472,14 +472,14 @@ echo a b | for x EOO - : typed-var-value + : typed-elem : $* <>EOO - echo 'a b' | for -w [dir_path] x + echo 'a b' | for -w x [dir_path] cmd $x end EOI - echo 'a b' | for -w [dir_path] x + echo 'a b' | for -w x [dir_path] EOO } @@ -837,14 +837,14 @@ for x b >EOO - for -w [dir_path] x <'a b' + for -w x [dir_path] <'a b' cmd $x end EOI - for -w [dir_path] x <'a b' + for -w x [dir_path] <'a b' EOO } diff --git 
a/libbuild2/test/script/parser.cxx b/libbuild2/test/script/parser.cxx index a30f666..60656a1 100644 --- a/libbuild2/test/script/parser.cxx +++ b/libbuild2/test/script/parser.cxx @@ -497,27 +497,36 @@ namespace build2 assert (pt.type == type::word && pt.value == "for"); mode (lexer_mode::for_loop); - next_with_attributes (t, tt); + next (t, tt); string& n (t.value); - if (tt == type::lsbrace || // Attributes. - (tt == type::word && // Variable name. - t.qtype == quote_type::unquoted && - (n[0] == '_' || - alpha (n[0]) || - n == "*" || - n == "~" || - n == "@"))) + if (tt == type::word && t.qtype == quote_type::unquoted && + (n[0] == '_' || alpha (n[0]) || // Variable. + n == "*" || n == "~" || n == "@")) // Special variable. { - attributes_push (t, tt); - - if (tt != type::word || t.qtype != quote_type::unquoted) - fail (t) << "expected variable name instead of " << t; + // Detect patterns analogous to parse_variable_name() (so we + // diagnose `for x[string]: ...`). + // + if (n.find_first_of ("[*?") != string::npos) + fail (t) << "expected variable name instead of " << n; if (special_variable (n)) fail (t) << "attempt to set '" << n << "' variable directly"; + if (lexer_->peek_char ().first == '[') + { + token vt (move (t)); + next_with_attributes (t, tt); + + attributes_push (t, tt, + true /* standalone */, + false /* next_token */); + + t = move (vt); + tt = t.type; + } + if (lexer_->peek_char ().first == ':') lt = line_type::cmd_for_args; } -- cgit v1.1