From d5faeeab1d2115c02a330ac9c95d63ba225faabc Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 10 Jun 2020 10:00:55 +0200 Subject: Handle special variable names in base lexer via mode data --- libbuild2/build/script/lexer.cxx | 39 +++++------------- libbuild2/build/script/lexer.hxx | 3 -- libbuild2/lexer.cxx | 14 ++++++- libbuild2/lexer.hxx | 7 ++++ libbuild2/script/lexer.cxx | 2 +- .../test/script/lexer+variable.test.testscript | 3 +- libbuild2/test/script/lexer.cxx | 46 ++++++++++------------ 7 files changed, 52 insertions(+), 62 deletions(-) diff --git a/libbuild2/build/script/lexer.cxx b/libbuild2/build/script/lexer.cxx index a58f794..d849ac9 100644 --- a/libbuild2/build/script/lexer.cxx +++ b/libbuild2/build/script/lexer.cxx @@ -80,7 +80,15 @@ namespace build2 } default: { - base_lexer::mode (m, ps, esc); + // Recognize special variable names ($>, $<, $~). + // + if (m == lexer_mode::variable) + { + assert (data == 0); + data = reinterpret_cast<uintptr_t> ("><~"); + } + + base_lexer::mode (m, ps, esc, data); return; } } @@ -235,35 +243,6 @@ namespace build2 unget (c); return word (st, sep); } - - token lexer:: - word (state st, bool sep) - { - lexer_mode m (st.mode); - - // Customized implementation that handles special variable names ($>, - // $<, $~). - // - // @@ TODO: $(<), $(>): feels like this will have to somehow be - // handled at the top-level lexer level. Maybe provide a - // string of one-char special variable names as state::data? - // - if (m != lexer_mode::variable) - return base_lexer::word (st, sep); - - xchar c (peek ()); - - if (c != '>' && c != '<' && c != '~') - return base_lexer::word (st, sep); - - get (); - - state_.pop (); // Expire the variable mode. 
- return token (string (1, c), - sep, - quote_type::unquoted, false, - c.line, c.column); - } } } } diff --git a/libbuild2/build/script/lexer.hxx b/libbuild2/build/script/lexer.hxx index 7d919e5..646d3b9 100644 --- a/libbuild2/build/script/lexer.hxx +++ b/libbuild2/build/script/lexer.hxx @@ -69,9 +69,6 @@ namespace build2 private: token next_line (); - - virtual token - word (state, bool) override; }; } } diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx index e50ec16..2f2ace4 100644 --- a/libbuild2/lexer.cxx +++ b/libbuild2/lexer.cxx @@ -741,7 +741,19 @@ namespace build2 // else if (m == lexer_mode::variable) { - if (c != '_' && !(lexeme.empty () ? alpha (c) : alnum (c))) + bool first (lexeme.empty ()); + + // Handle special variable names, if any. + // + if (first && + st.data != 0 && + strchr (reinterpret_cast<const char*> (st.data), c) != nullptr) + { + get (); + lexeme += c; + done = true; + } + else if (c != '_' && !(first ? alpha (c) : alnum (c))) { if (c != '.') done = true; diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx index d5f1c99..f577828 100644 --- a/libbuild2/lexer.hxx +++ b/libbuild2/lexer.hxx @@ -56,6 +56,13 @@ namespace build2 // single-quote mode. The number of closing braces to expect is passed as // mode data. // + // The mode data is also used by a few other modes. The buildspec mode uses + // it as a boolean value to decide whether to recognize newlines as tokens. + // In the variable mode the mode data may be a pointer to a C string that + // contains a list of special one-character variable names to recognize + // (e.g., $<, $~, etc). Note that the parser has a special kludge to also + // recognize them as $(<), etc. + // // The alternative modes must be set manually. The value/values and derived // modes automatically expires after the end of the line. The attribute and // subscript modes expires after the closing `]`. 
The variable mode expires diff --git a/libbuild2/script/lexer.cxx b/libbuild2/script/lexer.cxx index ce409c1..a18c1df 100644 --- a/libbuild2/script/lexer.cxx +++ b/libbuild2/script/lexer.cxx @@ -78,7 +78,7 @@ namespace build2 m == lexer_mode::eval || m == lexer_mode::attribute_value); - base_lexer::mode (m, ps, esc); + base_lexer::mode (m, ps, esc, data); return; } } diff --git a/libbuild2/test/script/lexer+variable.test.testscript b/libbuild2/test/script/lexer+variable.test.testscript index ac7d6be..5993c37 100644 --- a/libbuild2/test/script/lexer+variable.test.testscript +++ b/libbuild2/test/script/lexer+variable.test.testscript @@ -64,6 +64,7 @@ test.arguments = variable : multi-digit : $* <"10" 2>>EOE != 0 - <stdin>:1:1: error: multi-digit special variable name + <stdin>:1:2: error: multi-digit special variable name + info: use '($*[NN])' to access elements beyond 9 EOE } diff --git a/libbuild2/test/script/lexer.cxx b/libbuild2/test/script/lexer.cxx index e895d4a..c23dea4 100644 --- a/libbuild2/test/script/lexer.cxx +++ b/libbuild2/test/script/lexer.cxx @@ -92,16 +92,16 @@ namespace build2 } default: { - // Make sure pair separators are only enabled where we expect - // them. + // Recognize special variable names ($*, $N, $~, $@). See also an + // extra check in word() below. // - // @@ Should we disable pair separators in the eval mode? - // - assert (ps == '\0' || - m == lexer_mode::eval || - m == lexer_mode::attribute_value); + if (m == lexer_mode::variable) + { + assert (data == 0); + data = reinterpret_cast<uintptr_t> ("*~@0123456789"); + } - base_lexer::mode (m, ps, esc); + base_lexer::mode (m, ps, esc, data); return; } } @@ -335,27 +335,21 @@ namespace build2 { lexer_mode m (st.mode); - // Customized implementation that handles special variable names ($*, - // $N, $~, $@). 
- // - if (m != lexer_mode::variable) - return base_lexer::word (st, sep); - - xchar c (peek ()); - - if (c != '*' && c != '~' && c != '@' && !digit (c)) - return base_lexer::word (st, sep); + token r (base_lexer::word (st, sep)); - get (); + if (m == lexer_mode::variable) + { + if (r.value.size () == 1 && digit (r.value[0])) // $N + { + xchar c (peek ()); - if (digit (c) && digit (peek ())) - fail (c) << "multi-digit special variable name"; + if (digit (c)) // $NN + fail (c) << "multi-digit special variable name" << + info << "use '($*[NN])' to access elements beyond 9"; + } + } - state_.pop (); // Expire the variable mode. - return token (string (1, c), - sep, - quote_type::unquoted, false, - c.line, c.column); + return r; } } } -- cgit v1.1