From 5ec57d68a5205173a02c34a24d7129347d43196c Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 14 Nov 2019 12:55:54 +0200 Subject: Tighten up attribute recognition during parsing Now it should be possible to use `[]` for wildcard patterns, for example: foo = foo.[hit]xx Note that a leading bracket expression will still be recognized as attributes and escaping or quoting it will inhibit pattern matching. To resolve this case we need to specify an empty attribute list: foo = [] [abc]-foo.cxx --- libbuild2/lexer.cxx | 148 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 90 insertions(+), 58 deletions(-) (limited to 'libbuild2/lexer.cxx') diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx index 61d7fbf..b405929 100644 --- a/libbuild2/lexer.cxx +++ b/libbuild2/lexer.cxx @@ -23,11 +23,15 @@ namespace build2 void lexer:: mode (lexer_mode m, char ps, optional esc) { + bool a (false); // attributes + const char* s1 (nullptr); const char* s2 (nullptr); - bool s (true); - bool n (true); - bool q (true); + + bool s (true); // space + bool n (true); // newline + bool q (true); // quotes + if (!esc) { @@ -39,35 +43,39 @@ namespace build2 { case lexer_mode::normal: { - s1 = ":<>=+ $(){}[]#\t\n"; - s2 = " = "; + a = true; + s1 = ":<>=+ $(){}#\t\n"; + s2 = " = "; break; } case lexer_mode::value: { - s1 = " $(){}[]#\t\n"; - s2 = " "; + s1 = " $(){}#\t\n"; + s2 = " "; break; } case lexer_mode::values: { - s1 = " $(){}[],#\t\n"; - s2 = " "; + // a: beginning and after `,`? + s1 = " $(){},#\t\n"; + s2 = " "; break; } case lexer_mode::switch_expressions: { - s1 = " $(){}[],:#\t\n"; - s2 = " "; + // a: beginning and after `,`? + s1 = " $(){},:#\t\n"; + s2 = " "; break; } case lexer_mode::case_patterns: { - s1 = " $(){}[],|:#\t\n"; - s2 = " "; + // a: beginning and after `,` & `|`? + s1 = " $(){},|:#\t\n"; + s2 = " "; break; } - case lexer_mode::attribute: + case lexer_mode::attributes: { s1 = " $(]#\t\n"; s2 = " "; @@ -75,8 +83,8 @@ namespace build2 } case lexer_mode::eval: { - s1 = ":<>=!&|?, $(){}[]#\t\n"; - s2 = " = &| "; + s1 = ":<>=!&|?, $(){}#\t\n"; + s2 = " = &| "; break; } case lexer_mode::buildspec: @@ -91,8 +99,10 @@ namespace build2 // // 3. Treat newline as an ordinary space. // - s1 = " $(){}[],\t\n"; - s2 = " "; + // Also note that we don't have buildspec attributes. + // + s1 = " $(){},\t\n"; + s2 = " "; n = false; break; } @@ -109,13 +119,13 @@ namespace build2 default: assert (false); // Unhandled custom mode. } - state_.push (state {m, ps, s, n, q, *esc, s1, s2}); + state_.push (state {m, a, ps, s, n, q, *esc, s1, s2}); } token lexer:: next () { - const state& st (state_.top ()); + state& st (state_.top ()); lexer_mode m (st.mode); // For some modes we have dedicated imlementations of next(). @@ -127,7 +137,7 @@ namespace build2 case lexer_mode::values: case lexer_mode::switch_expressions: case lexer_mode::case_patterns: - case lexer_mode::attribute: + case lexer_mode::attributes: case lexer_mode::variable: case lexer_mode::buildspec: break; case lexer_mode::eval: return next_eval (); @@ -147,6 +157,17 @@ namespace build2 ln, cn, token_printer); }; + // Handle attributes (do it first to make sure the flag is cleared + // regardless of what we return). + // + if (st.attributes) + { + st.attributes = false; + + if (c == '[') + return make_token (type::lsbrace); + } + if (eos (c)) return make_token (type::eos); @@ -155,11 +176,11 @@ namespace build2 if (c == st.sep_pair) return make_token (type::pair_separator, string (1, c)); + // NOTE: remember to update mode(), next_eval() if adding any new special + // characters. + switch (c) { - // NOTE: remember to update mode(), next_eval() if adding new special - // characters. - // case '\n': { // Expire value/values modes at the end of the line. @@ -170,20 +191,13 @@ namespace build2 m == lexer_mode::case_patterns) state_.pop (); - sep = true; // Treat newline as always separated. - return make_token (type::newline); - } - case '{': return make_token (type::lcbrace); - case '}': return make_token (type::rcbrace); - case '[': return make_token (type::lsbrace); - case ']': - { - // Expire attribute mode after closing ']'. + // Re-enable attributes in the normal mode. // - if (m == lexer_mode::attribute) - state_.pop (); + if (state_.top ().mode == lexer_mode::normal) + state_.top ().attributes = true; - return make_token (type::rsbrace); + sep = true; // Treat newline as always separated. + return make_token (type::newline); } case '$': return make_token (type::dollar); case ')': return make_token (type::rparen); @@ -198,6 +212,31 @@ namespace build2 } } + // The following characters are special in all modes except attributes. + // + if (m != lexer_mode::attributes) + { + switch (c) + { + case '{': return make_token (type::lcbrace); + case '}': return make_token (type::rcbrace); + } + } + + // The following characters are special in the attributes modes. + // + if (m == lexer_mode::attributes) + { + switch (c) + { + case ']': + { + state_.pop (); // Expire the attributes mode after closing `]`. + return make_token (type::rsbrace); + } + } + } + // The following characters are special in the normal, variable, and // switch_expressions modes. // @@ -208,9 +247,6 @@ namespace build2 { switch (c) { - // NOTE: remember to update mode(), next_eval() if adding new special - // characters. - // case ':': return make_token (type::colon); } } @@ -221,9 +257,6 @@ namespace build2 { switch (c) { - // NOTE: remember to update mode(), next_eval() if adding new special - // characters. - // case '=': { if (peek () == '+') @@ -249,8 +282,6 @@ namespace build2 // if (m == lexer_mode::normal) { - // NOTE: remember to update mode() if adding new special characters. - // switch (c) { case '<': return make_token (type::labrace); @@ -265,8 +296,6 @@ namespace build2 m == lexer_mode::switch_expressions || m == lexer_mode::case_patterns) { - // NOTE: remember to update mode() if adding new special characters. - // switch (c) { case ',': return make_token (type::comma); @@ -277,8 +306,6 @@ namespace build2 // if (m == lexer_mode::case_patterns) { - // NOTE: remember to update mode() if adding new special characters. - // switch (c) { case '|': return make_token (type::bit_or); @@ -294,13 +321,16 @@ namespace build2 token lexer:: next_eval () { + // This mode is quite a bit like the value mode when it comes to special + // characters, except that we have some of our own. + bool sep (skip_spaces ()); xchar c (get ()); if (eos (c)) fail (c) << "unterminated evaluation context"; - const state& st (state_.top ()); + state& st (state_.top ()); uint64_t ln (c.line), cn (c.column); @@ -311,28 +341,30 @@ namespace build2 ln, cn, token_printer); }; - // This mode is quite a bit like the value mode when it comes to special - // characters, except that we have some of our own. + // Handle attributes (do it first to make sure the flag is cleared + // regardless of what we return). // + if (st.attributes) + { + st.attributes = false; + + if (c == '[') + return make_token (type::lsbrace); + } // Handle pair separator. // if (c == st.sep_pair) return make_token (type::pair_separator, string (1, c)); - // Note: we don't treat [ and ] as special here. Maybe can use them for - // something later. - // + // NOTE: remember to update mode() if adding any new special characters. + switch (c) { - // NOTE: remember to update mode() if adding new special characters. - // case '\n': fail (c) << "newline in evaluation context" << endf; case ':': return make_token (type::colon); case '{': return make_token (type::lcbrace); case '}': return make_token (type::rcbrace); - case '[': return make_token (type::lsbrace); - case ']': return make_token (type::rsbrace); case '$': return make_token (type::dollar); case '?': return make_token (type::question); case ',': return make_token (type::comma); -- cgit v1.1