From b808c255b6a9ddba085bf5646e7d20ec344f2e2d Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 28 Apr 2020 08:48:53 +0200 Subject: Initial support for ad hoc recipes (still work in progress) --- libbuild2/lexer.cxx | 172 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 157 insertions(+), 15 deletions(-) (limited to 'libbuild2/lexer.cxx') diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx index c0cadd3..6d3504c 100644 --- a/libbuild2/lexer.cxx +++ b/libbuild2/lexer.cxx @@ -14,7 +14,10 @@ namespace build2 pair, bool> lexer:: peek_chars () { - sep_ = skip_spaces (); + auto p (skip_spaces ()); + assert (!p.second); + sep_ = p.first; + char r[2] = {'\0', '\0'}; xchar c0 (peek ()); @@ -34,7 +37,7 @@ namespace build2 } void lexer:: - mode (lexer_mode m, char ps, optional esc) + mode (lexer_mode m, char ps, optional esc, uintptr_t data) { bool a (false); // attributes @@ -54,7 +57,11 @@ namespace build2 switch (m) { case lexer_mode::normal: + case lexer_mode::cmdvar: { + // Note: `%` is only recognized at the beginning of the line so it + // should not be included here. + // a = true; s1 = ":<>=+? $(){}#\t\n"; s2 = " == "; @@ -121,10 +128,16 @@ namespace build2 n = false; break; } + case lexer_mode::foreign: + assert (data > 1); + // Fall through. case lexer_mode::single_quoted: case lexer_mode::double_quoted: - s = false; - // Fall through. + { + assert (ps == '\0'); + s = false; + break; + } case lexer_mode::variable: { // These are handled in an ad hoc way in word(). @@ -134,7 +147,7 @@ namespace build2 default: assert (false); // Unhandled custom mode. 
} - state_.push (state {m, a, ps, s, n, q, *esc, s1, s2}); + state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2}); } token lexer:: @@ -148,6 +161,7 @@ namespace build2 switch (m) { case lexer_mode::normal: + case lexer_mode::cmdvar: case lexer_mode::value: case lexer_mode::values: case lexer_mode::switch_expressions: @@ -158,10 +172,13 @@ namespace build2 case lexer_mode::buildspec: break; case lexer_mode::eval: return next_eval (); case lexer_mode::double_quoted: return next_quoted (); + case lexer_mode::foreign: return next_foreign (); default: assert (false); // Unhandled custom mode. } - bool sep (skip_spaces ()); + pair skip (skip_spaces ()); + bool sep (skip.first); // Separated from a previous character. + bool first (skip.second); // First non-whitespace character of a line. xchar c (get ()); uint64_t ln (c.line), cn (c.column); @@ -209,7 +226,8 @@ namespace build2 m == lexer_mode::case_patterns) state_.pop (); - // Re-enable attributes in the normal mode. + // Re-enable attributes in the normal mode (should never be needed in + // cmdvar). // if (state_.top ().mode == lexer_mode::normal) state_.top ().attributes = true; @@ -230,6 +248,32 @@ namespace build2 } } + // Line-leading tokens in the normal mode. + // + // Note: must come before any other (e.g., `{`) tests below. + // + if (m == lexer_mode::normal && first) + { + switch (c) + { + case '%': return make_token (type::percent); + case '{': + { + string v; + while (peek () == '{') + v += get (); + + if (!v.empty ()) + { + v += '{'; + return make_token (type::multi_lcbrace, move (v)); + } + + break; + } + } + } + // The following characters are special in all modes except attributes. // if (m != lexer_mode::attributes && m != lexer_mode::attribute_value) @@ -267,6 +311,7 @@ namespace build2 // switch_expressions modes. 
// if (m == lexer_mode::normal || + m == lexer_mode::cmdvar || m == lexer_mode::switch_expressions || m == lexer_mode::case_patterns) { @@ -278,7 +323,8 @@ namespace build2 // The following characters are special in the normal mode. // - if (m == lexer_mode::normal) + if (m == lexer_mode::normal || + m == lexer_mode::cmdvar) { switch (c) { @@ -315,7 +361,8 @@ namespace build2 // The following characters are special in the normal mode. // - if (m == lexer_mode::normal) + if (m == lexer_mode::normal || + m == lexer_mode::cmdvar) { switch (c) { @@ -361,7 +408,7 @@ namespace build2 // This mode is quite a bit like the value mode when it comes to special // characters, except that we have some of our own. - bool sep (skip_spaces ()); + bool sep (skip_spaces ().first); xchar c (get ()); if (eos (c)) @@ -485,6 +532,99 @@ namespace build2 } token lexer:: + next_foreign () + { + state& st (state_.top ()); + + if (st.hold) + { + token r (move (*st.hold)); + state_.pop (); // Expire foreign mode. + return r; + } + + auto count (state_.top ().data); // Number of closing braces to expect. + + xchar c (get ()); // First character of first line after `{{...`. + uint64_t ln (c.line), cn (c.column); + + string lexeme; + for (bool first (true); !eos (c); c = get ()) + { + // If this is the first character of a line, recognize closing braces. + // + if (first) + { + first = false; + + // If this turns out not to be the closing braces, we need to add any + // characters we have extracted to lexeme. Instead of saving these + // characters in a temporary we speculatively add them to the lexeme + // but then chop them off if this turned out to be the closing braces. + // + size_t chop (lexeme.size ()); + + // Skip leading whitespaces, if any. + // + for (; c == ' ' || c == '\t'; c = get ()) + lexeme += c; + + uint64_t bln (c.line), bcn (c.column); // Position of first `}`. + + // Count braces.
+ // + auto i (count); + for (; c == '}'; c = get ()) + { + lexeme += c; + + if (--i == 0) + break; + } + + if (i == 0) // Got enough braces. + { + // Make sure there are only whitespaces/comments after. Note that + // now we must start peeking since newline is not "ours". + // + for (c = peek (); c == ' ' || c == '\t'; c = peek ()) + lexeme += get (); + + if (c == '\n' || c == '#' || eos (c)) + { + st.hold = token (type::multi_rcbrace, + string (count, '}'), + false, quote_type::unquoted, false, + bln, bcn, + token_printer); + + lexeme.resize (chop); + return token (move (lexeme), + false, quote_type::unquoted, false, + ln, cn); + } + + get (); // And fall through (not eos). + } + else + { + if (eos (c)) + break; + + // Fall through. + } + } + + if (c == '\n') + first = true; + + lexeme += c; + } + + return token (type::eos, false, c.line, c.column, token_printer); + } + + token lexer:: word (state st, bool sep) { lexer_mode m (st.mode); @@ -728,7 +868,7 @@ namespace build2 return token (move (lexeme), sep, qtype, qcomp, ln, cn); } - bool lexer:: + pair lexer:: skip_spaces () { bool r (sep_); @@ -739,7 +879,7 @@ namespace build2 // In some special modes we don't skip spaces. // if (!s.sep_space) - return r; + return make_pair (r, false); xchar c (peek ()); bool start (c.column == 1); @@ -758,6 +898,8 @@ namespace build2 { // In some modes we treat newlines as ordinary spaces. // + // Note that in this case we don't adjust start. + // if (!s.sep_newline) { r = true; @@ -772,7 +914,7 @@ namespace build2 break; } - return r; + return make_pair (r, start); } case '#': { @@ -833,12 +975,12 @@ namespace build2 } // Fall through. default: - return r; // Not a space. + return make_pair (r, start); // Not a space. } get (); } - return r; + return make_pair (r, start); } } -- cgit v1.1