From 7efdbab3cd38b7e1693f0a4a85a9933adb50fb9d Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Fri, 4 Nov 2016 08:17:01 +0200 Subject: Handle printing of extended tokens --- build2/lexer.cxx | 71 +++++++++++++++++++++++++++++++------------------------ build2/parser | 2 +- build2/parser.cxx | 15 ++++++------ build2/token | 37 +++++++++++++++++++---------- build2/token.cxx | 49 +++++++++++++++++++++----------------- 5 files changed, 101 insertions(+), 73 deletions(-) diff --git a/build2/lexer.cxx b/build2/lexer.cxx index 7b39623..02271e9 100644 --- a/build2/lexer.cxx +++ b/build2/lexer.cxx @@ -97,14 +97,19 @@ namespace build2 xchar c (get ()); uint64_t ln (c.line), cn (c.column); + auto make_token = [sep, ln, cn] (type t) + { + return token (t, sep, ln, cn, token_printer); + }; + if (eos (c)) - return token (type::eos, sep, ln, cn); + return make_token (type::eos); // Handle pair separator. // if ((m == lexer_mode::normal || m == lexer_mode::value) && c == state_.top ().sep_pair) - return token (type::pair_separator, sep, ln, cn); + return make_token (type::pair_separator); switch (c) { @@ -118,15 +123,15 @@ namespace build2 if (m == lexer_mode::value) state_.pop (); - return token (type::newline, sep, ln, cn); + return make_token (type::newline); } - case '{': return token (type::lcbrace, sep, ln, cn); - case '}': return token (type::rcbrace, sep, ln, cn); - case '[': return token (type::lsbrace, sep, ln, cn); - case ']': return token (type::rsbrace, sep, ln, cn); - case '$': return token (type::dollar, sep, ln, cn); - case '(': return token (type::lparen, sep, ln, cn); - case ')': return token (type::rparen, sep, ln, cn); + case '{': return make_token (type::lcbrace); + case '}': return make_token (type::rcbrace); + case '[': return make_token (type::lsbrace); + case ']': return make_token (type::rsbrace); + case '$': return make_token (type::dollar); + case '(': return make_token (type::lparen); + case ')': return make_token (type::rparen); } // The following characters are not treated as special in the value mode. @@ -138,23 +143,23 @@ namespace build2 // NOTE: remember to update mode(), next_eval() if adding new special // characters. // - case ':': return token (type::colon, sep, ln, cn); + case ':': return make_token (type::colon); case '=': { if (peek () == '+') { get (); - return token (type::prepend, sep, ln, cn); + return make_token (type::prepend); } else - return token (type::assign, sep, ln, cn); + return make_token (type::assign); } case '+': { if (peek () == '=') { get (); - return token (type::append, sep, ln, cn); + return make_token (type::append); } } } @@ -177,6 +182,11 @@ namespace build2 uint64_t ln (c.line), cn (c.column); + auto make_token = [sep, ln, cn] (type t) + { + return token (t, sep, ln, cn, token_printer); + }; + // This mode is quite a bit like the value mode when it comes to special // characters, except that we have some of our own. // @@ -184,7 +194,7 @@ namespace build2 // Handle pair separator. // if (c == state_.top ().sep_pair) - return token (type::pair_separator, sep, ln, cn); + return make_token (type::pair_separator); // Note: we don't treat [ and ] as special here. Maybe can use them for // something later. @@ -194,17 +204,17 @@ namespace build2 // NOTE: remember to update mode() if adding new special characters. // case '\n': fail (c) << "newline in evaluation context"; - case ':': return token (type::colon, sep, ln, cn); - case '{': return token (type::lcbrace, sep, ln, cn); - case '}': return token (type::rcbrace, sep, ln, cn); - case '[': return token (type::lsbrace, sep, ln, cn); - case ']': return token (type::rsbrace, sep, ln, cn); - case '$': return token (type::dollar, sep, ln, cn); - case '(': return token (type::lparen, sep, ln, cn); + case ':': return make_token (type::colon); + case '{': return make_token (type::lcbrace); + case '}': return make_token (type::rcbrace); + case '[': return make_token (type::lsbrace); + case ']': return make_token (type::rsbrace); + case '$': return make_token (type::dollar); + case '(': return make_token (type::lparen); case ')': { state_.pop (); // Expire eval mode. - return token (type::rparen, sep, ln, cn); + return make_token (type::rparen); } case '=': case '!': @@ -212,7 +222,7 @@ namespace build2 if (peek () == '=') { get (); - return token (c == '=' ? type::equal : type::not_equal, sep, ln, cn); + return make_token (c == '=' ? type::equal : type::not_equal); } break; } @@ -223,10 +233,9 @@ namespace build2 if (e) get (); - return token (c == '<' - ? e ? type::less_equal : type::less - : e ? type::greater_equal : type::greater, - sep, ln, cn); + return make_token (c == '<' + ? e ? type::less_equal : type::less + : e ? type::greater_equal : type::greater); } } @@ -248,8 +257,8 @@ namespace build2 switch (c) { - case '$': return token (type::dollar, false, ln, cn); - case '(': return token (type::lparen, false, ln, cn); + case '$': return token (type::dollar, false, ln, cn, token_printer); + case '(': return token (type::lparen, false, ln, cn, token_printer); } // Otherwise it is a name. @@ -435,7 +444,7 @@ namespace build2 if (m == lexer_mode::variable) state_.pop (); - return token (lexeme, sep, quoted, ln, cn); + return token (lexeme, sep, quoted, ln, cn, token_printer); } diff --git a/build2/parser b/build2/parser index 49bf59c..eeaec7b 100644 --- a/build2/parser +++ b/build2/parser @@ -375,7 +375,7 @@ namespace build2 target* default_target_; names_type export_value_; - token peek_ = token (token_type::eos, false, 0, 0); + token peek_ = token (token_type::eos, false, 0, 0, token_printer); bool peeked_ = false; enum class replay {stop, save, play} replay_ = replay::stop; diff --git a/build2/parser.cxx b/build2/parser.cxx index 0853c04..c7ede35 100644 --- a/build2/parser.cxx +++ b/build2/parser.cxx @@ -178,7 +178,7 @@ namespace build2 enter_buildfile (p); // Needs scope_. - token t (type::eos, false, 0, 0); + token t; type tt; next (t, tt); @@ -198,8 +198,8 @@ namespace build2 target_ = nullptr; scope_ = &s; + token t; type tt; - token t (type::eos, false, 0, 0); variable (t, tt, var, kind); return t; } @@ -212,8 +212,8 @@ namespace build2 target_ = nullptr; scope_ = &s; + token t; type tt; - token t (type::eos, false, 0, 0); value rhs (variable_value (t, tt)); value lhs; @@ -805,7 +805,7 @@ namespace build2 lexer* ol (lexer_); lexer_ = &l; - token t (type::eos, false, 0, 0); + token t; type tt; next (t, tt); clause (t, tt); @@ -938,7 +938,7 @@ namespace build2 target* odt (default_target_); default_target_ = nullptr; - token t (type::eos, false, 0, 0); + token t; type tt; next (t, tt); clause (t, tt); @@ -2040,7 +2040,8 @@ namespace build2 tt != type::lparen) || peeked ().separated)) { tt = type::name; - t = token (move (concat), true, false, t.line, t.column); + t = token ( + move (concat), true, false, t.line, t.column, token_printer); concat.clear (); } else if (!first) @@ -2744,7 +2745,7 @@ namespace build2 // mode (lexer_mode::value); - token t (type::eos, false, 0, 0); + token t; type tt; next (t, tt); diff --git a/build2/token b/build2/token index 04a7ebd..065429c 100644 --- a/build2/token +++ b/build2/token @@ -18,6 +18,8 @@ namespace build2 { enum { + // NOTE: remember to update token_printer()! + eos, name, newline, @@ -50,9 +52,16 @@ namespace build2 value_type v_; }; + class token; + + void + token_printer (ostream&, const token&, bool); + class token { public: + using printer_type = void (ostream&, const token&, bool diag); + token_type type; bool separated; // Whitespace-separated from the previous token. bool quoted; // Name (or some part of it) was quoted. @@ -62,23 +71,27 @@ namespace build2 uint64_t line; uint64_t column; + printer_type* printer; + public: - token (token_type t, bool s, uint64_t l, uint64_t c) - : type (t), separated (s), quoted (false), line (l), column (c) {} - - token (string n, bool s, bool q, uint64_t l, uint64_t c) - : type (token_type::name), - separated (s), - quoted (q), - value (move (n)), - line (l), - column (c) {} + token () + : token (token_type::eos, false, 0, 0, token_printer) {} + + token (token_type t, bool s, uint64_t l, uint64_t c, printer_type* p) + : type (t), separated (s), quoted (false), + line (l), column (c), + printer (p) {} + + token (string n, bool s, bool q, uint64_t l, uint64_t c, printer_type* p) + : type (token_type::name), separated (s), quoted (q), value (move (n)), + line (l), column (c), + printer (p) {} }; // Output the token value in a format suitable for diagnostics. // - ostream& - operator<< (ostream&, const token&); + inline ostream& + operator<< (ostream& o, const token& t) {t.printer (o, t, true); return o;} // Diagnostics plumbing. We assume that any diag stream for which we can use // token as location has its aux data pointing to pointer to path. diff --git a/build2/token.cxx b/build2/token.cxx index 5a47eb7..bf2249f 100644 --- a/build2/token.cxx +++ b/build2/token.cxx @@ -8,34 +8,39 @@ using namespace std; namespace build2 { - ostream& - operator<< (ostream& os, const token& t) + void + token_printer (ostream& os, const token& t, bool d) { + // Only quote non-name tokens for diagnostics. + // + const char* q (d ? "'" : ""); + switch (t.type) { - case token_type::eos: os << ""; break; + case token_type::eos: os << ""; break; case token_type::newline: os << ""; break; case token_type::pair_separator: os << ""; break; - case token_type::colon: os << "':'"; break; - case token_type::lcbrace: os << "'{'"; break; - case token_type::rcbrace: os << "'}'"; break; - case token_type::lsbrace: os << "'['"; break; - case token_type::rsbrace: os << "']'"; break; - case token_type::assign: os << "'='"; break; - case token_type::prepend: os << "'=+'"; break; - case token_type::append: os << "'+='"; break; - case token_type::equal: os << "'=='"; break; - case token_type::not_equal: os << "'!='"; break; - case token_type::less: os << "'<'"; break; - case token_type::greater: os << "'>'"; break; - case token_type::less_equal: os << "'<='"; break; - case token_type::greater_equal: os << "'>='"; break; - case token_type::dollar: os << "'$'"; break; - case token_type::lparen: os << "'('"; break; - case token_type::rparen: os << "')'"; break; case token_type::name: os << '\'' << t.value << '\''; break; - } - return os; + case token_type::colon: os << q << ':' << q; break; + case token_type::lcbrace: os << q << '{' << q; break; + case token_type::rcbrace: os << q << '}' << q; break; + case token_type::lsbrace: os << q << '[' << q; break; + case token_type::rsbrace: os << q << ']' << q; break; + case token_type::assign: os << q << '=' << q; break; + case token_type::prepend: os << q << "=+" << q; break; + case token_type::append: os << q << "+=" << q; break; + case token_type::equal: os << q << "==" << q; break; + case token_type::not_equal: os << q << "!=" << q; break; + case token_type::less: os << q << '<' << q; break; + case token_type::greater: os << q << '>' << q; break; + case token_type::less_equal: os << q << "<=" << q; break; + case token_type::greater_equal: os << q << ">=" << q; break; + case token_type::dollar: os << q << '$' << q; break; + case token_type::lparen: os << q << '(' << q; break; + case token_type::rparen: os << q << ')' << q; break; + + default: assert (false); // Unhandled extended token. + } } } -- cgit v1.1