diff options
Diffstat (limited to 'libbuild2/cc/lexer.cxx')
-rw-r--r-- | libbuild2/cc/lexer.cxx | 76 |
1 files changed, 49 insertions, 27 deletions
diff --git a/libbuild2/cc/lexer.cxx b/libbuild2/cc/lexer.cxx index d2be3d8..d20e0dc 100644 --- a/libbuild2/cc/lexer.cxx +++ b/libbuild2/cc/lexer.cxx @@ -138,10 +138,13 @@ namespace build2 using type = token_type; void lexer:: - next (token& t, xchar c, bool ignore_pp) + next (token& t, pair<xchar, bool> cf, bool ignore_pp) { - for (;; c = skip_spaces ()) + for (;; cf = skip_spaces ()) { + xchar c (cf.first); + + t.first = cf.second; t.file = &log_file_; t.line = log_line_ ? *log_line_ : c.line; t.column = c.column; @@ -197,7 +200,7 @@ namespace build2 { // Note that we keep using the passed token for buffers. // - c = skip_spaces (false); // Stop at newline. + c = skip_spaces (false).first; // Stop at newline. if (eos (c) || c == '\n') break; @@ -211,18 +214,21 @@ namespace build2 // #line <integer> [<string literal>] ... // # <integer> [<string literal>] ... // - // Also diagnose #include while at it. + // Also diagnose #include while at it if preprocessed. // if (!(c >= '0' && c <= '9')) { - next (t, c, false); + next (t, make_pair (c, false), false); if (t.type == type::identifier) { - if (t.value == "include") - fail (l) << "unexpected #include directive"; - else if (t.value != "line") + if (t.value != "line") + { + if (preprocessed_ && t.value == "include") + fail (l) << "unexpected #include directive"; + continue; + } } else continue; @@ -230,7 +236,7 @@ namespace build2 if (t.type != type::identifier || t.value != "line") continue; - c = skip_spaces (false); + c = skip_spaces (false).first; if (!(c >= '0' && c <= '9')) fail (c) << "line number expected after #line directive"; @@ -242,7 +248,7 @@ namespace build2 continue; // Parse the tail, if any. } - next (t, c, false); + next (t, make_pair (c, false), false); } break; } @@ -374,9 +380,13 @@ namespace build2 xchar p (peek ()); if (p == ':') + { geth (p); + t.type = type::scope; + } + else + t.type = type::colon; - t.type = type::punctuation; return; } // Number (and also .<N> above). @@ -727,8 +737,8 @@ namespace build2 // R"<delimiter>(<raw_characters>)<delimiter>" // // Where <delimiter> is a potentially-empty character sequence made of - // any source character but parentheses, backslash and spaces. It can be - // at most 16 characters long. + // any source character but parentheses, backslash, and spaces (in + // particular, it can be `"`). It can be at most 16 characters long. // // Note that the <raw_characters> are not processed in any way, not even // for line continuations. @@ -743,7 +753,7 @@ namespace build2 { c = geth (); - if (eos (c) || c == '\"' || c == ')' || c == '\\' || c == ' ') + if (eos (c) || c == ')' || c == '\\' || c == ' ') fail (l) << "invalid raw string literal"; if (c == '(') @@ -823,7 +833,7 @@ namespace build2 // See if we have the file. // - c = skip_spaces (false); + c = skip_spaces (false).first; if (c == '\"') { @@ -1007,16 +1017,24 @@ namespace build2 } auto lexer:: - skip_spaces (bool nl) -> xchar + skip_spaces (bool nl) -> pair<xchar, bool> { xchar c (get ()); + // Besides the first character, we also need to take into account any + // newlines that we are skipping. For example, the first character may + // be a space at the end of the line which we will skip along with the + // following newline. + // + bool first (c.column == 1); + for (; !eos (c); c = get ()) { switch (c) { case '\n': if (!nl) break; + first = true; // Fall through. case ' ': case '\t': @@ -1072,11 +1090,16 @@ namespace build2 if (!nl) break; + first = true; continue; } // C comment. // + // Note that for the first logic we consider a C comment to be + // entirely part of the same logical line even if there are + // newlines inside. + // if (p == '*') { get (p); @@ -1088,21 +1111,18 @@ namespace build2 if (eos (c)) fail (p) << "unterminated comment"; - if (c == '*' && (c = peek ()) == '/') + if (c == '*') { - get (c); - break; + if ((c = peek ()) == '/') + { + get (c); + break; + } } - - if (c != '*' && c != '\\') + else { // Direct buffer scan. // - // Note that we should call get() prior to the direct buffer - // scan (see butl::char_scanner for details). - // - get (c); - const char* b (gptr_); const char* e (egptr_); const char* p (b); @@ -1132,7 +1152,7 @@ namespace build2 break; } - return c; + return make_pair (c, first); } ostream& @@ -1142,6 +1162,8 @@ namespace build2 { case type::dot: o << "'.'"; break; case type::semi: o << "';'"; break; + case type::colon: o << "':'"; break; + case type::scope: o << "'::'"; break; case type::less: o << "'<'"; break; case type::greater: o << "'>'"; break; case type::lcbrace: o << "'{'"; break; |