path: root/build2/cc/lexer.cxx
Diffstat (limited to 'build2/cc/lexer.cxx')
-rw-r--r-- build2/cc/lexer.cxx 1129
1 file changed, 0 insertions, 1129 deletions
diff --git a/build2/cc/lexer.cxx b/build2/cc/lexer.cxx
deleted file mode 100644
index 7795192..0000000
--- a/build2/cc/lexer.cxx
+++ /dev/null
@@ -1,1129 +0,0 @@
-// file : build2/cc/lexer.cxx -*- C++ -*-
-// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
-// license : MIT; see accompanying LICENSE file
-
-#include <build2/cc/lexer.hxx>
-
-using namespace std;
-using namespace butl;
-
-// bit 0 - identifier character (_0-9A-Za-z).
-//
-static const uint8_t char_flags[256] =
-//0 1 2 3 4 5 6 7 8 9 A B C D E F
-{
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 3
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 5
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 7
-
- // 128-255
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0
-};
-
-// Diagnostics plumbing.
-//
-namespace butl // ADL
-{
- inline build2::location
- get_location (const butl::char_scanner::xchar& c, const void* data)
- {
- using namespace build2;
-
- assert (data != nullptr); // E.g., must be &lexer::name_.
- return location (static_cast<const path*> (data), c.line, c.column);
- }
-}
-
-namespace build2
-{
- namespace cc
- {
- auto lexer::
- peek (bool e) -> xchar
- {
- if (unget_)
- return ungetc_;
-
- if (unpeek_)
- return unpeekc_;
-
- xchar c (base::peek ());
-
- if (e && c == '\\')
- {
- get (c);
- xchar p (base::peek ());
-
- // Handle Windows CRLF sequence. Similar to char_scanner, we treat a
- // single CR as if it was followed by LF and also collapse multiple
- // CRs.
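- //
- // For example (illustrative): "\<CR><LF>", a bare "\<CR>", and
- // "\<CR><CR><LF>" are all consumed as a single line continuation.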
- //
- while (p == '\r')
- {
- get (p);
- p = base::peek ();
-
- if (p == '\n')
- break;
-
- // Pretend '\n' was there and recurse.
- //
- if (p != '\r')
- return peek (e);
- }
-
- if (p == '\n')
- {
- get (p);
- return peek (e); // Recurse.
- }
-
- // Save in the unpeek buffer so that it is returned on the subsequent
- // calls to peek() (until get()).
- //
- unpeek_ = true;
- unpeekc_ = c;
- }
-
- return c;
- }
-
- inline auto lexer::
- get (bool e) -> xchar
- {
- if (unget_)
- {
- unget_ = false;
- return ungetc_;
- }
- else
- {
- xchar c (peek (e));
- get (c);
- return c;
- }
- }
-
- inline void lexer::
- get (const xchar& c)
- {
- // Increment the logical line similar to how base will increment the
- // physical (the column counts are the same).
- //
- if (log_line_ && c == '\n' && !unget_)
- ++*log_line_;
-
- base::get (c);
- }
-
- inline auto lexer::
- geth (bool e) -> xchar
- {
- xchar c (get (e));
- cs_.append (c);
- return c;
- }
-
- inline void lexer::
- geth (const xchar& c)
- {
- get (c);
- cs_.append (c);
- }
-
- using type = token_type;
-
- void lexer::
- next (token& t, xchar c, bool ignore_pp)
- {
- for (;; c = skip_spaces ())
- {
- t.file = log_file_;
- t.line = log_line_ ? *log_line_ : c.line;
- t.column = c.column;
-
- if (eos (c))
- {
- t.type = type::eos;
- return;
- }
-
- const location l (&name_, c.line, c.column);
-
- // Hash the token's line. The reason is debug info. In fact, doing
- // this will make quite a few "noop" changes (like adding a newline
- // anywhere in the source) cause the checksum to change. But there
- // doesn't seem to be any way around it: the case where we benefit
- // from the precise change detection the most (development) is also
- // where we will most likely have debug info enabled.
- //
- // Note that in order not to make this completely useless we don't
- // hash the column. Even if it is part of the debug info, having it a
- // bit off shouldn't cause any significant mis-positioning. We also
- // don't hash the file path for each token, instead only hashing it
- // when it changes with the #line directive (as well as in the
- // constructor for the initial path).
- //
- cs_.append (t.line);
- cs_.append (c);
-
- switch (c)
- {
- // Preprocessor lines.
- //
- case '#':
- {
- // It is tempting to simply scan until the newline ignoring
- // anything in between. However, these lines can start a
- // multi-line C-style comment. So we have to tokenize them (and
- // hash the data for each token).
- //
- // Note that this may not work for things like #error that can
- // contain pretty much anything. Also note that lines that start
- // with '#' can contain '#' further down. In this case we need to
- // be careful not to recurse (and consume multiple newlines). Thus
- // the ignore_pp flag.
- //
- // Finally, to support diagnostics properly we need to recognize
- // #line directives.
- //
- if (ignore_pp)
- {
- for (bool first (true);;)
- {
- // Note that we keep using the passed token for buffers.
- //
- c = skip_spaces (false); // Stop at newline.
-
- if (eos (c) || c == '\n')
- break;
-
- if (first)
- {
- first = false;
-
- // Recognize #line and its shorthand version:
- //
- // #line <integer> [<string literal>] ...
- // # <integer> [<string literal>] ...
- //
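- // For example (illustrative):
- //
- // #line 123 "foo.cxx"
- // # 123 "foo.cxx" 2
- //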
- // Also diagnose #include while at it.
- //
- if (!(c >= '0' && c <= '9'))
- {
- next (t, c, false);
-
- if (t.type == type::identifier)
- {
- if (t.value == "include")
- fail (l) << "unexpected #include directive";
- else if (t.value != "line")
- continue;
- }
- else
- continue;
-
- if (t.type != type::identifier || t.value != "line")
- continue;
-
- c = skip_spaces (false);
-
- if (!(c >= '0' && c <= '9'))
- fail (c) << "line number expected after #line directive";
- }
-
- // Ok, this is #line and next comes the line number.
- //
- line_directive (t, c);
- continue; // Parse the tail, if any.
- }
-
- next (t, c, false);
- }
- break;
- }
- else
- {
- t.type = type::punctuation;
- return;
- }
- }
- // Single-letter punctuation.
- //
- case ';': t.type = type::semi; return;
- case '{': t.type = type::lcbrace; return;
- case '}': t.type = type::rcbrace; return;
- // Other single-letter punctuation.
- //
- case '(':
- case ')':
- case '[':
- case ']':
- case ',':
- case '?':
- case '~':
- case '\\': t.type = type::punctuation; return;
- // Potentially multi-letter punctuation.
- //
- case '.': // . .* .<N> ...
- {
- xchar p (peek ());
-
- if (p == '*')
- {
- geth (p);
- t.type = type::punctuation;
- return;
- }
- else if (p >= '0' && p <= '9')
- {
- number_literal (t, c);
- return;
- }
- else if (p == '.')
- {
- get (p);
-
- xchar q (peek ());
- if (q == '.')
- {
- cs_.append (p);
-
- geth (q);
- t.type = type::punctuation;
- return;
- }
- unget (p);
- // Fall through.
- }
-
- t.type = type::dot;
- return;
- }
- case '=': // = ==
- case '!': // ! !=
- case '*': // * *=
- case '/': // / /= (/* and // handled by skip_spaces() above)
- case '%': // % %=
- case '^': // ^ ^=
- {
- xchar p (peek ());
-
- if (p == '=')
- geth (p);
-
- t.type = type::punctuation;
- return;
- }
- case '<': // < <= << <<=
- case '>': // > >= >> >>=
- {
- xchar p (peek ());
-
- if (p == c)
- {
- geth (p);
- if ((p = peek ()) == '=')
- geth (p);
- t.type = type::punctuation;
- }
- else if (p == '=')
- {
- geth (p);
- t.type = type::punctuation;
- }
- else
- t.type = (c == '<' ? type::less : type::greater);
-
- return;
- }
- case '+': // + ++ +=
- case '-': // - -- -= -> ->*
- {
- xchar p (peek ());
-
- if (p == c || p == '=')
- geth (p);
- else if (c == '-' && p == '>')
- {
- geth (p);
- if ((p = peek ()) == '*')
- geth (p);
- }
-
- t.type = type::punctuation;
- return;
- }
- case '&': // & && &=
- case '|': // | || |=
- {
- xchar p (peek ());
-
- if (p == c || p == '=')
- geth (p);
-
- t.type = type::punctuation;
- return;
- }
- case ':': // : ::
- {
- xchar p (peek ());
-
- if (p == ':')
- geth (p);
-
- t.type = type::punctuation;
- return;
- }
- // Number (and also .<N> above).
- //
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- {
- number_literal (t, c);
- return;
- }
- // Char/string literal, identifier, or other (\, $, @, `).
- //
- default:
- {
- bool raw (false); // Raw string literal.
-
- // Note: known not to be a digit (see above).
- //
- if (char_flags[static_cast<uint8_t> (c)] & 0x01)
- {
- // This smells a little: we know skip_spaces() did not peek at
- // the next character because this is not '/'. Which means the
- // position in the stream must be that of this character + 1.
- //
- t.position = buf_->tellg () - 1;
-
- string& id (t.value);
- id = c;
-
- while (char_flags[static_cast<uint8_t> (c = peek ())] & 0x01)
- {
- geth (c);
- id += c;
-
- // Direct buffer scan. Note that we always follow up with the
- // normal peek() call which may load the next chunk, handle
- // line continuations, etc. In other words, the end of the
- // "raw" scan doesn't necessarily mean the end.
- //
- const char* b (gptr_);
- const char* p (b);
-
- for (const char* e (egptr_);
- p != e && char_flags[static_cast<uint8_t> (*p)] & 0x01;
- ++p) ;
-
- // Unrolling this loop doesn't make a difference.
- //
- // for (const char* e (egptr_ - 4); p < e; p += 4)
- // {
- // uint8_t c;
- //
- // c = static_cast<uint8_t> (p[0]);
- // if (!(char_flags[c] & 0x01)) break;
- //
- // c = static_cast<uint8_t> (p[1]);
- // if (!(char_flags[c] & 0x01)) {p += 1; break;}
- //
- // c = static_cast<uint8_t> (p[2]);
- // if (!(char_flags[c] & 0x01)) {p += 2; break;}
- //
- // c = static_cast<uint8_t> (p[3]);
- // if (!(char_flags[c] & 0x01)) {p += 3; break;}
- // }
-
- size_t n (p - b);
- id.append (b, n); cs_.append (b, n);
- gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
- }
-
- // If the following character is a quote, see if the identifier
- // is one of the literal prefixes.
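- // (That is, u, u8, L, and U for character and string literals, plus
- // R, uR, u8R, LR, and UR for raw string literals.)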
- //
- if (c == '\'' || c == '\"')
- {
- size_t n (id.size ()), i (0);
- switch (id[0])
- {
- case 'u':
- {
- if (n > 1 && id[1] == '8')
- ++i;
- }
- // Fall through.
- case 'L':
- case 'U':
- {
- ++i;
-
- if (c == '\"' && n > i && id[i] == 'R')
- {
- ++i;
- raw = true;
- }
- break;
- }
- case 'R':
- {
- if (c == '\"')
- {
- ++i;
- raw = true;
- }
- break;
- }
- }
-
- if (i == n) // All characters "consumed".
- {
- geth (c);
- id.clear ();
- }
- }
-
- if (!id.empty ())
- {
- t.type = type::identifier;
- return;
- }
- }
-
- switch (c)
- {
- case '\'':
- {
- char_literal (t, c);
- return;
- }
- case '\"':
- {
- if (raw)
- raw_string_literal (t, c);
- else
- string_literal (t, c);
- return;
- }
- default:
- {
- t.type = type::other;
- return;
- }
- }
- }
- }
- }
- }
-
- void lexer::
- number_literal (token& t, xchar c)
- {
- // note: c is hashed
-
- // A number (integer or floating point literal) can:
- //
- // 1. Start with a dot (which must be followed by a digit, e.g., .123).
- //
- // 2. Have a radix prefix (0b101, 0123, 0X12AB).
- //
- // 3. Have an exponent (1e10, 0x1.p-10, 1.).
- //
- // 4. Have digits separated with ' (123'456, 0xff00'00ff).
- //
- // 5. End with a built-in or user-defined literal suffix (123f, 123UL, 123_X).
- //
- // Quoting from GCC's preprocessor documentation:
- //
- // "Formally preprocessing numbers begin with an optional period, a
- // required decimal digit, and then continue with any sequence of
- // letters, digits, underscores, periods, and exponents. Exponents are
- // the two-character sequences 'e+', 'e-', 'E+', 'E-', 'p+', 'p-', 'P+',
- // and 'P-'."
- //
- // So it looks like a "C++ number" is then any unseparated (with
- // whitespace or punctuation) sequence of those plus '. The only mildly
- // tricky part is then to recognize +/- as being part of the exponent.
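- //
- // For example (illustrative): 0x1.8p-3, 123'456UL, and 1e-10 each lex
- // as a single number token, while in 1+x the '+' ends the number since
- // it does not follow one of e/E/p/P.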
- //
- while (!eos ((c = peek ())))
- {
- switch (c)
- {
- // All the whitespace, punctuation, and other characters that end
- // the number.
- //
- case ' ':
- case '\n':
- case '\t':
- case '\r':
- case '\f':
- case '\v':
-
- case '#':
- case ';':
- case '{':
- case '}':
- case '(':
- case ')':
- case '[':
- case ']':
- case ',':
- case '?':
- case '~':
- case '=':
- case '!':
- case '*':
- case '/':
- case '%':
- case '^':
- case '>':
- case '<':
- case '&':
- case '|':
- case ':':
- case '+': // The exponent case is handled below.
- case '-': // The exponent case is handled below.
- case '"':
- case '\\':
-
- case '@':
- case '$':
- case '`':
- break;
-
- // Recognize +/- after the exponent.
- //
- case 'e':
- case 'E':
- case 'p':
- case 'P':
- {
- geth (c);
- c = peek ();
- if (c == '+' || c == '-')
- geth (c);
- continue;
- }
-
- case '_':
- case '.':
- case '\'':
- default: // Digits and letters.
- {
- geth (c);
- continue;
- }
- }
-
- break;
- }
-
- t.type = type::number;
- }
-
- void lexer::
- char_literal (token& t, xchar c)
- {
- // note: c is hashed
-
- const location l (&name_, c.line, c.column);
-
- for (char p (c);;) // Previous character (see below).
- {
- c = geth ();
-
- if (eos (c) || c == '\n')
- fail (l) << "unterminated character literal";
-
- if (c == '\'' && p != '\\')
- break;
-
- // Keep track of \\-escapings so we don't confuse them with \', as in
- // '\\'.
- //
- p = (c == '\\' && p == '\\') ? '\0' : static_cast<char> (c);
- }
-
- // See if we have a user-defined suffix (which is an identifier).
- //
- if ((c = peek ()) == '_' || alpha (c))
- literal_suffix (c);
-
- t.type = type::character;
- }
-
- void lexer::
- string_literal (token& t, xchar c)
- {
- // note: c is hashed
-
- const location l (&name_, c.line, c.column);
-
- for (char p (c);;) // Previous character (see below).
- {
- c = geth ();
-
- if (eos (c) || c == '\n')
- fail (l) << "unterminated string literal";
-
- if (c == '\"' && p != '\\')
- break;
-
- // Keep track of \\-escapings so we don't confuse them with \", as in
- // "\\".
- //
- p = (c == '\\' && p == '\\') ? '\0' : static_cast<char> (c);
-
- // Direct buffer scan.
- //
- if (p != '\\')
- {
- const char* b (gptr_);
- const char* e (egptr_);
- const char* p (b);
-
- for (char c;
- p != e && (c = *p) != '\"' && c != '\\' && c != '\n';
- ++p) ;
-
- size_t n (p - b);
- cs_.append (b, n);
- gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
- }
- }
-
- // See if we have a user-defined suffix (which is an identifier).
- //
- if ((c = peek ()) == '_' || alpha (c))
- literal_suffix (c);
-
- t.type = type::string;
- }
-
- void lexer::
- raw_string_literal (token& t, xchar c)
- {
- // note: c is hashed
-
- // The overall form is:
- //
- // R"<delimiter>(<raw_characters>)<delimiter>"
- //
- // Where <delimiter> is a potentially-empty character sequence made of
- // any source character but parentheses, backslash and spaces. It can be
- // at most 16 characters long.
- //
- // Note that the <raw_characters> are not processed in any way, not even
- // for line continuations.
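- //
- // For example (illustrative), in R"x(a")x" the delimiter is x, the raw
- // characters are a", and the closing sequence matched below is )x".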
- //
- const location l (&name_, c.line, c.column);
-
- // As a first step, parse the delimiter (including the opening paren).
- //
- string d (1, ')');
-
- for (;;)
- {
- c = geth ();
-
- if (eos (c) || c == '\"' || c == ')' || c == '\\' || c == ' ')
- fail (l) << "invalid raw string literal";
-
- if (c == '(')
- break;
-
- d += c;
- }
-
- d += '"';
-
- // Now parse the raw characters while trying to match the closing
- // delimiter.
- //
- for (size_t i (0);;) // Position to match in d.
- {
- c = geth (false); // No newline escaping.
-
- if (eos (c)) // Note: newline is ok.
- fail (l) << "invalid raw string literal";
-
- if (c != d[i] && i != 0) // Restart from the beginning.
- i = 0;
-
- if (c == d[i])
- {
- if (++i == d.size ())
- break;
- }
- }
-
- // See if we have a user-defined suffix (which is an identifier).
- //
- if ((c = peek ()) == '_' || alpha (c))
- literal_suffix (c);
-
- t.type = type::string;
- }
-
- void lexer::
- literal_suffix (xchar c)
- {
- // note: c is unhashed
-
- // Parse a user-defined literal suffix identifier.
- //
- for (geth (c); (c = peek ()) == '_' || alnum (c); geth (c)) ;
- }
-
- void lexer::
- line_directive (token& t, xchar c)
- {
- // enter: first digit of the line number
- // leave: last character of the line number or file string
- // note: c is unhashed
-
- // If our number and string tokens contained the literal values, then we
- // could have used those. However, we ignore the values (along with escape
- // processing, etc.) for performance. Let's keep it that way and instead
- // handle it ourselves.
- //
- // Note also that we are not hashing these at the character level,
- // instead hashing the switch to a new file path below and leaving the
- // line number to the token line hashing.
- //
- {
- string& s (t.value);
-
- for (s = c; (c = peek ()) >= '0' && c <= '9'; get (c))
- s += c;
-
- // The newline that ends the directive will increment the logical line
- // so subtract one to compensate. Note: can't be 0 and shouldn't throw
- // for valid lines.
- //
- log_line_ = stoull (s.c_str ()) - 1;
- }
-
- // See if we have the file.
- //
- c = skip_spaces (false);
-
- if (c == '\"')
- {
- const location l (&name_, c.line, c.column);
-
- // It is common to have a large number of #line directives that don't
- // change the file (they seem to be used to track macro locations or
- // some such). So we are going to optimize for this by comparing the
- // current path to what's in #line.
- //
- string& s (tmp_file_);
- s.clear ();
-
- for (char p ('\0'); p != '\"'; ) // Previous character.
- {
- c = get ();
-
- if (eos (c) || c == '\n')
- fail (l) << "unterminated string literal";
-
- // Handle escapes.
- //
- if (p == '\\')
- {
- p = '\0'; // Clear so we don't confuse \" and \\".
-
- // We only handle what can reasonably be expected in a file name.
- //
- switch (c)
- {
- case '\\':
- case '\'':
- case '\"': break; // Add as is.
- default:
- fail (c) << "unsupported escape sequence in #line directive";
- }
- }
- else
- {
- p = c;
-
- switch (c)
- {
- case '\\':
- case '\"': continue;
- }
- }
-
- s += c;
-
- // Direct buffer scan.
- //
- if (p != '\\')
- {
- const char* b (gptr_);
- const char* e (egptr_);
- const char* p (b);
-
- for (char c;
- p != e && (c = *p) != '\"' && c != '\\' && c != '\n';
- ++p) ;
-
- size_t n (p - b);
- s.append (b, n);
- gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
- }
- }
-
- if (log_file_.string () == s)
- return;
-
- // Swap the two string buffers.
- //
- {
- string r (move (log_file_).string ()); // Move string rep out.
- r.swap (s);
- log_file_ = path (move (r)); // Move back in.
- }
-
- // If the path is relative, then prefix it with the current working
- // directory. Failing that, we will end up with different checksums for
- // invocations from different directories.
- //
- // While this should work fine for normal cross-compilation, it's an
- // entirely different story for the emulated case (e.g., msvc-linux
- // where the preprocessed output contains absolute Windows paths). So
- // we try to sense if things look fishy and leave the path alone.
- //
- // Also detect special names like <built-in> and <command-line>. Plus
- // GCC sometimes adds what looks like working directory (has trailing
- // slash). So ignore that as well.
- //
- // We have now switched to using absolute translation unit paths (because
- // of __FILE__/assert(); see compile.cxx for details). But we might
- // still need this logic when we try to calculate a location-independent
- // hash for distributed compilation/caching. The idea is to only hash
- // the part starting from the project root, which is immutable. Plus
- // we will need -ffile-prefix-map to deal with __FILE__.
- //
- if (!log_file_.to_directory ())
- cs_.append (log_file_.string ());
-#if 0
- {
- using tr = path::traits;
- const string& f (log_file_.string ());
-
- if (f.find (':') != string::npos ||
- (f.front () == '<' && f.back () == '>') ||
- log_file_.absolute ())
- cs_.append (f);
- else
- {
- // This gets complicated and slow: the path may contain '..' and
- // '.' so strictly speaking we would need to normalize it.
- // Instead, we are going to handle leading '..'s ourselves (the
- // sane case) and ignore everything else (so if you have '..' or
- // '.' somewhere in the middle, then things might not work
- // optimally for you).
- //
- const string& d (work.string ());
-
- // Iterate over leading '..' in f "popping" the corresponding
- // number of trailing components from d.
- //
- size_t fp (0);
- size_t dp (d.size () - 1);
-
- for (size_t p;; )
- {
- // Note that in the file name we recognize any directory separator, not
- // just this platform's (see note about emulation above).
- //
- if (f.compare (fp, 2, "..") != 0 ||
- (f[fp + 2] != '/' && f[fp + 2] != '\\') || // Could be '\0'.
- (p = tr::rfind_separator (d, dp)) == string::npos)
- break;
-
- fp += 3;
- dp = p - 1;
- }
-
- cs_.append (d.c_str (), dp + 1);
- cs_.append (tr::directory_separator); // Canonical in work.
- cs_.append (f.c_str () + fp);
- }
- }
-#endif
- }
- else
- unget (c);
- }
-
- auto lexer::
- skip_spaces (bool nl) -> xchar
- {
- xchar c (get ());
-
- for (; !eos (c); c = get ())
- {
- switch (c)
- {
- case '\n':
- if (!nl) break;
- // Fall through.
- case ' ':
- case '\t':
- case '\r':
- case '\f':
- case '\v':
- {
- // Direct buffer scan.
- //
- const char* b (gptr_);
- const char* e (egptr_);
- const char* p (b);
-
- for (char c;
- p != e && ((c = *p) == ' ' || c == '\t');
- ++p) ;
-
- size_t n (p - b);
- gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
-
- continue;
- }
- case '/':
- {
- xchar p (peek ());
-
- // C++ comment.
- //
- if (p == '/')
- {
- get (p);
-
- for (;;)
- {
- c = get ();
- if (c == '\n' || eos (c))
- break;
-
- // Direct buffer scan.
- //
- const char* b (gptr_);
- const char* e (egptr_);
- const char* p (b);
-
- for (char c;
- p != e && (c = *p) != '\n' && c != '\\';
- ++p) ;
-
- size_t n (p - b);
- gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
- }
-
- if (!nl)
- break;
-
- continue;
- }
-
- // C comment.
- //
- if (p == '*')
- {
- get (p);
-
- for (;;)
- {
- c = get ();
-
- if (eos (c))
- fail (p) << "unterminated comment";
-
- if (c == '*' && (c = peek ()) == '/')
- {
- get (c);
- break;
- }
-
- // Direct buffer scan.
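- //
- // Unlike the direct scans above, this one can consume newlines from
- // the buffer, so it maintains the line/column counts itself.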
- //
- const char* b (gptr_);
- const char* e (egptr_);
- const char* p (b);
-
- for (char c;
- p != e && (c = *p) != '*' && c != '\\';
- ++p)
- {
- if (c == '\n')
- {
- if (log_line_) ++*log_line_;
- ++line;
- column = 1;
- }
- else
- ++column;
- }
-
- gptr_ = p; buf_->gbump (static_cast<int> (p - b));
- }
- continue;
- }
- break;
- }
- }
- break;
- }
-
- return c;
- }
-
- ostream&
- operator<< (ostream& o, const token& t)
- {
- switch (t.type)
- {
- case type::dot: o << "'.'"; break;
- case type::semi: o << "';'"; break;
- case type::less: o << "'<'"; break;
- case type::greater: o << "'>'"; break;
- case type::lcbrace: o << "'{'"; break;
- case type::rcbrace: o << "'}'"; break;
- case type::punctuation: o << "<punctuation>"; break;
-
- case type::identifier: o << '\'' << t.value << '\''; break;
-
- case type::number: o << "<number literal>"; break;
- case type::character: o << "<char literal>"; break;
- case type::string: o << "<string literal>"; break;
-
- case type::other: o << "<other>"; break;
- case type::eos: o << "<end of file>"; break;
- }
-
- return o;
- }
- }
-}