path: root/build2/cc/lexer.cxx
Diffstat (limited to 'build2/cc/lexer.cxx')
-rw-r--r-- build2/cc/lexer.cxx 1129
1 file changed, 0 insertions, 1129 deletions
diff --git a/build2/cc/lexer.cxx b/build2/cc/lexer.cxx
deleted file mode 100644
index 7795192..0000000
--- a/build2/cc/lexer.cxx
+++ /dev/null
@@ -1,1129 +0,0 @@
-// file : build2/cc/lexer.cxx -*- C++ -*-
-// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
-// license : MIT; see accompanying LICENSE file
-
-#include <build2/cc/lexer.hxx>
-
-using namespace std;
-using namespace butl;
-
-// bit 0 - identifier character (_0-9A-Za-z).
-//
-static const uint8_t char_flags[256] =
-//0 1 2 3 4 5 6 7 8 9 A B C D E F
-{
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 3
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 5
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 7
-
- // 128-255
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0
-};
-
-// Diagnostics plumbing.
-//
-namespace butl // ADL
-{
- inline build2::location
- get_location (const butl::char_scanner::xchar& c, const void* data)
- {
- using namespace build2;
-
- assert (data != nullptr); // E.g., must be &lexer::name_.
- return location (static_cast<const path*> (data), c.line, c.column);
- }
-}
-
-namespace build2
-{
- namespace cc
- {
- auto lexer::
- peek (bool e) -> xchar
- {
- if (unget_)
- return ungetc_;
-
- if (unpeek_)
- return unpeekc_;
-
- xchar c (base::peek ());
-
- if (e && c == '\\')
- {
- get (c);
- xchar p (base::peek ());
-
- // Handle Windows CRLF sequence. Similar to char_scanner, we treat a
- // single CR as if it was followed by LF and also collapse multiple
- // CRs.
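- //
- // For example (illustrative): "\<CR><LF>", a bare "\<CR>", and
- // "\<CR><CR><LF>" are all consumed as a single line continuation.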
- //
- while (p == '\r')
- {
- get (p);
- p = base::peek ();
-
- if (p == '\n')
- break;
-
- // Pretend '\n' was there and recurse.
- //
- if (p != '\r')
- return peek (e);
- }
-
- if (p == '\n')
- {
- get (p);
- return peek (e); // Recurse.
- }
-
- // Save in the unpeek buffer so that it is returned on the subsequent
- // calls to peek() (until get()).
- //
- unpeek_ = true;
- unpeekc_ = c;
- }
-
- return c;
- }
-
- inline auto lexer::
- get (bool e) -> xchar
- {
- if (unget_)
- {
- unget_ = false;
- return ungetc_;
- }
- else
- {
- xchar c (peek (e));
- get (c);
- return c;
- }
- }
-
- inline void lexer::
- get (const xchar& c)
- {
- // Increment the logical line similar to how base will increment the
- // physical (the column counts are the same).
- //
- if (log_line_ && c == '\n' && !unget_)
- ++*log_line_;
-
- base::get (c);
- }
-
- inline auto lexer::
- geth (bool e) -> xchar
- {
- xchar c (get (e));
- cs_.append (c);
- return c;
- }
-
- inline void lexer::
- geth (const xchar& c)
- {
- get (c);
- cs_.append (c);
- }
-
- using type = token_type;
-
- void lexer::
- next (token& t, xchar c, bool ignore_pp)
- {
- for (;; c = skip_spaces ())
- {
- t.file = log_file_;
- t.line = log_line_ ? *log_line_ : c.line;
- t.column = c.column;
-
- if (eos (c))
- {
- t.type = type::eos;
- return;
- }
-
- const location l (&name_, c.line, c.column);
-
- // Hash the token's line. The reason is debug info. In fact, doing
- // this will make quite a few "noop" changes (like adding a newline
- // anywhere in the source) cause the checksum to change. But there
- // doesn't seem to be any way around it: the case where we benefit
- // from the precise change detection the most (development) is also
- // where we will most likely have debug info enabled.
- //
- // Note that in order not to make this completely useless we don't
- // hash the column. Even if it is part of the debug info, having it a
- // bit off shouldn't cause any significant mis-positioning. We also
- // don't hash the file path for each token, instead only hashing it
- // when it changes with the #line directive (as well as in the
- // constructor for the initial path).
- //
- cs_.append (t.line);
- cs_.append (c);
-
- switch (c)
- {
- // Preprocessor lines.
- //
- case '#':
- {
- // It is tempting to simply scan until the newline ignoring
- // anything in between. However, these lines can start a
- // multi-line C-style comment. So we have to tokenize them (and
- // hash the data for each token).
- //
- // Note that this may not work for things like #error that can
- // contain pretty much anything. Also note that lines that start
- // with '#' can contain '#' further down. In this case we need to
- // be careful not to recurse (and consume multiple newlines). Thus
- // the ignore_pp flag.
- //
- // Finally, to support diagnostics properly we need to recognize
- // #line directives.
- //
- if (ignore_pp)
- {
- for (bool first (true);;)
- {
- // Note that we keep using the passed token for buffers.
- //
- c = skip_spaces (false); // Stop at newline.
-
- if (eos (c) || c == '\n')
- break;
-
- if (first)
- {
- first = false;
-
- // Recognize #line and its shorthand version:
- //
- // #line <integer> [<string literal>] ...
- // # <integer> [<string literal>] ...
- //
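- // For example (illustrative):
- //
- // #line 123 "foo.cxx"
- // # 123 "foo.cxx" 2
- //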
- // Also diagnose #include while at it.
- //
- if (!(c >= '0' && c <= '9'))
- {
- next (t, c, false);
-
- if (t.type == type::identifier)
- {
- if (t.value == "include")
- fail (l) << "unexpected #include directive";
- else if (t.value != "line")
- continue;
- }
- else
- continue;
-
- if (t.type != type::identifier || t.value != "line")
- continue;
-
- c = skip_spaces (false);
-
- if (!(c >= '0' && c <= '9'))
- fail (c) << "line number expected after #line directive";
- }
-
- // Ok, this is #line and next comes the line number.
- //
- line_directive (t, c);
- continue; // Parse the tail, if any.
- }
-
- next (t, c, false);
- }
- break;
- }
- else
- {
- t.type = type::punctuation;
- return;
- }
- }
- // Single-letter punctuation.
- //
- case ';': t.type = type::semi; return;
- case '{': t.type = type::lcbrace; return;
- case '}': t.type = type::rcbrace; return;
- // Other single-letter punctuation.
- //
- case '(':
- case ')':
- case '[':
- case ']':
- case ',':
- case '?':
- case '~':
- case '\\': t.type = type::punctuation; return;
- // Potentially multi-letter punctuation.
- //
- case '.': // . .* .<N> ...
- {
- xchar p (peek ());
-
- if (p == '*')
- {
- geth (p);
- t.type = type::punctuation;
- return;
- }
- else if (p >= '0' && p <= '9')
- {
- number_literal (t, c);
- return;
- }
- else if (p == '.')
- {
- get (p);
-
- xchar q (peek ());
- if (q == '.')
- {
- cs_.append (p);
-
- geth (q);
- t.type = type::punctuation;
- return;
- }
- unget (p);
- // Fall through.
- }
-
- t.type = type::dot;
- return;
- }
- case '=': // = ==
- case '!': // ! !=
- case '*': // * *=
- case '/': // / /= (/* and // handled by skip_spaces() above)
- case '%': // % %=
- case '^': // ^ ^=
- {
- xchar p (peek ());
-
- if (p == '=')
- geth (p);
-
- t.type = type::punctuation;
- return;
- }
- case '<': // < <= << <<=
- case '>': // > >= >> >>=
- {
- xchar p (peek ());
-
- if (p == c)
- {
- geth (p);
- if ((p = peek ()) == '=')
- geth (p);
- t.type = type::punctuation;
- }
- else if (p == '=')
- {
- geth (p);
- t.type = type::punctuation;
- }
- else
- t.type = (c == '<' ? type::less : type::greater);
-
- return;
- }
- case '+': // + ++ +=
- case '-': // - -- -= -> ->*
- {
- xchar p (peek ());
-
- if (p == c || p == '=')
- geth (p);
- else if (c == '-' && p == '>')
- {
- geth (p);
- if ((p = peek ()) == '*')
- geth (p);
- }
-
- t.type = type::punctuation;
- return;
- }
- case '&': // & && &=
- case '|': // | || |=
- {
- xchar p (peek ());
-
- if (p == c || p == '=')
- geth (p);
-
- t.type = type::punctuation;
- return;
- }
- case ':': // : ::
- {
- xchar p (peek ());
-
- if (p == ':')
- geth (p);
-
- t.type = type::punctuation;
- return;
- }
- // Number (and also .<N> above).
- //
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- {
- number_literal (t, c);
- return;
- }
- // Char/string literal, identifier, or other (\, $, @, `).
- //
- default:
- {
- bool raw (false); // Raw string literal.
-
- // Note: known not to be a digit (see above).
- //
- if (char_flags[static_cast<uint8_t> (c)] & 0x01)
- {
- // This smells a little: we know skip_spaces() did not peek at
- // the next character because this is not '/'. Which means the
- // position in the stream must be that of this character + 1.
- //
- t.position = buf_->tellg () - 1;
-
- string& id (t.value);
- id = c;
-
- while (char_flags[static_cast<uint8_t> (c = peek ())] & 0x01)
- {
- geth (c);
- id += c;
-
- // Direct buffer scan. Note that we always follow up with the
- // normal peek() call which may load the next chunk, handle
- // line continuations, etc. In other words, the end of the
- // "raw" scan doesn't necessarily mean the end.
- //
- const char* b (gptr_);
- const char* p (b);
-
- for (const char* e (egptr_);
- p != e && char_flags[static_cast<uint8_t> (*p)] & 0x01;
- ++p) ;
-
- // Unrolling this loop doesn't make a difference.
- //
- // for (const char* e (egptr_ - 4); p < e; p += 4)
- // {
- // uint8_t c;
- //
- // c = static_cast<uint8_t> (p[0]);
- // if (!(char_flags[c] & 0x01)) break;
- //
- // c = static_cast<uint8_t> (p[1]);
- // if (!(char_flags[c] & 0x01)) {p += 1; break;}
- //
- // c = static_cast<uint8_t> (p[2]);
- // if (!(char_flags[c] & 0x01)) {p += 2; break;}
- //
- // c = static_cast<uint8_t> (p[3]);
- // if (!(char_flags[c] & 0x01)) {p += 3; break;}
- // }
-
- size_t n (p - b);
- id.append (b, n); cs_.append (b, n);
- gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
- }
-
- // If the following character is a quote, see if the identifier
- // is one of the literal prefixes.
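- // (That is, u, u8, L, and U for character and string literals, plus
- // R, uR, u8R, LR, and UR for raw string literals.)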
- //
- if (c == '\'' || c == '\"')
- {
- size_t n (id.size ()), i (0);
- switch (id[0])
- {
- case 'u':
- {
- if (n > 1 && id[1] == '8')
- ++i;
- }
- // Fall through.
- case 'L':
- case 'U':
- {
- ++i;
-
- if (c == '\"' && n > i && id[i] == 'R')
- {
- ++i;
- raw = true;
- }
- break;
- }
- case 'R':
- {
- if (c == '\"')
- {
- ++i;
- raw = true;
- }
- break;
- }
- }
-
- if (i == n) // All characters "consumed".
- {
- geth (c);
- id.clear ();
- }
- }
-
- if (!id.empty ())
- {
- t.type = type::identifier;
- return;
- }
- }
-
- switch (c)
- {
- case '\'':
- {
- char_literal (t, c);
- return;
- }
- case '\"':
- {
- if (raw)
- raw_string_literal (t, c);
- else
- string_literal (t, c);
- return;
- }
- default:
- {
- t.type = type::other;
- return;
- }
- }
- }
- }
- }
- }
-
- void lexer::
- number_literal (token& t, xchar c)
- {
- // note: c is hashed
-
- // A number (integer or floating point literal) can:
- //
- // 1. Start with a dot (which must be followed by a digit, e.g., .123).
- //
- // 2. Have a radix prefix (0b101, 0123, 0X12AB).
- //
- // 3. Have an exponent (1e10, 0x1.p-10, 1.).
- //
- // 4. Have digits separated with ' (123'456, 0xff00'00ff).
- //
- // 5. End with a built-in or user-defined literal suffix (123f, 123UL, 123_X).
- //
- // Quoting from GCC's preprocessor documentation:
- //
- // "Formally preprocessing numbers begin with an optional period, a
- // required decimal digit, and then continue with any sequence of
- // letters, digits, underscores, periods, and exponents. Exponents are
- // the two-character sequences 'e+', 'e-', 'E+', 'E-', 'p+', 'p-', 'P+',
- // and 'P-'."
- //
- // So it looks like a "C++ number" is then any unseparated (with
- // whitespace or punctuation) sequence of those plus '. The only mildly
- // tricky part is then to recognize +/- as being part of the exponent.
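- //
- // For example (illustrative): 0x1.8p-3, 123'456UL, and 1e-10 each lex
- // as a single number token, while in 1+x the '+' ends the number since
- // it does not follow one of e/E/p/P.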
- //
- while (!eos ((c = peek ())))
- {
- switch (c)
- {
- // All the whitespace, punctuation, and other characters that end
- // the number.
- //
- case ' ':
- case '\n':
- case '\t':
- case '\r':
- case '\f':
- case '\v':
-
- case '#':
- case ';':
- case '{':
- case '}':
- case '(':
- case ')':
- case '[':
- case ']':
- case ',':
- case '?':
- case '~':
- case '=':
- case '!':
- case '*':
- case '/':
- case '%':
- case '^':
- case '>':
- case '<':
- case '&':
- case '|':
- case ':':
- case '+': // The exponent case is handled below.
- case '-': // The exponent case is handled below.
- case '"':
- case '\\':
-
- case '@':
- case '$':
- case '`':
- break;
-
- // Recognize +/- after the exponent.
- //
- case 'e':
- case 'E':
- case 'p':
- case 'P':
- {
- geth (c);
- c = peek ();
- if (c == '+' || c == '-')
- geth (c);
- continue;
- }
-
- case '_':
- case '.':
- case '\'':
- default: // Digits and letters.
- {
- geth (c);
- continue;
- }
- }
-
- break;
- }
-
- t.type = type::number;
- }
-
- void lexer::
- char_literal (token& t, xchar c)
- {
- // note: c is hashed
-
- const location l (&name_, c.line, c.column);
-
- for (char p (c);;) // Previous character (see below).
- {
- c = geth ();
-
- if (eos (c) || c == '\n')
- fail (l) << "unterminated character literal";
-
- if (c == '\'' && p != '\\')
- break;
-
- // Keep track of \\-escapings so we don't confuse them with \', as in
- // '\\'.
- //
- p = (c == '\\' && p == '\\') ? '\0' : static_cast<char> (c);
- }
-
- // See if we have a user-defined suffix (which is an identifier).
- //
- if ((c = peek ()) == '_' || alpha (c))
- literal_suffix (c);
-
- t.type = type::character;
- }
-
- void lexer::
- string_literal (token& t, xchar c)
- {
- // note: c is hashed
-
- const location l (&name_, c.line, c.column);
-
- for (char p (c);;) // Previous character (see below).
- {
- c = geth ();
-
- if (eos (c) || c == '\n')
- fail (l) << "unterminated string literal";
-
- if (c == '\"' && p != '\\')
- break;
-
- // Keep track of \\-escapings so we don't confuse them with \", as in
- // "\\".
- //
- p = (c == '\\' && p == '\\') ? '\0' : static_cast<char> (c);
-
- // Direct buffer scan.
- //
- if (p != '\\')
- {
- const char* b (gptr_);
- const char* e (egptr_);
- const char* p (b);
-
- for (char c;
- p != e && (c = *p) != '\"' && c != '\\' && c != '\n';
- ++p) ;
-
- size_t n (p - b);
- cs_.append (b, n);
- gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
- }
- }
-
- // See if we have a user-defined suffix (which is an identifier).
- //
- if ((c = peek ()) == '_' || alpha (c))
- literal_suffix (c);
-
- t.type = type::string;
- }
-
- void lexer::
- raw_string_literal (token& t, xchar c)
- {
- // note: c is hashed
-
- // The overall form is:
- //
- // R"<delimiter>(<raw_characters>)<delimiter>"
- //
- // Where <delimiter> is a potentially-empty character sequence made of
- // any source character but parentheses, backslash and spaces. It can be
- // at most 16 characters long.
- //
- // Note that the <raw_characters> are not processed in any way, not even
- // for line continuations.
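- //
- // For example (illustrative), in R"x(a")x" the delimiter is x, the raw
- // characters are a", and the closing sequence matched below is )x".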
- //
- const location l (&name_, c.line, c.column);
-
- // As a first step, parse the delimiter (including the opening paren).
- //
- string d (1, ')');
-
- for (;;)
- {
- c = geth ();
-
- if (eos (c) || c == '\"' || c == ')' || c == '\\' || c == ' ')
- fail (l) << "invalid raw string literal";
-
- if (c == '(')
- break;
-
- d += c;
- }
-
- d += '"';
-
- // Now parse the raw characters while trying to match the closing
- // delimiter.
- //
- for (size_t i (0);;) // Position to match in d.
- {
- c = geth (false); // No newline escaping.
-
- if (eos (c)) // Note: newline is ok.
- fail (l) << "invalid raw string literal";
-
- if (c != d[i] && i != 0) // Restart from the beginning.
- i = 0;
-
- if (c == d[i])
- {
- if (++i == d.size ())
- break;
- }
- }
-
- // See if we have a user-defined suffix (which is an identifier).
- //
- if ((c = peek ()) == '_' || alpha (c))
- literal_suffix (c);
-
- t.type = type::string;
- }
-
- void lexer::
- literal_suffix (xchar c)
- {
- // note: c is unhashed
-
- // Parse a user-defined literal suffix identifier.
- //
- for (geth (c); (c = peek ()) == '_' || alnum (c); geth (c)) ;
- }
-
- void lexer::
- line_directive (token& t, xchar c)
- {
- // enter: first digit of the line number
- // leave: last character of the line number or file string
- // note: c is unhashed
-
- // If our number and string tokens contained the literal values, then we
- // could have used those. However, we ignore the values (along with escape
- // processing, etc.) for performance. Let's keep it that way and instead
- // handle it ourselves.
- //
- // Note also that we are not hashing these at the character level,
- // instead hashing the switch to a new file path below and leaving the
- // line number to the token line hashing.
- //
- {
- string& s (t.value);
-
- for (s = c; (c = peek ()) >= '0' && c <= '9'; get (c))
- s += c;
-
- // The newline that ends the directive will increment the logical line
- // so subtract one to compensate. Note: can't be 0 and shouldn't throw
- // for valid lines.
- //
- log_line_ = stoull (s.c_str ()) - 1;
- }
-
- // See if we have the file.
- //
- c = skip_spaces (false);
-
- if (c == '\"')
- {
- const location l (&name_, c.line, c.column);
-
- // It is common to have a large number of #line directives that don't
- // change the file (they seem to be used to track macro locations or
- // some such). So we are going to optimize for this by comparing the
- // current path to what's in #line.
- //
- string& s (tmp_file_);
- s.clear ();
-
- for (char p ('\0'); p != '\"'; ) // Previous character.
- {
- c = get ();
-
- if (eos (c) || c == '\n')
- fail (l) << "unterminated string literal";
-
- // Handle escapes.
- //
- if (p == '\\')
- {
- p = '\0'; // Clear so we don't confuse \" and \\".
-
- // We only handle what can reasonably be expected in a file name.
- //
- switch (c)
- {
- case '\\':
- case '\'':
- case '\"': break; // Add as is.
- default:
- fail (c) << "unsupported escape sequence in #line directive";
- }
- }
- else
- {
- p = c;
-
- switch (c)
- {
- case '\\':
- case '\"': continue;
- }
- }
-
- s += c;
-
- // Direct buffer scan.
- //
- if (p != '\\')
- {
- const char* b (gptr_);
- const char* e (egptr_);
- const char* p (b);
-
- for (char c;
- p != e && (c = *p) != '\"' && c != '\\' && c != '\n';
- ++p) ;
-
- size_t n (p - b);
- s.append (b, n);
- gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
- }
- }
-
- if (log_file_.string () == s)
- return;
-
- // Swap the two string buffers.
- //
- {
- string r (move (log_file_).string ()); // Move string rep out.
- r.swap (s);
- log_file_ = path (move (r)); // Move back in.
- }
-
- // If the path is relative, then prefix it with the current working
- // directory. Failing that, we will end up with different checksums for
- // invocations from different directories.
- //
- // While this should work fine for normal cross-compilation, it's an
- // entirely different story for the emulated case (e.g., msvc-linux
- // where the preprocessed output contains absolute Windows paths). So
- // we try to sense if things look fishy and leave the path alone.
- //
- // Also detect special names like <built-in> and <command-line>. Plus
- // GCC sometimes adds what looks like working directory (has trailing
- // slash). So ignore that as well.
- //
- // We have now switched to using absolute translation unit paths (because
- // of __FILE__/assert(); see compile.cxx for details). But we might
- // still need this logic when we try to calculate a location-independent
- // hash for distributed compilation/caching. The idea is to only hash
- // the part starting from the project root, which is immutable. Plus
- // we will need -ffile-prefix-map to deal with __FILE__.
- //
- if (!log_file_.to_directory ())
- cs_.append (log_file_.string ());
-#if 0
- {
- using tr = path::traits;
- const string& f (log_file_.string ());
-
- if (f.find (':') != string::npos ||
- (f.front () == '<' && f.back () == '>') ||
- log_file_.absolute ())
- cs_.append (f);
- else
- {
- // This gets complicated and slow: the path may contain '..' and
- // '.' so strictly speaking we would need to normalize it.
- // Instead, we are going to handle leading '..'s ourselves (the
- // sane case) and ignore everything else (so if you have '..' or
- // '.' somewhere in the middle, then things might not work
- // optimally for you).
- //
- const string& d (work.string ());
-
- // Iterate over leading '..' in f "popping" the corresponding
- // number of trailing components from d.
- //
- size_t fp (0);
- size_t dp (d.size () - 1);
-
- for (size_t p;; )
- {
- // Note that in the file name we recognize any directory separator, not
- // just this platform's (see note about emulation above).
- //
- if (f.compare (fp, 2, "..") != 0 ||
- (f[fp + 2] != '/' && f[fp + 2] != '\\') || // Could be '\0'.
- (p = tr::rfind_separator (d, dp)) == string::npos)
- break;
-
- fp += 3;
- dp = p - 1;
- }
-
- cs_.append (d.c_str (), dp + 1);
- cs_.append (tr::directory_separator); // Canonical in work.
- cs_.append (f.c_str () + fp);
- }
- }
-#endif
- }
- else
- unget (c);
- }
-
- auto lexer::
- skip_spaces (bool nl) -> xchar
- {
- xchar c (get ());
-
- for (; !eos (c); c = get ())
- {
- switch (c)
- {
- case '\n':
- if (!nl) break;
- // Fall through.
- case ' ':
- case '\t':
- case '\r':
- case '\f':
- case '\v':
- {
- // Direct buffer scan.
- //
- const char* b (gptr_);
- const char* e (egptr_);
- const char* p (b);
-
- for (char c;
- p != e && ((c = *p) == ' ' || c == '\t');
- ++p) ;
-
- size_t n (p - b);
- gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
-
- continue;
- }
- case '/':
- {
- xchar p (peek ());
-
- // C++ comment.
- //
- if (p == '/')
- {
- get (p);
-
- for (;;)
- {
- c = get ();
- if (c == '\n' || eos (c))
- break;
-
- // Direct buffer scan.
- //
- const char* b (gptr_);
- const char* e (egptr_);
- const char* p (b);
-
- for (char c;
- p != e && (c = *p) != '\n' && c != '\\';
- ++p) ;
-
- size_t n (p - b);
- gptr_ = p; buf_->gbump (static_cast<int> (n)); column += n;
- }
-
- if (!nl)
- break;
-
- continue;
- }
-
- // C comment.
- //
- if (p == '*')
- {
- get (p);
-
- for (;;)
- {
- c = get ();
-
- if (eos (c))
- fail (p) << "unterminated comment";
-
- if (c == '*' && (c = peek ()) == '/')
- {
- get (c);
- break;
- }
-
- // Direct buffer scan.
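- //
- // Unlike the direct scans above, this one can consume newlines from
- // the buffer, so it maintains the line/column counts itself.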
- //
- const char* b (gptr_);
- const char* e (egptr_);
- const char* p (b);
-
- for (char c;
- p != e && (c = *p) != '*' && c != '\\';
- ++p)
- {
- if (c == '\n')
- {
- if (log_line_) ++*log_line_;
- ++line;
- column = 1;
- }
- else
- ++column;
- }
-
- gptr_ = p; buf_->gbump (static_cast<int> (p - b));
- }
- continue;
- }
- break;
- }
- }
- break;
- }
-
- return c;
- }
-
- ostream&
- operator<< (ostream& o, const token& t)
- {
- switch (t.type)
- {
- case type::dot: o << "'.'"; break;
- case type::semi: o << "';'"; break;
- case type::less: o << "'<'"; break;
- case type::greater: o << "'>'"; break;
- case type::lcbrace: o << "'{'"; break;
- case type::rcbrace: o << "'}'"; break;
- case type::punctuation: o << "<punctuation>"; break;
-
- case type::identifier: o << '\'' << t.value << '\''; break;
-
- case type::number: o << "<number literal>"; break;
- case type::character: o << "<char literal>"; break;
- case type::string: o << "<string literal>"; break;
-
- case type::other: o << "<other>"; break;
- case type::eos: o << "<end of file>"; break;
- }
-
- return o;
- }
- }
-}