From 977d07a3ae47ef204665d1eda2d642e5064724f3 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Mon, 24 Jun 2019 12:01:19 +0200
Subject: Split build system into library and driver

---
 build2/lexer.cxx | 720 -------------------------------------------------------
 1 file changed, 720 deletions(-)
 delete mode 100644 build2/lexer.cxx

(limited to 'build2/lexer.cxx')
diff --git a/build2/lexer.cxx b/build2/lexer.cxx
deleted file mode 100644
index 8287640..0000000
--- a/build2/lexer.cxx
+++ /dev/null
@@ -1,720 +0,0 @@
-// file      : build2/lexer.cxx -*- C++ -*-
-// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
-// license   : MIT; see accompanying LICENSE file
-
-#include <build2/lexer.hxx>
-
-#include <cstring> // strchr()
-
-using namespace std;
-
-namespace build2
-{
-  using type = token_type;
-
-  pair<char, bool> lexer::
-  peek_char ()
-  {
-    sep_ = skip_spaces ();
-    xchar c (peek ());
-    return make_pair (eos (c) ? '\0' : char (c), sep_);
-  }
-
-  void lexer::
-  mode (lexer_mode m, char ps, optional<const char*> esc)
-  {
-    const char* s1 (nullptr);
-    const char* s2 (nullptr);
-    bool s (true);
-    bool n (true);
-    bool q (true);
-
-    if (!esc)
-    {
-      assert (!state_.empty ());
-      esc = state_.top ().escapes;
-    }
-
-    switch (m)
-    {
-    case lexer_mode::normal:
-      {
-        s1 = ":<>=+ $(){}[]#\t\n";
-        s2 = "    =           ";
-        break;
-      }
-    case lexer_mode::value:
-      {
-        s1 = " $(){}[]#\t\n";
-        s2 = "           ";
-        break;
-      }
-    case lexer_mode::attribute:
-      {
-        s1 = " $(]#\t\n";
-        s2 = "       ";
-        break;
-      }
-    case lexer_mode::eval:
-      {
-        s1 = ":<>=!&|?, $(){}[]#\t\n";
-        s2 = "   = &|             ";
-        break;
-      }
-    case lexer_mode::buildspec:
-      {
-        // Like the value mode with these differences:
-        //
-        // 1. Returns '(' as a separated token provided the state stack depth
-        //    is less than or equal to 3 (initial state plus two buildspec)
-        //    (see parse_buildspec() for details).
-        //
-        // 2. Recognizes comma.
-        //
-        // 3. Treat newline as an ordinary space.
-        //
-        s1 = " $(){}[],\t\n";
-        s2 = "           ";
-        n = false;
-        break;
-      }
-    case lexer_mode::single_quoted:
-    case lexer_mode::double_quoted:
-      s = false;
-      // Fall through.
-    case lexer_mode::variable:
-      {
-        // These are handled in an ad hoc way in word().
-        assert (ps == '\0');
-        break;
-      }
-    default: assert (false); // Unhandled custom mode.
-    }
-
-    state_.push (state {m, ps, s, n, q, *esc, s1, s2});
-  }
-
-  token lexer::
-  next ()
-  {
-    const state& st (state_.top ());
-    lexer_mode m (st.mode);
-
-    // For some modes we have dedicated imlementations of next().
-    //
-    switch (m)
-    {
-    case lexer_mode::normal:
-    case lexer_mode::value:
-    case lexer_mode::attribute:
-    case lexer_mode::variable:
-    case lexer_mode::buildspec:     break;
-    case lexer_mode::eval:          return next_eval ();
-    case lexer_mode::double_quoted: return next_quoted ();
-    default:                        assert (false); // Unhandled custom mode.
-    }
-
-    bool sep (skip_spaces ());
-
-    xchar c (get ());
-    uint64_t ln (c.line), cn (c.column);
-
-    auto make_token = [&sep, ln, cn] (type t, string v = string ())
-    {
-      return token (t, move (v),
-                    sep, quote_type::unquoted, false,
-                    ln, cn, token_printer);
-    };
-
-    if (eos (c))
-      return make_token (type::eos);
-
-    // Handle pair separator.
-    //
-    if (c == st.sep_pair)
-      return make_token (type::pair_separator, string (1, c));
-
-    switch (c)
-    {
-      // NOTE: remember to update mode(), next_eval() if adding new special
-      // characters.
-      //
-    case '\n':
-      {
-        // Expire value mode at the end of the line.
-        //
-        if (m == lexer_mode::value)
-          state_.pop ();
-
-        sep = true; // Treat newline as always separated.
-        return make_token (type::newline);
-      }
-    case '{': return make_token (type::lcbrace);
-    case '}': return make_token (type::rcbrace);
-    case '[': return make_token (type::lsbrace);
-    case ']':
-      {
-        // Expire attribute mode after closing ']'.
-        //
-        if (m == lexer_mode::attribute)
-          state_.pop ();
-
-        return make_token (type::rsbrace);
-      }
-    case '$': return make_token (type::dollar);
-    case ')': return make_token (type::rparen);
-    case '(':
-      {
-        // Left paren is always separated in the buildspec mode.
-        //
-        if (m == lexer_mode::buildspec && state_.size () <= 3)
-          sep = true;
-
-        return make_token (type::lparen);
-      }
-    }
-
-    // The following characters are special in the normal and variable modes.
-    //
-    if (m == lexer_mode::normal || m == lexer_mode::variable)
-    {
-      switch (c)
-      {
-        // NOTE: remember to update mode(), next_eval() if adding new special
-        // characters.
-        //
-      case ':': return make_token (type::colon);
-      case '=':
-        {
-          if (peek () == '+')
-          {
-            get ();
-            return make_token (type::prepend);
-          }
-          else
-            return make_token (type::assign);
-        }
-      case '+':
-        {
-          if (peek () == '=')
-          {
-            get ();
-            return make_token (type::append);
-          }
-        }
-      }
-    }
-
-    // The following characters are special in the normal mode.
-    //
-    if (m == lexer_mode::normal)
-    {
-      // NOTE: remember to update mode() if adding new special characters.
-      //
-      switch (c)
-      {
-      case '<': return make_token (type::labrace);
-      case '>': return make_token (type::rabrace);
-      }
-    }
-
-    // The following characters are special in the buildspec mode.
-    //
-    if (m == lexer_mode::buildspec)
-    {
-      // NOTE: remember to update mode() if adding new special characters.
-      //
-      switch (c)
-      {
-      case ',': return make_token (type::comma);
-      }
-    }
-
-    // Otherwise it is a word.
-    //
-    unget (c);
-    return word (st, sep);
-  }
-
-  token lexer::
-  next_eval ()
-  {
-    bool sep (skip_spaces ());
-    xchar c (get ());
-
-    if (eos (c))
-      fail (c) << "unterminated evaluation context";
-
-    const state& st (state_.top ());
-
-    uint64_t ln (c.line), cn (c.column);
-
-    auto make_token = [sep, ln, cn] (type t, string v = string ())
-    {
-      return token (t, move (v),
-                    sep, quote_type::unquoted, false,
-                    ln, cn, token_printer);
-    };
-
-    // This mode is quite a bit like the value mode when it comes to special
-    // characters, except that we have some of our own.
-    //
-
-    // Handle pair separator.
-    //
-    if (c == st.sep_pair)
-      return make_token (type::pair_separator, string (1, c));
-
-    // Note: we don't treat [ and ] as special here. Maybe can use them for
-    // something later.
-    //
-    switch (c)
-    {
-      // NOTE: remember to update mode() if adding new special characters.
-      //
-    case '\n': fail (c) << "newline in evaluation context" << endf;
-    case ':': return make_token (type::colon);
-    case '{': return make_token (type::lcbrace);
-    case '}': return make_token (type::rcbrace);
-    case '[': return make_token (type::lsbrace);
-    case ']': return make_token (type::rsbrace);
-    case '$': return make_token (type::dollar);
-    case '?': return make_token (type::question);
-    case ',': return make_token (type::comma);
-    case '(': return make_token (type::lparen);
-    case ')':
-      {
-        state_.pop (); // Expire eval mode.
-        return make_token (type::rparen);
-      }
-      // Potentially two-character tokens.
-      //
-    case '=':
-    case '!':
-    case '<':
-    case '>':
-    case '|':
-    case '&':
-      {
-        xchar p (peek ());
-
-        type r (type::eos);
-        switch (c)
-        {
-        case '|': if (p == '|') r = type::log_or; break;
-        case '&': if (p == '&') r = type::log_and; break;
-
-        case '<': r = (p == '=' ? type::less_equal : type::less); break;
-        case '>': r = (p == '=' ? type::greater_equal : type::greater); break;
-
-        case '=': if (p == '=') r = type::equal; break;
-
-        case '!': r = (p == '=' ? type::not_equal : type::log_not); break;
-        }
-
-        if (r == type::eos)
-          break;
-
-        switch (r)
-        {
-        case type::less:
-        case type::greater:
-        case type::log_not: break;
-        default:            get ();
-        }
-
-        return make_token (r);
-      }
-    }
-
-    // Otherwise it is a word.
-    //
-    unget (c);
-    return word (st, sep);
-  }
-
-  token lexer::
-  next_quoted ()
-  {
-    xchar c (get ());
-
-    if (eos (c))
-      fail (c) << "unterminated double-quoted sequence";
-
-    uint64_t ln (c.line), cn (c.column);
-
-    auto make_token = [ln, cn] (type t)
-    {
-      return token (t, false, quote_type::double_, ln, cn, token_printer);
-    };
-
-    switch (c)
-    {
-    case '$': return make_token (type::dollar);
-    case '(': return make_token (type::lparen);
-    }
-
-    // Otherwise it is a word.
-    //
-    unget (c);
-    return word (state_.top (), false);
-  }
-
-  token lexer::
-  word (state st, bool sep)
-  {
-    lexer_mode m (st.mode);
-
-    xchar c (peek ());
-    assert (!eos (c));
-
-    uint64_t ln (c.line), cn (c.column);
-
-    string lexeme;
-    quote_type qtype (m == lexer_mode::double_quoted
-                      ? quote_type::double_
-                      : quote_type::unquoted);
-
-    // If we are already in the quoted mode then we didn't start with the
-    // quote character.
-    //
-    bool qcomp (false);
-
-    auto append = [&lexeme, &m, &qcomp] (char c)
-    {
-      lexeme += c;
-
-      // An unquoted character after a quoted fragment.
-      //
-      if (qcomp && m != lexer_mode::double_quoted)
-        qcomp = false;
-    };
-
-    for (; !eos (c); c = peek ())
-    {
-      // First handle escape sequences.
-      //
-      if (c == '\\')
-      {
-        // In the variable mode we treat the beginning of the escape sequence
-        // as a separator (think \"$foo\").
-        //
-        if (m == lexer_mode::variable)
-          break;
-
-        get ();
-        xchar p (peek ());
-
-        const char* esc (st.escapes);
-
-        if (esc == nullptr ||
-            (*esc != '\0' && !eos (p) && strchr (esc, p) != nullptr))
-        {
-          get ();
-
-          if (eos (p))
-            fail (p) << "unterminated escape sequence";
-
-          if (p != '\n') // Ignore if line continuation.
-            append (p);
-
-          continue;
-        }
-        else
-          unget (c); // Treat as a normal character.
-      }
-
-      bool done (false);
-
-      // Next take care of the double-quoted mode. This one is tricky since
-      // we push/pop modes while accumulating the same lexeme for example:
-      //
-      // foo" bar "baz
-      //
-      if (m == lexer_mode::double_quoted)
-      {
-        switch (c)
-        {
-          // Only these two characters are special in the double-quoted mode.
-          //
-        case '$':
-        case '(':
-          {
-            done = true;
-            break;
-          }
-          // End quote.
-          //
-        case '\"':
-          {
-            get ();
-            state_.pop ();
-
-            st = state_.top ();
-            m = st.mode;
-            continue;
-          }
-        }
-      }
-      // We also handle the variable mode in an ad hoc way.
-      //
-      else if (m == lexer_mode::variable)
-      {
-        if (c != '_' && !(lexeme.empty () ? alpha (c) : alnum (c)))
-        {
-          if (c != '.')
-            done = true;
-          else
-          {
-            // Normally '.' is part of the variable (namespace separator)
-            // unless it is trailing (think $major.$minor).
-            //
-            get ();
-            xchar p (peek ());
-            done = eos (p) || !(alpha (p) ||  p == '_');
-            unget (c);
-          }
-        }
-      }
-      else
-      {
-        // First check if it's a pair separator.
-        //
-        if (c == st.sep_pair)
-          done = true;
-        else
-        {
-          // Then see if this character or character sequence is a separator.
-          //
-          for (const char* p (strchr (st.sep_first, c));
-               p != nullptr;
-               p = done ? nullptr : strchr (p + 1, c))
-          {
-            char s (st.sep_second[p - st.sep_first]);
-
-            // See if it has a second.
-            //
-            if (s != ' ')
-            {
-              get ();
-              done = (peek () == s);
-              unget (c);
-            }
-            else
-              done = true;
-          }
-        }
-
-        // Handle single and double quotes if enabled for this mode and unless
-        // they were considered separators.
-        //
-        if (st.quotes && !done)
-        {
-          switch (c)
-          {
-          case '\'':
-            {
-              // Enter the single-quoted mode in case the derived lexer needs
-              // to notice this.
-              //
-              mode (lexer_mode::single_quoted);
-
-              switch (qtype)
-              {
-              case quote_type::unquoted:
-                qtype = quote_type::single;
-                qcomp = lexeme.empty ();
-                break;
-              case quote_type::single:
-                qcomp = false; // Non-contiguous.
-                break;
-              case quote_type::double_:
-                qtype = quote_type::mixed;
-                // Fall through.
-              case quote_type::mixed:
-                qcomp = false;
-                break;
-              }
-
-              get ();
-              for (c = get (); !eos (c) && c != '\''; c = get ())
-                lexeme += c;
-
-              if (eos (c))
-                fail (c) << "unterminated single-quoted sequence";
-
-              state_.pop ();
-              continue;
-            }
-          case '\"':
-            {
-              get ();
-
-              mode (lexer_mode::double_quoted);
-              st = state_.top ();
-              m = st.mode;
-
-              switch (qtype)
-              {
-              case quote_type::unquoted:
-                qtype = quote_type::double_;
-                qcomp = lexeme.empty ();
-                break;
-              case quote_type::double_:
-                qcomp = false; // Non-contiguous.
-                break;
-              case quote_type::single:
-                qtype = quote_type::mixed;
-                // Fall through.
-              case quote_type::mixed:
-                qcomp = false;
-                break;
-              }
-
-              continue;
-            }
-          }
-        }
-      }
-
-      if (done)
-        break;
-
-      get ();
-      append (c);
-    }
-
-    if (m == lexer_mode::double_quoted)
-    {
-      if (eos (c))
-        fail (c) << "unterminated double-quoted sequence";
-
-      // If we are still in the quoted mode then we didn't end with the quote
-      // character.
-      //
-      if (qcomp)
-        qcomp = false;
-    }
-
-    // Expire variable mode at the end of the word.
-    //
-    if (m == lexer_mode::variable)
-      state_.pop ();
-
-    return token (move (lexeme), sep, qtype, qcomp, ln, cn);
-  }
-
-  bool lexer::
-  skip_spaces ()
-  {
-    bool r (sep_);
-    sep_ = false;
-
-    const state& s (state_.top ());
-
-    // In some special modes we don't skip spaces.
-    //
-    if (!s.sep_space)
-      return r;
-
-    xchar c (peek ());
-    bool start (c.column == 1);
-
-    for (; !eos (c); c = peek ())
-    {
-      switch (c)
-      {
-      case ' ':
-      case '\t':
-        {
-          r = true;
-          break;
-        }
-      case '\n':
-        {
-          // In some modes we treat newlines as ordinary spaces.
-          //
-          if (!s.sep_newline)
-          {
-            r = true;
-            break;
-          }
-
-          // Skip empty lines.
-          //
-          if (start)
-          {
-            r = false;
-            break;
-          }
-
-          return r;
-        }
-      case '#':
-        {
-          r = true;
-          get ();
-
-          // See if this is a multi-line comment in the form:
-          //
-          /*
-             #\
-             ...
-             #\
-          */
-          auto ml = [&c, this] () -> bool
-          {
-            if ((c = peek ()) == '\\')
-            {
-              get ();
-              if ((c = peek ()) == '\n')
-                return true;
-            }
-
-            return false;
-          };
-
-          if (ml ())
-          {
-            // Scan until we see the closing one.
-            //
-            for (; !eos (c); c = peek ())
-            {
-              get ();
-              if (c == '#' && ml ())
-                break;
-            }
-
-            if (eos (c))
-              fail (c) << "unterminated multi-line comment";
-          }
-          else
-          {
-            // Read until newline or eos.
-            //
-            for (; !eos (c) && c != '\n'; c = peek ())
-              get ();
-          }
-
-          continue;
-        }
-      case '\\':
-        {
-          get ();
-
-          if (peek () == '\n')
-            break; // Ignore.
-
-          unget (c);
-        }
-        // Fall through.
-      default:
-        return r; // Not a space.
-      }
-
-      get ();
-    }
-
-    return r;
-  }
-}
-- 
cgit v1.1