From 257ad3c2c5e633d2fd3f2228021ac3ae8d6d07cb Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Fri, 12 Dec 2014 11:30:04 +0200
Subject: Initial buildfile parser implementation

g++-4.9 -std=c++14 -g -I../../.. -o driver driver.cxx ../../../build/lexer.cxx ../../../build/parser.cxx && ./driver
---
 build/bd.cxx     |  35 +++---------
 build/lexer      |  16 ++++--
 build/lexer.cxx  |  12 ++--
 build/parser     |  55 ++++++++++++++++++
 build/parser.cxx | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 build/token      |   6 ++
 6 files changed, 254 insertions(+), 37 deletions(-)
 create mode 100644 build/parser
 create mode 100644 build/parser.cxx

(limited to 'build')
diff --git a/build/bd.cxx b/build/bd.cxx
index 33ee02f..82d283c 100644
--- a/build/bd.cxx
+++ b/build/bd.cxx
@@ -16,8 +16,8 @@
 #include <build/process>
 #include <build/diagnostics>
 
-#include <build/token>
 #include <build/lexer>
+#include <build/parser>
 
 using namespace std;
 
@@ -148,41 +148,20 @@ main (int argc, char* argv[])
   }
 
   ifs.exceptions (ifstream::failbit | ifstream::badbit);
-  lexer l (ifs, bf.string ());
+  parser p (cerr); // The parser has no default constructor; diagnostics go to cerr.
 
   try
   {
-    for (token t (l.next ());; t = l.next ())
-    {
-      cout << t.line () << ':' << t.column () << ": ";
-
-      switch (t.type ())
-      {
-      case token_type::eos: cout << "<eos>"; break;
-      case token_type::punctuation:
-        {
-          switch (t.punctuation ())
-          {
-          case token_punctuation::newline: cout << "\\n"; break;
-          case token_punctuation::colon:   cout << ':'; break;
-          case token_punctuation::lcbrace: cout << '{'; break;
-          case token_punctuation::rcbrace: cout << '}'; break;
-          }
-          break;
-        }
-      case token_type::name: cout << '\'' << t.name () << '\''; break;
-      }
-
-      cout << endl;
-
-      if (t.type () == token_type::eos)
-        break;
-    }
+    p.parse (ifs, bf);
   }
   catch (const lexer_error&)
   {
     return 1; // Diagnostics has already been issued.
   }
+  catch (const parser_error&)
+  {
+    return 1; // Diagnostics has already been issued.
+  }
   catch (const std::ios_base::failure&)
   {
     cerr << "error: failed to read from " << bf << endl;
diff --git a/build/lexer b/build/lexer
index 34b6fcc..cf67eec 100644
--- a/build/lexer
+++ b/build/lexer
@@ -21,11 +21,8 @@ namespace build
   class lexer
   {
   public:
-    // If name is empty, then no diagnostics is issued, just lexer_error
-    // is thrown (use for testing).
-    //
-    lexer (std::istream& is, const std::string& name)
-        : is_ (is), name_ (name) {}
+    lexer (std::istream& is, const std::string& name, std::ostream& diag)
+        : is_ (is), name_ (name), diag_ (diag) {}
 
     token
     next ();
@@ -74,6 +71,8 @@ namespace build
       return c.value () == xchar::traits_type::eof ();
     }
 
+    // Scanner.
+    //
   private:
     xchar
     escape ();
@@ -84,9 +83,16 @@ namespace build
     token
     name (xchar);
 
+    // Utilities.
+    //
+  private:
+    std::ostream&
+    error (const xchar&);
+
   private:
     std::istream& is_;
     std::string name_;
+    std::ostream& diag_;
 
     std::uint64_t l_ {1};
     std::uint64_t c_ {1};
diff --git a/build/lexer.cxx b/build/lexer.cxx
index 8566788..9e3521a 100644
--- a/build/lexer.cxx
+++ b/build/lexer.cxx
@@ -56,10 +56,7 @@ namespace build
     if (!is_eos (c))
       return c;
 
-    if (!name_.empty ())
-      cerr << name_ << ':' << c.line () << ':' << c.column () << ": error: " <<
-        "unterminated escape sequence" << endl;
-
+    error (c) << "unterminated escape sequence" << endl;
     throw lexer_error ();
   }
 
@@ -219,4 +216,11 @@ namespace build
     buf_ = c;
     unget_ = true;
   }
+
+  ostream& lexer::
+  error (const xchar& c)
+  {
+    diag_ << name_ << ':' << c.line () << ':' << c.column ();
+    return diag_ << ": error: "; // The caller appends the message text.
+  }
 }
diff --git a/build/parser b/build/parser
new file mode 100644
index 0000000..04ef00d
--- /dev/null
+++ b/build/parser
@@ -0,0 +1,55 @@
+// file      : build/parser -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Tools CC
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef BUILD_PARSER
+#define BUILD_PARSER
+
+#include <string>
+#include <iosfwd>
+#include <exception>
+
+#include <build/path>
+
+namespace build
+{
+  class token;
+  enum class token_type;
+  class lexer;
+
+  // Thrown by the parser on a syntax error. The handler must assume the
+  // diagnostics has already been issued (it is written before the throw).
+  struct parser_error: std::exception {};
+
+  class parser
+  {
+  public:
+    parser (std::ostream& diag): diag_ (diag) {} // diag receives diagnostics.
+
+    void
+    parse (std::istream&, const path&); // The path is used in diagnostics.
+
+    // Recursive descent parser. Each function is passed the current token and
+    // its type and leaves the first token it did not consume in them.
+  private:
+    void
+    names (token&, token_type&);
+
+    // Utilities: next() fetches the next token from the lexer while error()
+    // starts a "<path>:<line>:<column>: error: " diagnostics line.
+  private:
+    void
+    next (token&, token_type&);
+
+    std::ostream&
+    error (const token&);
+
+  private:
+    std::ostream& diag_;
+
+    lexer* lexer_ {nullptr};     // Set by parse() to its local lexer.
+    const path* path_ {nullptr}; // Set by parse() to its path argument.
+  };
+}
+
+#endif // BUILD_PARSER
diff --git a/build/parser.cxx b/build/parser.cxx
new file mode 100644
index 0000000..669ac8b
--- /dev/null
+++ b/build/parser.cxx
@@ -0,0 +1,167 @@
+// file      : build/parser.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Tools CC
+// license   : MIT; see accompanying LICENSE file
+
+#include <build/parser>
+
+#include <iostream>
+
+#include <build/token>
+#include <build/lexer>
+
+using namespace std;
+
+namespace build
+{
+  // Output the token type and value in a format suitable for diagnostics.
+  //
+  ostream&
+  operator<< (ostream&, const token&);
+
+  typedef token_type type;
+  typedef token_punctuation punc;
+
+  void parser::
+  parse (istream& is, const path& p)
+  {
+    lexer l (is, p.string (), diag_);
+    lexer_ = &l; // Used by next(); only valid for the duration of this call.
+    path_ = &p;  // Used by error().
+
+    token t (0, 0); // eos
+    type tt;
+
+    for (next (t, tt); tt != type::eos; )
+    {
+      // We always start with one or more names.
+      //
+      names (t, tt);
+
+      if (t.is (punc::colon))
+      {
+        next (t, tt);
+
+        if (tt == type::name || t.is (punc::lcbrace)) // Optional names.
+          names (t, tt);
+
+        if (t.is (punc::newline))
+          next (t, tt);
+        else if (tt != type::eos) // End of stream can also end the line.
+        {
+          error (t) << "expected newline instead of " << t << endl;
+          throw parser_error ();
+        }
+
+        continue;
+      }
+
+      error (t) << "unexpected " << t << endl;
+      throw parser_error ();
+    }
+  }
+
+  void parser::
+  names (token& t, type& tt)
+  {
+    for (bool first (true);; first = false) // One list element per iteration.
+    {
+      // Untyped name group, e.g., '{foo bar}'. Its content is parsed by
+      // recursing into names() and must be followed by the closing '}'.
+      if (t.is (punc::lcbrace))
+      {
+        next (t, tt);
+        names (t, tt);
+
+        if (!t.is (punc::rcbrace))
+        {
+          error (t) << "expected '}' instead of " << t << endl;
+          throw parser_error ();
+        }
+
+        next (t, tt);
+        continue;
+      }
+
+      // Name.
+      //
+      if (tt == type::name)
+      {
+        string name (t.name ()); // Only used by the tracing below for now.
+
+        // See if this is a type name, that is, it is followed by '{'.
+        //
+        next (t, tt);
+
+        if (t.is (punc::lcbrace))
+        {
+          //cout << "type: " << name << endl;
+
+          //@@ TODO:
+          //
+          //   - detect nested typed name groups, e.g., 'cxx{hxx{foo}}'.
+          //
+          next (t, tt);
+          names (t, tt);
+
+          if (!t.is (punc::rcbrace))
+          {
+            error (t) << "expected '}' instead of " << t << endl;
+            throw parser_error ();
+          }
+
+          next (t, tt);
+          continue;
+        }
+
+        // This is a target, directory, or variable name.
+        //cout << "name: " << name << endl;
+        continue;
+      }
+
+      // Neither '{' nor a name: this ends the list unless it would be empty.
+      if (!first)
+        break;
+
+      error (t) << "expected name instead of " << t << endl;
+      throw parser_error ();
+    }
+  }
+
+  void parser::
+  next (token& t, token_type& tt)
+  {
+    // Fetch the next token from the lexer and refresh the cached type.
+    tt = (t = lexer_->next ()).type ();
+  }
+
+  ostream& parser::
+  error (const token& t)
+  {
+    diag_ << path_->string () << ':' << t.line () << ':' << t.column ();
+    return diag_ << ": error: "; // The caller appends the message text.
+  }
+
+  // Print a token the way it should appear in an error message: names and
+  // punctuation are quoted while newline/end-of-stream use <...> notation.
+  //
+  ostream&
+  operator<< (ostream& os, const token& t)
+  {
+    const type tt (t.type ());
+
+    if (tt == type::eos)
+      os << "<end-of-stream>";
+    else if (tt == type::name)
+      os << '\'' << t.name () << '\'';
+    else if (tt == type::punctuation)
+    {
+      const punc p (t.punctuation ());
+
+      if      (p == punc::newline) os << "<newline>";
+      else if (p == punc::colon)   os << "':'";
+      else if (p == punc::lcbrace) os << "'{'";
+      else if (p == punc::rcbrace) os << "'}'";
+    }
+
+    return os;
+  }
+}
diff --git a/build/token b/build/token
index bade45c..6f4951c 100644
--- a/build/token
+++ b/build/token
@@ -28,6 +28,12 @@ namespace build
     token_punctuation
     punctuation () const {assert (t_ == token_type::punctuation); return p_;}
 
+    bool
+    is (token_punctuation p) const // True if this is the punctuation p.
+    {
+      return type () == token_type::punctuation && punctuation () == p;
+    }
+
     std::uint64_t line () const {return l_;}
     std::uint64_t column () const {return c_;}
 
-- 
cgit v1.1