From 835ed5f7080a98e9ee80ac08d5585ccdbb63fe0e Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Mon, 15 Dec 2014 10:43:16 +0200
Subject: Parse directory scopes

---
 build/lexer.cxx  |  10 ++--
 build/parser     |  18 ++++++-
 build/parser.cxx | 155 ++++++++++++++++++++++++++++++++++++++++++-------------
 build/token      |  29 ++++-------
 4 files changed, 151 insertions(+), 61 deletions(-)

(limited to 'build')
diff --git a/build/lexer.cxx b/build/lexer.cxx
index 9e3521a..a1aa375 100644
--- a/build/lexer.cxx
+++ b/build/lexer.cxx
@@ -19,7 +19,7 @@ namespace build
     uint64_t ln (c.line ()), cn (c.column ());
 
     if (is_eos (c))
-      return token (ln, cn);
+      return token (token_type::eos, ln, cn);
 
     switch (c)
     {
@@ -27,19 +27,19 @@ namespace build
       //
     case '\n':
       {
-        return token (token_punctuation::newline, ln, cn);
+        return token (token_type::newline, ln, cn);
       }
     case ':':
       {
-        return token (token_punctuation::colon, ln, cn);
+        return token (token_type::colon, ln, cn);
       }
     case '{':
       {
-        return token (token_punctuation::lcbrace, ln, cn);
+        return token (token_type::lcbrace, ln, cn);
       }
     case '}':
       {
-        return token (token_punctuation::rcbrace, ln, cn);
+        return token (token_type::rcbrace, ln, cn);
       }
     }
 
diff --git a/build/parser b/build/parser
index 04ef00d..c487015 100644
--- a/build/parser
+++ b/build/parser
@@ -6,6 +6,7 @@
 #define BUILD_PARSER
 
 #include <string>
+#include <vector>
 #include <iosfwd>
 #include <exception>
 
@@ -32,13 +33,26 @@ namespace build
     // Recursive descent parser.
     //
   private:
+    typedef std::vector<std::string> names;
+
     void
-    names (token&, token_type&);
+    parse_clause (token&, token_type&);
+
+    names // Convenience overload: returns the parsed names in a new vector.
+    parse_names (token& t, token_type& tt)
+    {
+      names ns;
+      parse_names (t, tt, ns); // The three-argument overload fills ns.
+      return ns;
+    }
+
+    void
+    parse_names (token&, token_type&, names&);
 
     // Utilities.
     //
   private:
-    void
+    token_type
     next (token&, token_type&);
 
     std::ostream&
diff --git a/build/parser.cxx b/build/parser.cxx
index 669ac8b..348d285 100644
--- a/build/parser.cxx
+++ b/build/parser.cxx
@@ -19,7 +19,6 @@ namespace build
   operator<< (ostream&, const token&);
 
   typedef token_type type;
-  typedef token_punctuation punc;
 
   void parser::
   parse (istream& is, const path& p)
@@ -28,31 +27,122 @@ namespace build
     lexer_ = &l;
     path_ = &p;
 
-    token t (0, 0); // eos
+    token t (type::eos, 0, 0);
     type tt;
+    next (t, tt);
 
-    for (next (t, tt); tt != type::eos; )
+    parse_clause (t, tt);
+
+    if (tt != type::eos)
+    {
+      error (t) << "unexpected " << t << endl;
+      throw parser_error ();
+    }
+  }
+
+  void parser::
+  parse_clause (token& t, token_type& tt)
+  {
+    while (tt != type::eos)
     {
       // We always start with one or more names.
       //
-      names (t, tt);
+      if (tt != type::name && tt != type::lcbrace)
+        break; // Something else. Let our caller handle that.
+
+      names ns (parse_names (t, tt));
 
-      if (t.is (punc::colon))
+      if (tt == type::colon)
       {
         next (t, tt);
 
-        if (tt == type::name || t.is (punc::lcbrace))
-          names (t, tt);
+        // Dependency declaration.
+        //
+        if (tt == type::name || tt == type::lcbrace)
+        {
+          names ns (parse_names (t, tt));
 
-        if (t.is (punc::newline))
-          next (t, tt);
-        else if (tt != type::eos)
+          if (tt == type::newline)
+            next (t, tt);
+          else if (tt != type::eos)
+          {
+            error (t) << "expected newline instead of " << t << endl;
+            throw parser_error ();
+          }
+
+          continue;
+        }
+
+        if (tt == type::newline)
         {
-          error (t) << "expected newline insetad of " << t << endl;
-          throw parser_error ();
+          // See if we have a directory/target scope.
+          //
+          if (next (t, tt) == type::lcbrace)
+          {
+            // Should be on its own line.
+            //
+            if (next (t, tt) != type::newline)
+            {
+              error (t) << "expected newline after '{'" << endl;
+              throw parser_error ();
+            }
+
+            // See if this is a directory or target scope. Different
+            // things can appear inside depending on which it is.
+            //
+            bool dir (false);
+            for (const auto& n: ns)
+            {
+              if (n.back () == '/')
+              {
+                if (ns.size () != 1)
+                {
+                  // @@ TODO: point to name.
+                  //
+                  error (t) << "multiple names in directory scope" << endl;
+                  throw parser_error ();
+                }
+
+                dir = true;
+              }
+            }
+
+            next (t, tt);
+
+            if (dir)
+              // A directory scope can contain anything that a top level can.
+              //
+              parse_clause (t, tt);
+            else
+            {
+              // @@ TODO: target scope.
+            }
+
+            if (tt != type::rcbrace)
+            {
+              error (t) << "expected '}' instead of " << t << endl;
+              throw parser_error ();
+            }
+
+            // Should be on its own line.
+            //
+            if (next (t, tt) == type::newline)
+              next (t, tt);
+            else if (tt != type::eos)
+            {
+              error (t) << "expected newline after '}'" << endl;
+              throw parser_error ();
+            }
+          }
+
+          continue;
         }
 
-        continue;
+        if (tt == type::eos)
+          continue;
+
+        error (t) << "expected newline instead of " << t << endl;
+        throw parser_error ();
       }
 
       error (t) << "unexpected " << t << endl;
@@ -61,18 +151,18 @@ namespace build
   }
 
   void parser::
-  names (token& t, type& tt)
+  parse_names (token& t, type& tt, names& ns)
   {
     for (bool first (true);; first = false)
     {
       // Untyped name group, e.g., '{foo bar}'.
       //
-      if (t.is (punc::lcbrace))
+      if (tt == type::lcbrace)
       {
         next (t, tt);
-        names (t, tt);
+        parse_names (t, tt, ns);
 
-        if (!t.is (punc::rcbrace))
+        if (tt != type::rcbrace)
         {
           error (t) << "expected '}' instead of " << t << endl;
           throw parser_error ();
@@ -90,9 +180,7 @@ namespace build
 
         // See if this is a type name, that is, it is followed by '{'.
         //
-        next (t, tt);
-
-        if (t.is (punc::lcbrace))
+        if (next (t, tt) == type::lcbrace)
         {
           //cout << "type: " << name << endl;
 
@@ -101,9 +189,9 @@ namespace build
           //   - detect nested typed name groups, e.g., 'cxx{hxx{foo}}'.
           //
           next (t, tt);
-          names (t, tt);
+          parse_names (t, tt, ns);
 
-          if (!t.is (punc::rcbrace))
+          if (tt != type::rcbrace)
           {
             error (t) << "expected '}' instead of " << t << endl;
             throw parser_error ();
@@ -115,6 +203,7 @@ namespace build
 
         // This is a target, directory, or variable name.
         //cout << "name: " << name << endl;
+        ns.push_back (name);
         continue;
       }
 
@@ -126,11 +215,12 @@ namespace build
     }
   }
 
-  void parser::
+  token_type parser::
   next (token& t, token_type& tt)
   {
     t = lexer_->next ();
     tt = t.type ();
+    return tt; // Also returned so callers can test the new type inline.
   }
 
   ostream& parser::
@@ -147,19 +237,12 @@ namespace build
   {
     switch (t.type ())
     {
-    case token_type::eos: os << "<end-of-stream>"; break;
-    case token_type::punctuation:
-      {
-        switch (t.punctuation ())
-        {
-        case token_punctuation::newline: os << "<newline>"; break;
-        case token_punctuation::colon:   os << "':'"; break;
-        case token_punctuation::lcbrace: os << "'{'"; break;
-        case token_punctuation::rcbrace: os << "'}'"; break;
-        }
-        break;
-      }
-    case token_type::name: os << '\'' << t.name () << '\''; break;
+    case token_type::eos:     os << "<end-of-stream>"; break;
+    case token_type::newline: os << "<newline>"; break;
+    case token_type::colon:   os << "':'"; break;
+    case token_type::lcbrace: os << "'{'"; break;
+    case token_type::rcbrace: os << "'}'"; break;
+    case token_type::name:    os << '\'' << t.name () << '\''; break;
     }
 
     return os;
diff --git a/build/token b/build/token
index 6f4951c..9f9b2b4 100644
--- a/build/token
+++ b/build/token
@@ -13,8 +13,15 @@
 
 namespace build
 {
-  enum class token_type {eos, name, punctuation};
-  enum class token_punctuation {newline, colon, lcbrace, rcbrace};
+  enum class token_type
+  {
+    eos,     // End of stream.
+    name,    // Name; the text is available via token::name ().
+    newline, // '\n'
+    colon,   // ':'
+    lcbrace, // '{'
+    rcbrace  // '}'
+  };
 
   class token
   {
@@ -25,32 +32,18 @@ namespace build
     std::string const&
     name () const {assert (t_ == token_type::name); return n_;}
 
-    token_punctuation
-    punctuation () const {assert (t_ == token_type::punctuation); return p_;}
-
-    bool
-    is (token_punctuation p) const
-    {
-      return t_ == token_type::punctuation && p_ == p;
-    }
-
     std::uint64_t line () const {return l_;}
     std::uint64_t column () const {return c_;}
 
   public:
-    token (std::uint64_t l, std::uint64_t c)
-        : t_ (token_type::eos), l_ (l), c_ (c) {}
+    token (token_type t, std::uint64_t l, std::uint64_t c)
+        : t_ (t), l_ (l), c_ (c) {} // n_ is left default-constructed (empty).
 
     token (std::string n, std::uint64_t l, std::uint64_t c)
         : t_ (token_type::name), n_ (std::move (n)), l_ (l), c_ (c) {}
 
-    token (token_punctuation p, std::uint64_t l, std::uint64_t c)
-        : t_ (token_type::punctuation), p_ (p), l_ (l), c_ (c) {}
-
   private:
     token_type t_;
-
-    token_punctuation p_;
     std::string n_;
 
     std::uint64_t l_;
-- 
cgit v1.1