From 1b86963946082e10e879283fad51ba7ce4e942e2 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Thu, 10 Sep 2015 09:12:47 +0200
Subject: Add support for chunking name parsing

---
 build/parser                  |  19 ++++---
 build/parser.cxx              |  22 ++++++--
 tests/function/call/buildfile |   2 +
 tests/function/call/test.out  |   2 +
 tests/parser/buildfile        |   7 ++-
 tests/parser/driver.cxx       | 121 +++++++++++++++++++++++++++++++++++++-----
 6 files changed, 146 insertions(+), 27 deletions(-)
diff --git a/build/parser b/build/parser
index 0678a62..7787568 100644
--- a/build/parser
+++ b/build/parser
@@ -48,7 +48,7 @@ namespace build
 
     // Recursive descent parser.
     //
-  private:
+  protected:
     void
     clause (token&, token_type&);
 
@@ -79,17 +79,22 @@ namespace build
     names_type
     eval (token&, token_type&);
 
+    // If chunk is true, then parse the smallest chunk of input that still
+    // forms a complete name. Note that in this case you may still end up
+    // with multiple names, for example, {foo bar}.
+    //
     names_type
-    names (token& t, token_type& tt)
+    names (token& t, token_type& tt, bool chunk = false)
     {
       names_type ns;
-      names (t, tt, ns, 0, nullptr, nullptr, nullptr);
+      names (t, tt, ns, chunk, 0, nullptr, nullptr, nullptr);
       return ns;
     }
 
     void
     names (token&, token_type&,
            names_type&,
+           bool chunk,
            std::size_t pair,
            const std::string* prj,
            const dir_path* dir,
@@ -102,7 +107,7 @@ namespace build
 
     // Utilities.
     //
-  private:
+  protected:
 
     // Switch to a new current scope. Note that this function might
     // also have to switch to a new root scope if the new current
@@ -122,7 +127,7 @@ namespace build
 
     // Lexer.
     //
-  private:
+  protected:
     token_type
     next (token&, token_type&);
 
@@ -138,10 +143,10 @@ namespace build
 
     // Diagnostics.
     //
-  private:
+  protected:
     const fail_mark<failed> fail;
 
-  private:
+  protected:
     const std::string* path_; // Path processed by diag_relative().
     lexer* lexer_;
     target* target_; // Current target, if any.
diff --git a/build/parser.cxx b/build/parser.cxx
index 1ceb193..092ae3d 100644
--- a/build/parser.cxx
+++ b/build/parser.cxx
@@ -825,6 +825,7 @@ namespace build
   names (token& t,
          type& tt,
          names_type& ns,
+         bool chunk,
          size_t pair,
          const std::string* pp,
          const dir_path* dp,
@@ -837,7 +838,7 @@ namespace build
 
     // Buffer that is used to collect the complete name in case of
     // an unseparated variable expansion or eval context, e.g.,
-    // 'foo$bar$(baz)fox'. The idea is to concatenate all the
+    // 'foo$bar($baz)fox'. The idea is to concatenate all the
     // individual parts in this buffer and then re-inject it into
     // the loop as a single token.
     //
@@ -865,7 +866,18 @@ namespace build
         concat.clear ();
       }
       else if (!first)
+      {
+        // If we are chunking, stop at the next separated token, unless the
+        // current or next token is a pair separator (we want the "x = y"
+        // pair to be parsed as a single chunk).
+        //
+        if (chunk &&
+            peeked ().separated &&
+            (tt != type::pair_separator && t.type != type::pair_separator))
+          break;
+
         next (t, tt);
+      }
 
       // Name.
       //
@@ -969,6 +981,7 @@ namespace build
           count = ns.size ();
           names (t, tt,
                  ns,
+                 false,
                  (pair != 0
                   ? pair
                   : (ns.empty () || ns.back ().pair == '\0' ? 0 : ns.size ())),
@@ -1266,6 +1279,7 @@ namespace build
         count = ns.size ();
         names (t, tt,
                ns,
+               false,
                (pair != 0
                 ? pair
                 : (ns.empty () || ns.back ().pair == '\0' ? 0 : ns.size ())),
@@ -1279,7 +1293,7 @@ namespace build
         continue;
       }
 
-      // A pair separator (only in the pair mode).
+      // A pair separator (only in the pairs mode).
       //
       if (tt == type::pair_separator)
       {
@@ -1308,8 +1322,6 @@ namespace build
       if (!first)
         break;
 
-      // Our caller expected this to be a name.
-      //
       if (tt == type::rcbrace) // Empty name, e.g., dir{}.
       {
         // If we are a second half of a pair, add another first half
@@ -1325,6 +1337,8 @@ namespace build
         break;
       }
       else
+        // Our caller expected this to be a name.
+        //
         fail (t) << "expected name instead of " << t;
     }
 
diff --git a/tests/function/call/buildfile b/tests/function/call/buildfile
index 93eba14..136f652 100644
--- a/tests/function/call/buildfile
+++ b/tests/function/call/buildfile
@@ -3,6 +3,8 @@ $identity (a)
 $identity  (a b c)
 $identity(sub/dir{x y z})
 
+print a$identity  (b)c
+
 # Verify we can inhibit function call with quoting.
 #
 foo = FOO
diff --git a/tests/function/call/test.out b/tests/function/call/test.out
index abc5974..88a852b 100644
--- a/tests/function/call/test.out
+++ b/tests/function/call/test.out
@@ -2,6 +2,8 @@ identity()
 identity(a)
 identity(a b c)
 identity(sub/dir{x} sub/dir{y} sub/dir{z})
+identity(b)
+ac
 FOOBAR
 FOOBAR
 FOOBAR
diff --git a/tests/parser/buildfile b/tests/parser/buildfile
index 5cbcaca..c3eda28 100644
--- a/tests/parser/buildfile
+++ b/tests/parser/buildfile
@@ -2,5 +2,8 @@
 # copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
 # license   : MIT; see accompanying LICENSE file
 
-exe{driver}: cxx{driver ../../../build/{lexer parser scope target native \
-  prerequisite context diagnostics utility path timestamp}}
+import libs = libbutl%lib{butl}
+
+exe{driver}: cxx{driver ../../build/{token lexer parser scope target \
+ prerequisite variable operation rule search algorithm file module dump \
+ context diagnostics name path-io utility}} $libs
diff --git a/tests/parser/driver.cxx b/tests/parser/driver.cxx
index fef2f41..ffafcea 100644
--- a/tests/parser/driver.cxx
+++ b/tests/parser/driver.cxx
@@ -9,7 +9,8 @@
 #include <build/types>
 #include <build/scope>
 #include <build/target>
-#include <build/native>
+#include <build/context>
+#include <build/variable>
 
 #include <build/lexer>
 #include <build/parser>
@@ -20,16 +21,64 @@ using namespace build;
 static bool
 parse (const char*);
 
+static names
+parse_names (const char* s, lexer_mode m, bool chunk);
+
+static names
+chunk_names (const char* s)
+{
+  return parse_names (s, lexer_mode::pairs, true);
+}
+
 int
 main ()
 {
   ostream cnull (nullptr);
   diag_stream = &cnull;
 
-  target_types.insert (file::static_type);
-  target_types.insert (exe::static_type);
-  target_types.insert (obj::static_type);
+  reset ();
+
+  global_scope->assign ("foo") = "FOO";
+  global_scope->assign ("bar") = "BAR";
+
+  // names() in chunking mode.
+  //
+  assert (chunk_names ("{}") == names ({name ()}));
+  assert (chunk_names ("foo") == names ({name ("foo")}));
+  assert (chunk_names ("foo bar") == names ({name ("foo")}));
+  assert (chunk_names ("{foo bar}") == names ({name ("foo"), name ("bar")}));
+  assert (chunk_names ("dir{foo bar}") == names ({name ("dir", "foo"),
+                                                  name ("dir", "bar")}));
+  assert (chunk_names ("dir{foo bar} baz") == names ({name ("dir", "foo"),
+                                                      name ("dir", "bar")}));
+  assert (chunk_names ("dir {foo bar}") == names ({name ("dir", "foo"),
+                                                   name ("dir", "bar")}));
+  assert (chunk_names ("dir {foo bar} baz") == names ({name ("dir", "foo"),
+                                                       name ("dir", "bar")}));
+  assert (chunk_names ("{} foo") == names ({name ()}));
+
+  // Expansion.
+  //
+  assert (chunk_names ("$foo $bar baz") == names ({name ("FOO")}));
+  assert (chunk_names ("$foo$bar baz") == names ({name ("FOOBAR")}));
+
+  assert (chunk_names ("foo(bar)") == names ({name ("foobar")}));
+  assert (chunk_names ("foo (bar)") == names ({name ("foo")}));
+
+  assert (chunk_names ("\"$foo\"(bar)") == names ({name ("FOObar")}));
+  assert (chunk_names ("\"$foo\" (bar)") == names ({name ("FOO")}));
+
+  // Quoting.
+  //
+  assert (chunk_names ("\"$foo $bar\" baz") == names ({name ("FOO BAR")}));
+
+  // Pairs.
+  //
+  assert (chunk_names ("foo=bar") == names ({name ("foo"), name ("bar")}));
+  assert (chunk_names ("foo = bar x") == names ({name ("foo"), name ("bar")}));
 
+  // General.
+  //
   assert (parse (""));
   assert (parse ("foo:"));
   assert (parse ("foo bar:"));
@@ -43,25 +92,25 @@ main ()
   assert (parse ("{{foo bar}}:"));
   assert (parse ("{{foo bar} {baz} {biz fox} fix}:"));
 
-  assert (parse ("exe{foo}:"));
-  assert (parse ("exe{foo bar}:"));
-  assert (parse ("{exe{foo bar}}:"));
-  assert (parse ("exe{{foo bar} fox}:"));
-  assert (parse ("exe{foo}: obj{bar baz} biz.o file{fox}"));
+  assert (parse ("file{foo}:"));
+  assert (parse ("file{foo bar}:"));
+  assert (parse ("{file{foo bar}}:"));
+  assert (parse ("file{{foo bar} fox}:"));
+  assert (parse ("file{foo}: file{bar baz} biz.o file{fox}"));
 
-  assert (!parse (":"));
+  //assert (!parse (":"));
   assert (!parse ("foo"));
   assert (!parse ("{"));
   assert (!parse ("{foo:"));
   assert (!parse ("{foo{:"));
   assert (!parse ("foo: bar:"));
-  assert (!parse ("exe{foo:"));
+  assert (!parse ("file{foo:"));
 
   // Directory prefix.
   //
   assert (parse ("../{foo}: ../{bar}"));
-  assert (parse ("../exe{foo}: ../obj{bar}"));
-  assert (!parse ("../exe{exe{foo}}:"));
+  assert (parse ("../file{foo}: ../file{bar}"));
+  assert (!parse ("../file{file{foo}}:"));
 
   // Directory scope.
   //
@@ -81,9 +130,23 @@ main ()
   assert (!parse ("test/ foo/:\n{\n}"));
 }
 
+struct test_parser: parser
+{
+  names_type
+  test_names (const char*, lexer_mode, bool chunk);
+};
+
 static bool
 parse (const char* s)
 {
+  reset (); // Clear the state.
+
+  // Create a minimal root scope.
+  //
+  auto i (scopes.insert (path::current (), nullptr, true, true));
+  scope& root (*i->second);
+  root.src_path_ = root.out_path_ = &i->first;
+
   istringstream is (s);
 
   is.exceptions (istream::failbit | istream::badbit);
@@ -91,7 +154,7 @@ parse (const char* s)
 
   try
   {
-    p.parse (is, path (), scopes[path::current ()]);
+    p.parse_buildfile (is, path (), root, root);
   }
   catch (const failed&)
   {
@@ -100,3 +163,33 @@ parse (const char* s)
 
   return true;
 }
+
+// parser::names()
+//
+names test_parser::
+test_names (const char* s, lexer_mode m, bool chunk)
+{
+  istringstream is (s);
+  is.exceptions (istream::failbit | istream::badbit);
+  lexer l (is, "");
+
+  if (m != lexer_mode::normal)
+    l.mode (m, '=');
+
+  path_ = &l.name ();
+  lexer_ = &l;
+  target_ = nullptr;
+  scope_ = root_ = global_scope;
+
+  token t (token_type::eos, false, 0, 0);
+  token_type tt;
+  next (t, tt);
+  return names (t, tt, chunk);
+}
+
+static names
+parse_names (const char* s, lexer_mode m, bool chunk)
+{
+  test_parser p;
+  return p.test_names (s, m, chunk);
+}
-- 
cgit v1.1