From 554475390b6d2912614778fe50788a09f99ac6a4 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Thu, 3 Nov 2016 17:54:02 +0200
Subject: Implement testscript inclusion support

---
 build2/test/script/lexer.cxx               |   5 +-
 build2/test/script/parser                  |  14 +-
 build2/test/script/parser.cxx              | 215 ++++++++++++++++++++++++++---
 build2/test/script/script                  |  20 ++-
 build2/test/script/script.cxx              |  55 ++++----
 doc/testscript.cli                         |  18 ++-
 unit-tests/test/script/parser/buildfile    |   2 +-
 unit-tests/test/script/parser/include.test | 140 +++++++++++++++++++
 8 files changed, 406 insertions(+), 63 deletions(-)
 create mode 100644 unit-tests/test/script/parser/include.test
diff --git a/build2/test/script/lexer.cxx b/build2/test/script/lexer.cxx
index b7a9f78..7a4e12c 100644
--- a/build2/test/script/lexer.cxx
+++ b/build2/test/script/lexer.cxx
@@ -143,7 +143,7 @@ namespace build2
         xchar c (get ());
         uint64_t ln (c.line), cn (c.column);
 
-        auto make_token = [sep, ln, cn] (type t)
+        auto make_token = [&sep, ln, cn] (type t)
         {
           return token (t, sep, ln, cn, token_printer);
         };
@@ -173,6 +173,7 @@ namespace build2
               if (m == lexer_mode::variable_line)
                 state_.pop ();
 
+              sep = true; // Treat newline as always separated.
               return make_token (type::newline);
             }
 
@@ -426,7 +427,7 @@ namespace build2
         {
           get ();
           state_.pop (); // Expire the description mode.
-          return token (type::newline, false, ln, cn, token_printer);
+          return token (type::newline, true, ln, cn, token_printer);
         }
 
         string lexeme;
diff --git a/build2/test/script/parser b/build2/test/script/parser
index 6e29c9a..b711676 100644
--- a/build2/test/script/parser
+++ b/build2/test/script/parser
@@ -65,6 +65,12 @@ namespace build2
                         optional<description>&&,
                         lines* = nullptr);
 
+        void
+        parse_directive_line (token&, token_type&);
+
+        void
+        perform_include (names, location);
+
         bool
         parse_variable_line (token&, token_type&);
 
@@ -104,19 +110,23 @@ namespace build2
       protected:
         using base_parser = build2::parser;
 
-        lexer* lexer_;
         script* script_;
-        runner* runner_;
 
         // Pre-parse state.
         //
         using id_map = std::unordered_map<string, location>;
+        using include_set = std::set<path>;
 
         group* group_;
         id_map* id_map_;
+        include_set* include_set_; // Testscripts already included in this
+                                   // scope. Must be absolute and normalized.
+        lexer* lexer_;
+        string id_prefix_; // Auto-derived id prefix.
 
         // Parse state.
         //
+        runner* runner_;
         scope* scope_;
       };
     }
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
index f07347b..da175ca 100644
--- a/build2/test/script/parser.cxx
+++ b/build2/test/script/parser.cxx
@@ -39,7 +39,7 @@ namespace build2
       pre_parse (istream& is, testscript& ts, target& tg, const dir_path& wd)
       {
         script s (tg, ts, wd);
-        path_ = &*s.testscripts_.insert (ts.path ()).first;
+        path_ = &*s.paths_.insert (ts.path ()).first;
 
         pre_parse_ = true;
 
@@ -47,12 +47,16 @@ namespace build2
         lexer_ = &l;
         base_parser::lexer_ = &l;
 
+        id_prefix_.clear ();
+
         id_map idm;
+        include_set ins;
 
         script_ = &s;
         runner_ = nullptr;
         group_ = script_;
         id_map_ = &idm;
+        include_set_ = &ins;
         scope_ = nullptr;
 
         // Start location of the implied script group is the beginning of
@@ -84,6 +88,7 @@ namespace build2
         runner_ = &r;
         group_ = nullptr;
         id_map_ = nullptr;
+        include_set_ = nullptr;
         scope_ = &sc;
 
         parse_scope_body ();
@@ -135,17 +140,24 @@ namespace build2
                 fail (t) << "expected newline after '{'";
 
               // Push group. If there is no user-supplied id, use the line
-              // number as the scope id.
+              // number (prefixed with include id) as the scope id.
               //
-              const string& id (d && !d->id.empty ()
-                                ? d->id
-                                : insert_id (to_string (sl.line), sl));
+              const string& id (
+                d && !d->id.empty ()
+                ? d->id
+                : insert_id (id_prefix_ + to_string (sl.line), sl));
+
               id_map idm;
+              include_set ins;
+
               unique_ptr<group> g (new group (id, *group_));
 
               id_map* om (id_map_);
               id_map_ = &idm;
 
+              include_set* os (include_set_);
+              include_set_ = &ins;
+
               group* og (group_);
               group_ = g.get ();
 
@@ -158,6 +170,7 @@ namespace build2
               // Pop group.
               //
               group_ = og;
+              include_set_ = os;
               id_map_ = om;
 
               // Drop empty scopes.
@@ -521,15 +534,20 @@ namespace build2
           }
         default:
           {
-            // Either test command or variable assignment.
+            // Either directive, variable assignment, or test command.
             //
             replay_save (); // Start saving tokens from the current one.
             next (t, tt);
 
-            // Decide whether this is a variable assignment or a command. It
-            // is an assignment if the first token is an unquoted word and
-            // the next is an assign/append/prepend operator. Assignment to
-            // a computed variable name must use the set builtin.
+            // Decide whether this is a variable assignment/directive or a
+            // command.
+            //
+            // It is a directive if the first token is an unquoted directive
+            // name that is separated from the next token (think .include$x).
+            //
+            // It is an assignment if the first token is an unquoted name and
+            // the next is an assign/append/prepend operator. Assignment to a
+            // computed variable name must use the set builtin.
             //
             if (tt == type::word && !t.quoted)
             {
@@ -541,9 +559,23 @@ namespace build2
               mode (lexer_mode::second_token);
               type p (peek ());
 
-              if (p == type::assign  ||
-                  p == type::prepend ||
-                  p == type::append)
+              if (peeked ().separated && t.value == ".include")
+              {
+                replay_stop (); // Stop replay and discard the data.
+
+                // Make sure we are not inside a test (i.e., after semi).
+                //
+                if (ls != nullptr)
+                  fail (ll) << "directive after ';'";
+
+                parse_directive_line (t, tt);
+                assert (tt == type::newline);
+
+                return nullopt;
+              }
+              else if (p == type::assign  ||
+                       p == type::prepend ||
+                       p == type::append)
               {
                 lt = line_type::variable;
                 break;
@@ -696,12 +728,13 @@ namespace build2
         //
         if (ls == &tests)
         {
-          // If there is no user-supplied id, use the line number as the scope
-          // id.
+          // If there is no user-supplied id, use the line number (prefixed
+          // with include id) as the scope id.
           //
-          const string& id (d && !d->id.empty ()
-                            ? d->id
-                            : insert_id (to_string (ll.line), ll));
+          const string& id (
+            d && !d->id.empty ()
+            ? d->id
+            : insert_id (id_prefix_ + to_string (ll.line), ll));
 
           unique_ptr<test> p (new test (id, *group_));
 
@@ -718,6 +751,131 @@ namespace build2
           return d;
       }
 
+      void parser::
+      parse_directive_line (token& t, type& tt)
+      {
+        string d (t.value); // Directive name (currently only .include).
+        location l (get_location (t)); // Directive location (diagnostics).
+        next (t, tt); // First argument or newline.
+
+        // Suspend pre-parsing since we want to really parse the arguments,
+        // with expansion, etc. Also parse the rest of the line in one go.
+        //
+        names args;
+
+        if (tt != type::newline)
+        {
+          pre_parse_ = false;
+          args = parse_names (t, tt, false, "directive argument", nullptr);
+          pre_parse_ = true;
+        }
+
+        if (tt != type::newline) // Something is left on the line.
+          fail (t) << t << " after directive";
+
+        if (d == ".include")
+          perform_include (move (args), move (l));
+        else
+          assert (false); // Unhandled directive.
+      }
+
+      void parser::
+      perform_include (names args, location dl)
+      {
+        auto i (args.begin ());
+
+        // Process options (currently only --once); they must come first.
+        //
+        bool once (false);
+        for (; i != args.end () && i->simple (); ++i)
+        {
+          if (i->value == "--once")
+            once = true;
+          else
+            break;
+        }
+
+        // Process arguments (paths of the testscripts to include).
+        //
+        auto include = [&dl, once, this] (string n) // throw invalid_path
+        {
+          // It may be tempting to use relative paths in diagnostics but it
+          // most likely will be misguided.
+          //
+          auto enter_path = [this] (string n) -> const path&
+          {
+            path p (move (n));
+
+            if (p.relative ())
+              p = path_->directory () / p; // Relative to the includer.
+
+            p.normalize ();
+
+            return *script_->paths_.insert (move (p)).first;
+          };
+
+          const path& p (enter_path (move (n)));
+
+          if (include_set_->insert (p).second || !once)
+          {
+            try
+            {
+              ifdstream ifs (p);
+              lexer l (ifs, p, lexer_mode::script_line);
+
+              const path* op (path_);
+              path_ = &p;
+
+              lexer* ol (lexer_);
+              lexer_ = &l;
+              base_parser::lexer_ = &l;
+
+              string oip (id_prefix_); // Extend with <line>-<file>-.
+              id_prefix_ += to_string (dl.line);
+              id_prefix_ += '-';
+              id_prefix_ += p.leaf ().base ().string ();
+              id_prefix_ += '-';
+
+              token t (pre_parse_scope_body ());
+
+              if (t.type != type::eos)
+                fail (t) << "stray " << t;
+
+              id_prefix_ = move (oip); // Restore the includer's state.
+              base_parser::lexer_ = ol;
+              lexer_ = ol;
+              path_ = op;
+            }
+            catch (const io_error& e)
+            {
+              fail (dl) << "unable to read testscript " << p << ": "
+                        << e.what ();
+            }
+          }
+        };
+
+        for (; i != args.end (); ++i)
+        {
+          name& n (*i);
+
+          try
+          {
+            if (n.simple () && !n.empty ())
+            {
+              include (n.value); // Copy since n is used in diagnostics.
+              continue;
+            }
+          }
+          catch (const invalid_path&) {} // Fall through.
+
+          {
+            diag_record dr (fail (dl));
+            dr << "invalid testscript include path ";
+            to_stream (dr.os, n, true); // Quote.
+          }
+        }
+      }
+
       // Return true if the string contains only digit characters (used to
       // detect the special $NN variables).
       //
@@ -1358,7 +1516,9 @@ namespace build2
                 }
                 catch (const invalid_argument&)
                 {
-                  fail (l) << "invalid string value '" << n << "'";
+                  diag_record dr (fail (l));
+                  dr << "invalid string value ";
+                  to_stream (dr.os, n, true); // Quote.
                 }
 
                 // If it is a quoted chunk, then we add the word as is.
@@ -1650,6 +1810,7 @@ namespace build2
         // The next chunk should be the exit status.
         //
         next (t, tt);
+        location l (get_location (t));
         names ns (parse_names (t, tt, true, "exit status", nullptr));
         unsigned long es (256);
 
@@ -1663,8 +1824,14 @@ namespace build2
           catch (const exception&) {} // Fall through.
 
           if (es > 255)
-            fail (t) << "expected exit status instead of '" << ns << "'" <<
-              info << "exit status is an unsigned integer less than 256";
+          {
+            diag_record dr;
+
+            dr << fail (l) << "expected exit status instead of ";
+            to_stream (dr.os, ns, true); // Quote.
+
+            dr << info << "exit status is an unsigned integer less than 256";
+          }
         }
 
         return command_exit {comp, static_cast<uint8_t> (es)};
@@ -1833,6 +2000,12 @@ namespace build2
         if (!qual.empty ())
           fail (loc) << "qualified variable name";
 
+        // If we have no scope (happens when pre-parsing directives), then we
+        // only look for buildfile variables.
+        //
+        if (scope_ == nullptr)
+          return script_->find_in_buildfile (name);
+
         // @@ MT: will need RW mutex on var_pool. Or maybe if it's not there
         // then it can't possibly be found? Still will be setting variables.
         //
diff --git a/build2/test/script/script b/build2/test/script/script
index aff5a59..c5be0c0 100644
--- a/build2/test/script/script
+++ b/build2/test/script/script
@@ -256,6 +256,11 @@ namespace build2
         lookup
         find (const variable&) const;
 
+        // As above but only look for buildfile variables.
+        //
+        lookup
+        find_in_buildfile (const string&) const;
+
         // Return a value suitable for assignment. If the variable does not
         // exist in this scope's map, then a new one with the NULL value is
         // added and returned. Otherwise the existing value is returned.
@@ -295,11 +300,6 @@ namespace build2
 
         location start_loc_;
         location end_loc_;
-
-        // Set of testscript files already included in this scope. Paths must
-        // be absolute and normalized.
-        //
-        std::set<path> testscripts_;
       };
 
       // group
@@ -379,6 +379,16 @@ namespace build2
       public:
         target& test_target;       // Target we are testing.
         testscript& script_target; // Target of the testscript file.
+
+        // Pre-parse data.
+        //
+      private:
+        friend class parser;
+
+        // Testscript file paths. Specifically, replay_token::file points to
+        // these paths.
+        //
+        std::set<path> paths_;
       };
     }
   }
diff --git a/build2/test/script/script.cxx b/build2/test/script/script.cxx
index 2679cb7..cfc1d91 100644
--- a/build2/test/script/script.cxx
+++ b/build2/test/script/script.cxx
@@ -435,46 +435,51 @@ namespace build2
         }
         while ((p->parent != nullptr ? (p = p->parent) : nullptr) != nullptr);
 
+        return find_in_buildfile (var.name);
+      }
+
+
+      lookup scope::
+      find_in_buildfile (const string& n) const
+      {
         // Switch to the corresponding buildfile variable. Note that we don't
         // want to insert a new variable into the pool (we might be running
         // concurrently). Plus, if there is no such variable, then we cannot
         // possibly find any value.
         //
-        const variable* pvar (build2::var_pool.find (var.name));
+        const variable* pvar (build2::var_pool.find (n));
 
         if (pvar == nullptr)
           return lookup ();
 
-        const script& s (static_cast<const script&> (*p));
-        {
-          const variable& var (*pvar);
+        const script& s (static_cast<const script&> (*root));
+        const variable& var (*pvar);
 
-          // First check the target we are testing.
+        // First check the target we are testing.
+        //
+        {
+          // Note that we skip applying the override if we did not find any
+          // value. In this case, presumably the override also affects the
+          // script target and we will pick it up there. A bit fuzzy.
           //
-          {
-            // Note that we skip applying the override if we did not find any
-            // value. In this case, presumably the override also affects the
-            // script target and we will pick it up there. A bit fuzzy.
-            //
-            auto p (s.test_target.find_original (var, true));
+          auto p (s.test_target.find_original (var, true));
 
-            if (p.first)
-            {
-              if (var.override != nullptr)
-                p = s.test_target.base_scope ().find_override (
-                  var, move (p), true);
+          if (p.first)
+          {
+            if (var.override != nullptr)
+              p = s.test_target.base_scope ().find_override (
+                var, move (p), true);
 
-              return p.first;
-            }
+            return p.first;
           }
-
-          // Then the script target followed by the scopes it is in. Note that
-          // while unlikely it is possible the test and script targets will be
-          // in different scopes which brings the question of which scopes we
-          // should search.
-          //
-          return s.script_target[var];
         }
+
+        // Then the script target followed by the scopes it is in. Note that
+        // while unlikely it is possible the test and script targets will be
+        // in different scopes which brings the question of which scopes we
+        // should search.
+        //
+        return s.script_target[var];
       }
 
       value& scope::
diff --git a/doc/testscript.cli b/doc/testscript.cli
index d055b58..913bd18 100644
--- a/doc/testscript.cli
+++ b/doc/testscript.cli
@@ -728,7 +728,7 @@ test:
   *((variable-line|test-line) ';')
   test-line (':' <text>)?
 
-include: 'include' '--once'? <path>+
+include: '.include'(' '+'--once')*(' '+<path>)*
 
 description:
   +(':' <text>)
@@ -801,8 +801,10 @@ interleaving with scopes and tests, it can be used anywhere in the scope
 body. It can also contain several parts of a scope, for example, setup and
 test lines.
 
-The \c{--once} option signals that files that have already been included
-in this scope should not be included again.
+The \c{--once} option signals that files that have already been included in
+this scope should not be included again. The implementation is not required to
+handle links when determining if two paths are to the same file. Relative
+paths are assumed to be relative to the including testscript.
 
 Note that \c{include} is a directive, not a command. It is performed during
 parsing before any command is executed or testscript variable assigned. You
@@ -1089,11 +1091,13 @@ If an id is not specified then it is automatically derived from the test or
 test group location. If the test or test group is contained directly in the
 top-level testscript file, then just its start line number is used as an id.
 Otherwise, if the test or test group reside in an included file, then the
-start line number is prefixed with that file name (without the extension) in
-the form \c{<file>-<line>}. The start line for a block (either test or group)
-is the line containing opening curly brace (\c{{}) and for a simple test \-
-the test line itself.
+start line number (inside the included file) is prefixed with the line number
+of the \c{.include} directive followed by the included file name (without the
+extension) in the form \c{<line>-<file>-}. This process is repeated
+recursively for nested inclusion.
 
+The start line for a block (either test or group) is the line containing the
+opening brace (\c{{}) and for a simple test \- the test line itself.
 
 \h#grammar-directives|Directives|
 
diff --git a/unit-tests/test/script/parser/buildfile b/unit-tests/test/script/parser/buildfile
index 60c556c..f87f7e8 100644
--- a/unit-tests/test/script/parser/buildfile
+++ b/unit-tests/test/script/parser/buildfile
@@ -12,6 +12,6 @@ test/{target script/{token lexer parser script}}
 
 exe{driver}: cxx{driver} ../../../../build2/cxx{$src} $libs \
 test{cleanup command-re-parse description exit expansion here-document \
-     here-string pipe-expr pre-parse redirect scope setup-teardown}
+     here-string include pipe-expr pre-parse redirect scope setup-teardown}
 
 include ../../../../build2/
diff --git a/unit-tests/test/script/parser/include.test b/unit-tests/test/script/parser/include.test
new file mode 100644
index 0000000..8210172
--- /dev/null
+++ b/unit-tests/test/script/parser/include.test
@@ -0,0 +1,140 @@
+: not-directive
+:
+$* <<EOI >>EOO
+x =
+".include" foo.test
+.include\$x foo.test
+EOI
+.include foo.test
+.include foo.test
+EOO
+
+: none
+:
+$* <<EOI
+.include
+.include --once
+EOI
+
+: empty
+:
+touch foo.test &foo.test; #@@ TMP
+$* <<EOI
+.include foo.test
+.include --once foo.test
+EOI
+
+: one
+:
+cat <"cmd" >>>foo.test;
+$* <<EOI >>EOO
+.include foo.test
+EOI
+cmd
+EOO
+
+: multiple
+:
+cat <"cmd foo" >>>foo.test;
+cat <"cmd bar" >>>bar.test;
+$* <<EOI >>EOO
+.include foo.test bar.test
+EOI
+cmd foo
+cmd bar
+EOO
+
+: once
+:
+cat <"cmd" >>>foo.test;
+$* <<EOI >>EOO
+.include foo.test
+x
+.include --once foo.test
+.include --once bar/../foo.test
+y
+.include ../once/foo.test
+EOI
+cmd
+x
+y
+cmd
+EOO
+
+: group-id
+:
+cat <<EOI >>>foo.test;
+{
+  x = b
+}
+EOI
+$* -s -i <<EOI >>EOO
+x = a
+.include foo.test
+EOI
+{
+  { # 2-foo-1
+  }
+}
+EOO
+
+: test-id
+:
+cat <<EOI >>>foo.test;
+cmd
+EOI
+$* -s -i <<EOI >>EOO
+x = a
+.include foo.test
+EOI
+{
+  { # 2-foo-1
+    cmd
+  }
+}
+EOO
+
+: var-expansion
+:
+cat <<EOI >>>foo-$(build.version).test;
+cmd
+EOI
+$* <<EOI >>EOO
+.include foo-\$\(build.version\).test
+EOI
+cmd
+EOO
+
+: after-semi
+:
+$* <<EOI 2>>EOE != 0
+cmd;
+.include foo.test
+EOI
+testscript:2:1: error: directive after ';'
+EOE
+
+: semi-after
+:
+$* <<EOI 2>>EOE != 0
+.include foo.test;
+cmd
+EOI
+testscript:1:18: error: ';' after directive
+EOE
+
+: invalid-path
+:
+$* <<EOI 2>>EOE != 0
+.include ""
+EOI
+testscript:1:1: error: invalid testscript include path ''
+EOE
+
+: unable-open
+:
+: Note that the error message is platform specific.
+:
+$* <<EOI 2>- != 0
+.include foo.test
+EOI
-- 
cgit v1.1