From 630b498533f5a9a1e9d40893f4806ef855f1e03b Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Fri, 1 May 2020 17:09:59 +0200
Subject: Fix outstanding issue with directive vs assignment differentiation

Specifically, now the following does the right thing:

print +foo
---
 libbuild2/lexer.cxx                | 23 ++++++++++++++++++-----
 libbuild2/lexer.hxx                | 15 ++++++++++-----
 libbuild2/parser.cxx               | 20 ++++++++++++++------
 tests/directive/parsing.testscript | 23 +++++++++++++++++++++++
 4 files changed, 65 insertions(+), 16 deletions(-)
 create mode 100644 tests/directive/parsing.testscript

diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx
index 5ffac54..c0cadd3 100644
--- a/libbuild2/lexer.cxx
+++ b/libbuild2/lexer.cxx
@@ -11,12 +11,26 @@ namespace build2
 {
   using type = token_type;
 
-  pair<char, bool> lexer::
-  peek_char ()
+  pair<pair<char, char>, bool> lexer::
+  peek_chars ()
   {
     sep_ = skip_spaces ();
-    xchar c (peek ());
-    return make_pair (eos (c) ? '\0' : char (c), sep_);
+    char r[2] = {'\0', '\0'};
+
+    xchar c0 (peek ());
+    if (!eos (c0))
+    {
+      get (c0);
+      r[0] = c0;
+
+      xchar c1 (peek ());
+      if (!eos (c1))
+        r[1] = c1;
+
+      unget (c0);
+    }
+
+    return make_pair (make_pair (r[0], r[1]), sep_);
   }
 
   void lexer::
@@ -31,7 +45,6 @@ namespace build2
     bool n (true); // newline
     bool q (true); // quotes
 
-
     if (!esc)
     {
       assert (!state_.empty ());
diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx
index e9a3149..02112cb 100644
--- a/libbuild2/lexer.hxx
+++ b/libbuild2/lexer.hxx
@@ -142,12 +142,17 @@ namespace build2
     virtual token
     next ();
 
-    // Peek at the first character of the next token. Return the character
-    // or '\0' if the next token will be eos. Also return an indicator of
-    // whether the next token will be separated.
+    // Peek at the first two characters of the next token(s). Return the
+    // characters or '\0' if either would be eos. Also return an indicator of
+    // whether the next token would be separated. Note: cannot be used to peek
+    // at the first character of a line.
     //
-    pair<char, bool>
-    peek_char ();
+    // Note also that it assumes that the current mode and the potential new
+    // mode in which these characters will actually be parsed use the same
+    // whitespace separation (the sep_space and sep_newline values).
+    //
+    pair<pair<char, char>, bool>
+    peek_chars ();
 
   protected:
     struct state
diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx
index 1432d4b..f0eac97 100644
--- a/libbuild2/parser.cxx
+++ b/libbuild2/parser.cxx
@@ -5747,7 +5747,7 @@ namespace build2
     //
     // - it is not quoted [so a keyword can always be escaped] and
     // - next token is '\n' (or eos) or '(' [so if(...) will work] or
-    // - next token is separated and is not '=', '=+', or '+=' [which
+    // - next token is separated and is not '=', '=+', '+=', or '?=' [which
     //   means a "directive trailer" can never start with one of them].
     //
     // See tests/keyword.
@@ -5757,15 +5757,23 @@ namespace build2
       // We cannot peek at the whole token here since it might have to be
       // lexed in a different mode. So peek at its first character.
       //
-      pair<char, bool> p (lexer_->peek_char ());
-      char c (p.first);
+      pair<pair<char, char>, bool> p (lexer_->peek_chars ());
+      char c0 (p.first.first);
+      char c1 (p.first.second);
 
-      // @@ Just checking for leading '+' is not sufficient, for example:
+      // Note that just checking for leading '+'/'?' is not sufficient, for
+      // example:
       //
       // print +foo
       //
-      return c == '\n' || c == '\0' || c == '(' ||
-        (p.second && c != '=' && c != '+');
+      // So we peek at one more character since what we expect next ('=') can't
+      // be whitespace-separated.
+      //
+      return c0 == '\n' || c0 == '\0' || c0 == '(' ||
+        (p.second                 &&
+         c0 != '='                &&
+         (c0 != '+' || c1 != '=') &&
+         (c0 != '?' || c1 != '='));
     }
 
     return false;
diff --git a/tests/directive/parsing.testscript b/tests/directive/parsing.testscript
new file mode 100644
index 0000000..04dd054
--- /dev/null
+++ b/tests/directive/parsing.testscript
@@ -0,0 +1,23 @@
+# file      : tests/directive/parsing.testscript
+# license   : MIT; see accompanying LICENSE file
+
+# Test overall directive parsing.
+#
+
+.include ../common.testscript
+
+: assign
+: Test differentiation with variable assignment.
+:
+{
+  # Note: ? is expanded as pattern.
+
+  $* <'print +foo' >'+foo' : plus
+  $* <'print ?foo' >''     : ques
+
+  $* <'print + foo' >'+ foo' : plus-ws-eq
+  $* <'print ? foo' >'foo'   : ques-ws-eq
+
+  $* <'print +' >'+' : plus-only
+  $* <'print ?' >''  : ques-only
+}
-- 
cgit v1.1