From 630b498533f5a9a1e9d40893f4806ef855f1e03b Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Fri, 1 May 2020 17:09:59 +0200 Subject: Fix outstanding issue with directive vs assignment differentiation Specifically, now the following does the right thing: print +foo --- libbuild2/lexer.cxx | 23 ++++++++++++++++++----- libbuild2/lexer.hxx | 15 ++++++++++----- libbuild2/parser.cxx | 20 ++++++++++++++------ tests/directive/parsing.testscript | 23 +++++++++++++++++++++++ 4 files changed, 65 insertions(+), 16 deletions(-) create mode 100644 tests/directive/parsing.testscript diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx index 5ffac54..c0cadd3 100644 --- a/libbuild2/lexer.cxx +++ b/libbuild2/lexer.cxx @@ -11,12 +11,26 @@ namespace build2 { using type = token_type; - pair<char, bool> lexer:: - peek_char () + pair<pair<char, char>, bool> lexer:: + peek_chars () { sep_ = skip_spaces (); - xchar c (peek ()); - return make_pair (eos (c) ? '\0' : char (c), sep_); + char r[2] = {'\0', '\0'}; + + xchar c0 (peek ()); + if (!eos (c0)) + { + get (c0); + r[0] = c0; + + xchar c1 (peek ()); + if (!eos (c1)) + r[1] = c1; + + unget (c0); + } + + return make_pair (make_pair (r[0], r[1]), sep_); } void lexer:: @@ -31,7 +45,6 @@ namespace build2 bool n (true); // newline bool q (true); // quotes - if (!esc) { assert (!state_.empty ()); diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx index e9a3149..02112cb 100644 --- a/libbuild2/lexer.hxx +++ b/libbuild2/lexer.hxx @@ -142,12 +142,17 @@ namespace build2 virtual token next (); - // Peek at the first character of the next token. Return the character - // or '\0' if the next token will be eos. Also return an indicator of - // whether the next token will be separated. + // Peek at the first two characters of the next token(s). Return the + // characters or '\0' if either would be eos. Also return an indicator of + // whether the next token would be separated. Note: cannot be used to peek + // at the first character of a line.
// - pair<char, bool> - peek_char (); + // Note also that it assumes that the current mode and the potential new + // mode in which these characters will actually be parsed use the same + // whitespace separation (the sep_space and sep_newline values). + // + pair<pair<char, char>, bool> + peek_chars (); protected: struct state diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx index 1432d4b..f0eac97 100644 --- a/libbuild2/parser.cxx +++ b/libbuild2/parser.cxx @@ -5747,7 +5747,7 @@ namespace build2 // // - it is not quoted [so a keyword can always be escaped] and // - next token is '\n' (or eos) or '(' [so if(...) will work] or - // - next token is separated and is not '=', '=+', or '+=' [which + // - next token is separated and is not '=', '=+', '+=', or '?=' [which // means a "directive trailer" can never start with one of them]. // // See tests/keyword. @@ -5757,15 +5757,23 @@ namespace build2 // We cannot peek at the whole token here since it might have to be // lexed in a different mode. So peek at its first character. // - pair<char, bool> p (lexer_->peek_char ()); - char c (p.first); + pair<pair<char, char>, bool> p (lexer_->peek_chars ()); + char c0 (p.first.first); + char c1 (p.first.second); - // @@ Just checking for leading '+' is not sufficient, for example: + // Note that just checking for leading '+'/'?' is not sufficient, for + // example: // // print +foo // - return c == '\n' || c == '\0' || c == '(' || - (p.second && c != '=' && c != '+'); + // So we peek at one more character since what we expect next ('=') can't + // be whitespace-separated. + // + return c0 == '\n' || c0 == '\0' || c0 == '(' || + (p.second && + c0 != '=' && + (c0 != '+' || c1 != '=') && + (c0 != '?'
|| c1 != '=')); } return false; diff --git a/tests/directive/parsing.testscript b/tests/directive/parsing.testscript new file mode 100644 index 0000000..04dd054 --- /dev/null +++ b/tests/directive/parsing.testscript @@ -0,0 +1,23 @@ +# file : tests/directive/parsing.testscript +# license : MIT; see accompanying LICENSE file + +# Test overall directive parsing. +# + +.include ../common.testscript + +: assign +: Test differentiation with variable assignment. +: +{ + # Note: ? is expanded as pattern. + + $* <'print +foo' >'+foo' : plus + $* <'print ?foo' >'' : ques + + $* <'print + foo' >'+ foo' : plus-ws-eq + $* <'print ? foo' >'foo' : ques-ws-eq + + $* <'print +' >'+' : plus-only + $* <'print ?' >'' : ques-only +} -- cgit v1.1