aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Boris Kolpackov <boris@codesynthesis.com> 2020-05-04 07:27:47 +0200
committer: Boris Kolpackov <boris@codesynthesis.com> 2020-05-27 08:35:29 +0200
commit: a54abb2f4e5e66877619097bfd281261f99c5103 (patch)
tree: b10a24fd2c5d1d1dd2602f25cc7228b287200cbb
parent: e63b427c51e37135e50dec9435659d661872fe95 (diff)
Add recognition for line-leading `%` as token
-rw-r--r-- libbuild2/context.cxx 4
-rw-r--r-- libbuild2/lexer+normal.test.testscript 36
-rw-r--r-- libbuild2/lexer.cxx 46
-rw-r--r-- libbuild2/lexer.hxx 37
-rw-r--r-- libbuild2/parser.cxx 5
-rw-r--r-- libbuild2/test/script/lexer.cxx 2
-rw-r--r-- libbuild2/token.cxx 1
-rw-r--r-- libbuild2/token.hxx 1
8 files changed, 104 insertions, 28 deletions
diff --git a/libbuild2/context.cxx b/libbuild2/context.cxx
index 0be0046..fe046ae 100644
--- a/libbuild2/context.cxx
+++ b/libbuild2/context.cxx
@@ -340,8 +340,10 @@ namespace build2
// And so the first token should be a word which can be either a
// variable name (potentially with the directory qualification) or just
// the directory, in which case it should be followed by another word
- // (unqualified variable name).
+ // (unqualified variable name). To avoid treating any of the visibility
+ // modifiers as special we use the cmdvar mode.
//
+ l.mode (lexer_mode::cmdvar);
token t (l.next ());
optional<dir_path> dir;
diff --git a/libbuild2/lexer+normal.test.testscript b/libbuild2/lexer+normal.test.testscript
index c9448c3..e66b81e 100644
--- a/libbuild2/lexer+normal.test.testscript
+++ b/libbuild2/lexer+normal.test.testscript
@@ -34,3 +34,39 @@ $* <:'x?=y' >>EOO
?=
'y'
EOO
+
+: percent
+: Leading percent sign recognition.
+:
+{
+ : first
+ :
+ $* <:'%%' >>EOO
+ %
+ '%'
+ EOO
+
+ : space
+ :
+ $* <:' %%' >>EOO
+ %
+ '%'
+ EOO
+
+ : newline
+ :
+ $* <<EOI >>EOO
+
+ %%
+ EOI
+ %
+ '%'
+ <newline>
+ EOO
+
+ : non-token
+ :
+ $* <:'x%' >>EOO
+ 'x%'
+ EOO
+}
diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx
index c0cadd3..1e400e3 100644
--- a/libbuild2/lexer.cxx
+++ b/libbuild2/lexer.cxx
@@ -14,7 +14,10 @@ namespace build2
pair<pair<char, char>, bool> lexer::
peek_chars ()
{
- sep_ = skip_spaces ();
+ auto p (skip_spaces ());
+ assert (!p.second);
+ sep_ = p.first;
+
char r[2] = {'\0', '\0'};
xchar c0 (peek ());
@@ -54,7 +57,11 @@ namespace build2
switch (m)
{
case lexer_mode::normal:
+ case lexer_mode::cmdvar:
{
+ // Note: `%` is only recognized at the beginning of the line so it
+ // should not be included here.
+ //
a = true;
s1 = ":<>=+? $(){}#\t\n";
s2 = " == ";
@@ -148,6 +155,7 @@ namespace build2
switch (m)
{
case lexer_mode::normal:
+ case lexer_mode::cmdvar:
case lexer_mode::value:
case lexer_mode::values:
case lexer_mode::switch_expressions:
@@ -161,7 +169,9 @@ namespace build2
default: assert (false); // Unhandled custom mode.
}
- bool sep (skip_spaces ());
+ pair<bool, bool> skip (skip_spaces ());
+ bool sep (skip.first); // Separated from a previous character.
+ bool first (skip.second); // First non-whitespace character of a line.
xchar c (get ());
uint64_t ln (c.line), cn (c.column);
@@ -209,7 +219,8 @@ namespace build2
m == lexer_mode::case_patterns)
state_.pop ();
- // Re-enable attributes in the normal mode.
+ // Re-enable attributes in the normal mode (should never be needed in
+ // cmdvar).
//
if (state_.top ().mode == lexer_mode::normal)
state_.top ().attributes = true;
@@ -230,6 +241,14 @@ namespace build2
}
}
+ if (m == lexer_mode::normal && first)
+ {
+ switch (c)
+ {
+ case '%': return make_token (type::percent);
+ }
+ }
+
// The following characters are special in all modes except attributes.
//
if (m != lexer_mode::attributes && m != lexer_mode::attribute_value)
@@ -267,6 +286,7 @@ namespace build2
// switch_expressions modes.
//
if (m == lexer_mode::normal ||
+ m == lexer_mode::cmdvar ||
m == lexer_mode::switch_expressions ||
m == lexer_mode::case_patterns)
{
@@ -278,7 +298,8 @@ namespace build2
// The following characters are special in the normal mode.
//
- if (m == lexer_mode::normal)
+ if (m == lexer_mode::normal ||
+ m == lexer_mode::cmdvar)
{
switch (c)
{
@@ -315,7 +336,8 @@ namespace build2
// The following characters are special in the normal mode.
//
- if (m == lexer_mode::normal)
+ if (m == lexer_mode::normal ||
+ m == lexer_mode::cmdvar)
{
switch (c)
{
@@ -361,7 +383,7 @@ namespace build2
// This mode is quite a bit like the value mode when it comes to special
// characters, except that we have some of our own.
- bool sep (skip_spaces ());
+ bool sep (skip_spaces ().first);
xchar c (get ());
if (eos (c))
@@ -728,7 +750,7 @@ namespace build2
return token (move (lexeme), sep, qtype, qcomp, ln, cn);
}
- bool lexer::
+ pair<bool, bool> lexer::
skip_spaces ()
{
bool r (sep_);
@@ -739,7 +761,7 @@ namespace build2
// In some special modes we don't skip spaces.
//
if (!s.sep_space)
- return r;
+ return make_pair (r, false);
xchar c (peek ());
bool start (c.column == 1);
@@ -758,6 +780,8 @@ namespace build2
{
// In some modes we treat newlines as ordinary spaces.
//
+ // Note that in this case we don't adjust start.
+ //
if (!s.sep_newline)
{
r = true;
@@ -772,7 +796,7 @@ namespace build2
break;
}
- return r;
+ return make_pair (r, start);
}
case '#':
{
@@ -833,12 +857,12 @@ namespace build2
}
// Fall through.
default:
- return r; // Not a space.
+ return make_pair (r, start); // Not a space.
}
get ();
}
- return r;
+ return make_pair (r, start);
}
}
diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx
index 02112cb..c7e96fb 100644
--- a/libbuild2/lexer.hxx
+++ b/libbuild2/lexer.hxx
@@ -20,17 +20,18 @@
namespace build2
{
- // Context-dependent lexing mode. Quoted modes are internal and should not
- // be set explicitly. In the value mode we don't treat certain characters
- // (e.g., `+`, `=`) as special so that we can use them in the variable
- // values, e.g., `foo = g++`. In contrast, in the variable mode, we restrict
- // certain character (e.g., `/`) from appearing in the name. The values mode
- // is like value but recogizes `,` as special (used in contexts where we
- // need to list multiple values). The attributes/attribute_value modes are
- // like values where each value is potentially a variable assignment; they
- // don't treat `{` and `}` as special (so we cannot have name groups in
- // attributes) as well as recognizes `=` and `]`. The eval mode is used in
- // the evaluation context.
+ // Context-dependent lexing mode.
+ //
+ // Quoted modes are internal and should not be set explicitly. In the value
+ // mode we don't treat certain characters (e.g., `+`, `=`) as special so
+ // that we can use them in the variable values, e.g., `foo = g++`. In
+ // contrast, in the variable mode, we restrict certain characters (e.g., `/`)
+ // from appearing in the name. The values mode is like value but recognizes
+ // `,` as special (used in contexts where we need to list multiple
+ // values). The attributes/attribute_value modes are like values where each
+ // value is potentially a variable assignment; they don't treat `{` and `}`
+ // as special (so we cannot have name groups in attributes) as well as
+ // recognize `=` and `]`. The eval mode is used in the evaluation context.
//
// A number of modes are "derived" from the value/values mode by recognizing
// a few extra characters:
@@ -42,6 +43,9 @@ namespace build2
// split words separated by the pair character (to disable pairs one can
// pass `\0` as a pair character).
//
+ // The normal mode recognizes `%` at the beginning of the line as special.
+ // The cmdvar mode is like normal but does not treat `%` as special.
+ //
// The alternative modes must be set manually. The value/values and derived
// modes automatically expires after the end of the line. The attribute mode
// expires after the closing `]`. The variable mode expires after the word
@@ -70,6 +74,7 @@ namespace build2
enum
{
normal = base_type::value_next,
+ cmdvar,
variable,
value,
values,
@@ -189,11 +194,13 @@ namespace build2
virtual token
word (state current, bool separated);
- // Return true if we have seen any spaces. Skipped empty lines
- // don't count. In other words, we are only interested in spaces
- // that are on the same line as the following non-space character.
+ // Return true in first if we have seen any spaces. Skipped empty lines
+ // don't count. In other words, we are only interested in spaces that are
+ // on the same line as the following non-space character. Return true in
+ // second if we have started skipping spaces from column 1 (note that
+ // if this mode does not skip spaces, then second will always be false).
//
- bool
+ pair<bool, bool>
skip_spaces ();
// Diagnostics.
diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx
index e87ca95..000670b 100644
--- a/libbuild2/parser.cxx
+++ b/libbuild2/parser.cxx
@@ -335,6 +335,11 @@ namespace build2
while (tt != type::eos && !(one && parsed))
{
+ // Issue better diagnostics for stray `%`.
+ //
+ if (tt == type::percent)
+ fail (t) << "recipe without target";
+
// Extract attributes if any.
//
assert (attributes_.empty ());
diff --git a/libbuild2/test/script/lexer.cxx b/libbuild2/test/script/lexer.cxx
index 26d77b5..4683bc7 100644
--- a/libbuild2/test/script/lexer.cxx
+++ b/libbuild2/test/script/lexer.cxx
@@ -174,7 +174,7 @@ namespace build2
token lexer::
next_line ()
{
- bool sep (skip_spaces ());
+ bool sep (skip_spaces ().first);
xchar c (get ());
uint64_t ln (c.line), cn (c.column);
diff --git a/libbuild2/token.cxx b/libbuild2/token.cxx
index 4975a02..11b080e 100644
--- a/libbuild2/token.cxx
+++ b/libbuild2/token.cxx
@@ -24,6 +24,7 @@ namespace build2
case token_type::colon: os << q << ':' << q; break;
case token_type::dollar: os << q << '$' << q; break;
case token_type::question: os << q << '?' << q; break;
+ case token_type::percent: os << q << '%' << q; break;
case token_type::comma: os << q << ',' << q; break;
case token_type::lparen: os << q << '(' << q; break;
diff --git a/libbuild2/token.hxx b/libbuild2/token.hxx
index e48c088..8dad4ba 100644
--- a/libbuild2/token.hxx
+++ b/libbuild2/token.hxx
@@ -36,6 +36,7 @@ namespace build2
colon, // :
dollar, // $
question, // ?
+ percent, // %
comma, // ,
lparen, // (