aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2015-03-06 09:15:40 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2015-03-06 09:15:40 +0200
commit 897a0e4fdf9ca90ee8d236a38e138a8ae6bc3627 (patch)
tree d4c00de32d028823906d342fcd984faee8d977ff
parent 9ef25ab2f9da89ab48ecce3fe1b8cbb0bc5f1e09 (diff)
Add support for lexing and parsing name pairs
We will need it for the buildspec and also if/when we support map variable types.
-rw-r--r-- build/lexer 30
-rw-r--r-- build/lexer.cxx 77
-rw-r--r-- build/name 4
-rw-r--r-- build/name.cxx 14
-rw-r--r-- build/parser 5
-rw-r--r-- build/parser.cxx 134
6 files changed, 218 insertions, 46 deletions
diff --git a/build/lexer b/build/lexer
index d6817f2..787ba72 100644
--- a/build/lexer
+++ b/build/lexer
@@ -15,11 +15,29 @@
namespace build
{
+ // Context-dependent lexing mode. In the value mode we don't treat
+ // certain characters (e.g., +, =) as special so that we can use
+ // them in the variable values, e.g., 'foo = g++'. In contrast,
+ // in the variable mode, we restrict certain characters (e.g., /)
+ // from appearing in the name. The pairs mode is just like value
+ // except that we split names separated by '='. The pairs mode must
+ // be set manually.
+ //
+ enum class lexer_mode {normal, value, variable, pairs};
+
class lexer
{
public:
lexer (std::istream& is, const std::string& name): is_ (is), fail (name) {}
+ // Note: sets mode for the next token.
+ //
+ void
+ mode (lexer_mode m) {next_mode_ = m;}
+
+ lexer_mode
+ mode () const {return mode_;}
+
// Scanner.
//
token
@@ -108,15 +126,9 @@ namespace build
xchar buf_ {0, 0, 0};
bool eos_ {false};
-
- // Context-dependent lexing mode. In the value mode we don't treat
- // certain characters (e.g., +, =) as special so that we can use
- // them in the variable values, e.g., 'foo = g++'. In contrast,
- // in the variable mode, we restrict certain character (e.g., /)
- // from appearing in the name.
- //
- enum class mode {normal, value, variable};
- mode mode_ {mode::normal};
+ lexer_mode mode_ {lexer_mode::normal};
+ lexer_mode next_mode_; // Mode to switch to for the next token.
+ lexer_mode prev_mode_; // Mode to return to after this mode expires.
};
}
diff --git a/build/lexer.cxx b/build/lexer.cxx
index 5f394fc..9683567 100644
--- a/build/lexer.cxx
+++ b/build/lexer.cxx
@@ -11,6 +11,12 @@ namespace build
token lexer::
next ()
{
+ if (mode_ != next_mode_)
+ {
+ prev_mode_ = mode_;
+ mode_ = next_mode_;
+ }
+
bool sep (skip_spaces ());
xchar c (get ());
@@ -27,8 +33,8 @@ namespace build
{
// Restore the normal mode at the end of the line.
//
- if (mode_ == mode::value)
- mode_ = mode::normal;
+ if (mode_ == lexer_mode::value || mode_ == lexer_mode::pairs)
+ mode_ = next_mode_ = lexer_mode::normal;
return token (token_type::newline, sep, ln, cn);
}
@@ -42,7 +48,10 @@ namespace build
}
case '$':
{
- mode_ = mode::variable; // The next name is lexed in the var mode.
+ // The following name is lexed in the variable mode.
+ //
+ next_mode_ = lexer_mode::variable;
+
return token (token_type::dollar, sep, ln, cn);
}
case '(':
@@ -56,9 +65,9 @@ namespace build
}
// The following characters are not treated as special in the
- // value mode.
+ // value or pairs mode.
//
- if (mode_ != mode::value)
+ if (mode_ != lexer_mode::value && mode_ != lexer_mode::pairs)
{
// NOTE: remember to update name() if adding new punctuations.
//
@@ -68,22 +77,39 @@ namespace build
{
return token (token_type::colon, sep, ln, cn);
}
- case '=':
- {
- mode_ = mode::value;
- return token (token_type::equal, sep, ln, cn);
- }
case '+':
{
if (get () != '=')
fail (c) << "expected = after +";
- mode_ = mode::value;
+ next_mode_ = lexer_mode::value;
return token (token_type::plus_equal, sep, ln, cn);
}
}
}
+ // The following characters are not treated as special in the
+ // value mode.
+ //
+ if (mode_ != lexer_mode::value)
+ {
+ // NOTE: remember to update name() if adding new punctuations.
+ //
+ switch (c)
+ {
+ case '=':
+ {
+ // Unless we are already in the pairs mode, switch to the
+ // value mode.
+ //
+ if (next_mode_ != lexer_mode::pairs)
+ next_mode_ = lexer_mode::value;
+
+ return token (token_type::equal, sep, ln, cn);
+ }
+ }
+ }
+
// Otherwise it is a name.
//
return name (c, sep);
@@ -101,14 +127,13 @@ namespace build
bool done (false);
// The following characters are not treated as special in the
- // value mode.
+ // value or pairs mode.
//
- if (mode_ != mode::value)
+ if (mode_ != lexer_mode::value && mode_ != lexer_mode::pairs)
{
switch (c)
{
case ':':
- case '=':
case '+':
{
done = true;
@@ -120,10 +145,28 @@ namespace build
break;
}
+ // The following characters are not treated as special in the
+ // value mode.
+ //
+ if (mode_ != lexer_mode::value)
+ {
+ switch (c)
+ {
+ case '=':
+ {
+ done = true;
+ break;
+ }
+ }
+
+ if (done)
+ break;
+ }
+
// While these extra characters are treated as the name end in
// the variable mode.
//
- if (mode_ == mode::variable)
+ if (mode_ == lexer_mode::variable)
{
switch (c)
{
@@ -171,8 +214,8 @@ namespace build
break;
}
- if (mode_ == mode::variable)
- mode_ = mode::normal;
+ if (mode_ == lexer_mode::variable)
+ next_mode_ = prev_mode_;
return token (lexeme, sep, ln, cn);
}
diff --git a/build/name b/build/name
index 00f0c00..fc89f44 100644
--- a/build/name
+++ b/build/name
@@ -19,6 +19,9 @@ namespace build
// without a type and directory can be used to represent any text.
// A name with directory and empty value represents a directory.
//
+ // If pair is true, then this name and the next in the list form
+ // a pair.
+ //
struct name
{
explicit
@@ -33,6 +36,7 @@ namespace build
std::string type;
path dir;
std::string value;
+ bool pair {false};
};
typedef std::vector<name> names;
diff --git a/build/name.cxx b/build/name.cxx
index 6280676..46e2440 100644
--- a/build/name.cxx
+++ b/build/name.cxx
@@ -17,6 +17,7 @@ namespace build
{
bool ht (!n.type.empty ());
bool hv (!n.value.empty ());
+ bool hd (false);
if (ht)
os << n.type << '{';
@@ -37,6 +38,8 @@ namespace build
//
if (s.back () != path::traits::directory_separator && (hv || !ht))
os << path::traits::directory_separator;
+
+ hd = true;
}
}
@@ -45,14 +48,21 @@ namespace build
if (ht)
os << '}';
+ if (!ht && !hv && !hd)
+ os << "{}"; // Nothing got printed.
+
return os;
}
ostream&
operator<< (ostream& os, const names& ns)
{
- for (auto b (ns.begin ()), i (b), e (ns.end ()); i != e; ++i)
- os << (i != b ? " " : "") << *i;
+ for (auto i (ns.begin ()), e (ns.end ()); i != e; )
+ {
+ const name& n (*i);
+ ++i;
+ os << n << (n.pair ? "=" : (i != e ? " " : ""));
+ }
return os;
}
diff --git a/build/parser b/build/parser
index 6e0be34..4375b27 100644
--- a/build/parser
+++ b/build/parser
@@ -51,12 +51,13 @@ namespace build
names (token& t, token_type& tt)
{
names_type ns;
- names (t, tt, ns, nullptr, nullptr);
+ names (t, tt, ns, 0, nullptr, nullptr);
return ns;
}
void
- names (token&, token_type&, names_type&,
+ names (token&, token_type&,
+ names_type&, std::size_t pair,
const path* dir, const std::string* type);
// Utilities.
diff --git a/build/parser.cxx b/build/parser.cxx
index 8f4f81d..56a61c5 100644
--- a/build/parser.cxx
+++ b/build/parser.cxx
@@ -191,6 +191,12 @@ namespace build
if (tt == type::colon)
{
+ // While '{}:' means empty name, '{$x}:' where x is empty list
+ // means empty list.
+ //
+ if (ns.empty ())
+ fail (t) << "target expected before :";
+
next (t, tt);
if (tt == type::newline)
@@ -625,8 +631,18 @@ namespace build
}
void parser::
- names (token& t, type& tt, names_type& ns, const path* dp, const string* tp)
+ names (token& t,
+ type& tt,
+ names_type& ns,
+ size_t pair,
+ const path* dp,
+ const string* tp)
{
+ // If pair is not 0, then it is an index + 1 of the first half of
+ // the pair for which we are parsing the second halves, e.g.,
+ // a={b c d{e f} {}}.
+ //
+
// Buffer that is used to collect the complete name in case of an
// unseparated variable expansion, e.g., 'foo$bar$(baz)fox'. The
// idea is to concatenate all the individual parts in this buffer
@@ -634,6 +650,12 @@ namespace build
//
string concat;
+ // Number of names in the last group. This is used to detect when
+ // we need to add an empty first pair element (e.g., {=y}) or when
+ // we have a multi-name LHS (e.g., {x y}=z), which is not yet supported.
+ //
+ size_t count (0);
+
for (bool first (true);; first = false)
{
// If the accumulating buffer is not empty, then we have two options:
@@ -714,7 +736,14 @@ namespace build
}
next (t, tt);
- names (t, tt, ns, dp1, tp1);
+ count = ns.size ();
+ names (t, tt,
+ ns,
+ (pair != 0
+ ? pair
+ : (ns.empty () || !ns.back ().pair ? 0 : ns.size ())),
+ dp1, tp1);
+ count = ns.size () - count;
if (tt != type::rcbrace)
fail (t) << "expected } instead of " << t;
@@ -723,6 +752,12 @@ namespace build
continue;
}
+ // If we are a second half of a pair, add another first half
+ // unless this is the first instance.
+ //
+ if (pair != 0 && pair != ns.size ())
+ ns.push_back (ns[pair - 1]);
+
// If it ends with a directory separator, then it is a directory.
// Note that at this stage we don't treat '.' and '..' as special
// (unless they are specified with a directory separator) because
@@ -753,20 +788,7 @@ namespace build
(dp != nullptr ? *dp : path ()),
move (name));
- continue;
- }
-
- // Untyped name group without a directory prefix, e.g., '{foo bar}'.
- //
- if (tt == type::lcbrace)
- {
- next (t, tt);
- names (t, tt, ns, dp, tp);
-
- if (tt != type::rcbrace)
- fail (t) << "expected } instead of " << t;
-
- tt = peek ();
+ count = 1;
continue;
}
@@ -887,20 +909,91 @@ namespace build
<< "expansion";
}
+ // If we are a second half of a pair.
+ //
+ if (pair != 0)
+ {
+ // Check that there are no nested pairs.
+ //
+ if (n.pair)
+ fail (t) << "nested pair in variable expansion";
+
+ // And add another first half unless this is the first instance.
+ //
+ if (pair != ns.size ())
+ ns.push_back (ns[pair - 1]);
+ }
+
ns.emplace_back ((tp1 != nullptr ? *tp1 : string ()),
(dp1 != nullptr ? *dp1 : path ()),
n.value);
}
+
+ count = lv.data.size ();
}
continue;
}
+ // Untyped name group without a directory prefix, e.g., '{foo bar}'.
+ //
+ if (tt == type::lcbrace)
+ {
+ next (t, tt);
+ count = ns.size ();
+ names (t, tt,
+ ns,
+ (pair != 0
+ ? pair
+ : (ns.empty () || !ns.back ().pair ? 0 : ns.size ())),
+ dp, tp);
+ count = ns.size () - count;
+
+ if (tt != type::rcbrace)
+ fail (t) << "expected } instead of " << t;
+
+ tt = peek ();
+ continue;
+ }
+
+ // A pair separator (only in the pair mode).
+ //
+ if (tt == type::equal && lexer_->mode () == lexer_mode::pairs)
+ {
+ if (pair != 0)
+ fail (t) << "nested pair on the right hand side of a pair";
+
+ if (count > 1)
+ fail (t) << "multiple names on the left hand side of a pair";
+
+ if (count == 0)
+ {
+ // Empty LHS (e.g., {=y}): create an empty name.
+ //
+ ns.emplace_back ((tp != nullptr ? *tp : string ()),
+ (dp != nullptr ? *dp : path ()),
+ "");
+ count = 1;
+ }
+
+ ns.back ().pair = true;
+ tt = peek ();
+ continue;
+ }
+
if (!first)
break;
+ // Our caller expected this to be a name.
+ //
if (tt == type::rcbrace) // Empty name, e.g., dir{}.
{
+ // If we are a second half of a pair, add another first half
+ // unless this is the first instance.
+ //
+ if (pair != 0 && pair != ns.size ())
+ ns.push_back (ns[pair - 1]);
+
ns.emplace_back ((tp != nullptr ? *tp : string ()),
(dp != nullptr ? *dp : path ()),
"");
@@ -909,6 +1002,15 @@ namespace build
else
fail (t) << "expected name instead of " << t;
}
+
+ // Handle the empty RHS in a pair (e.g., {y=}).
+ //
+ if (!ns.empty () && ns.back ().pair)
+ {
+ ns.emplace_back ((tp != nullptr ? *tp : string ()),
+ (dp != nullptr ? *dp : path ()),
+ "");
+ }
}
void parser::