aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2021-05-25 11:19:04 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2021-05-28 10:10:44 +0200
commit6b3e75edf034ebcbd048a24c283c7bcf7b1da019 (patch)
tree4dff34692fd09c6f201a486c0d92549adc864947
parentbb02e152dc036879ab0b2d1d8aa2cb19084b8e16 (diff)
Add support for regex-based target type/pattern specific variables
This is in addition to the already supported path-based target type/pattern specific variables. For example:

  hxx{*}: x = y      # path-based
  hxx{~/.*/}: x = y  # regex-based
-rw-r--r--libbuild2/dump.cxx10
-rw-r--r--libbuild2/lexer.cxx19
-rw-r--r--libbuild2/name.cxx81
-rw-r--r--libbuild2/name.hxx38
-rw-r--r--libbuild2/name.ixx15
-rw-r--r--libbuild2/parser.cxx448
-rw-r--r--libbuild2/parser.hxx14
-rw-r--r--libbuild2/scope.cxx8
-rw-r--r--libbuild2/token.hxx3
-rw-r--r--libbuild2/types.hxx10
-rw-r--r--libbuild2/variable.cxx95
-rw-r--r--libbuild2/variable.hxx64
-rw-r--r--old-tests/variable/type-pattern/buildfile39
-rw-r--r--tests/variable/target-type-pattern-specific/testscript127
14 files changed, 710 insertions, 261 deletions
diff --git a/libbuild2/dump.cxx b/libbuild2/dump.cxx
index bc44b24..23d430e 100644
--- a/libbuild2/dump.cxx
+++ b/libbuild2/dump.cxx
@@ -147,6 +147,9 @@ namespace build2
const variable_type_map& vtm,
const scope& s)
{
+ using pattern = variable_pattern_map::pattern;
+ using pattern_type = variable_pattern_map::pattern_type;
+
for (const auto& vt: vtm)
{
const target_type& t (vt.first);
@@ -154,7 +157,7 @@ namespace build2
for (const auto& vp: vpm)
{
- const string p (vp.first);
+ const pattern& pat (vp.first);
const variable_map& vars (vp.second);
os << endl
@@ -163,7 +166,10 @@ namespace build2
if (t != target::static_type)
os << t.name << '{';
- os << p;
+ if (pat.type == pattern_type::regex_pattern)
+ os << '~';
+
+ os << pat.text;
if (t != target::static_type)
os << '}';
diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx
index 0b6f96d..f445d4b 100644
--- a/libbuild2/lexer.cxx
+++ b/libbuild2/lexer.cxx
@@ -674,18 +674,15 @@ namespace build2
bool qcomp (false);
bool qfirst (false);
- auto append = [&lexeme, &m, &qcomp, &qfirst] (char c)
+ auto append = [&lexeme, &m, &qcomp, &qfirst] (char c, bool escaped = false)
{
- if (m == lexer_mode::double_quoted)
- {
- if (lexeme.empty ()) // First character.
+ if (lexeme.empty () && (escaped || m == lexer_mode::double_quoted))
qfirst = true;
- }
- else
- {
- if (qcomp) // An unquoted character after a quoted fragment.
- qcomp = false;
- }
+
+ // An unquoted character after a quoted fragment.
+ //
+ if (m != lexer_mode::double_quoted && qcomp)
+ qcomp = false;
lexeme += c;
};
@@ -716,7 +713,7 @@ namespace build2
fail (p) << "unterminated escape sequence";
if (p != '\n') // Ignore if line continuation.
- append (p);
+ append (p, true);
continue;
}
diff --git a/libbuild2/name.cxx b/libbuild2/name.cxx
index 6a33a63..1081b5c 100644
--- a/libbuild2/name.cxx
+++ b/libbuild2/name.cxx
@@ -82,8 +82,21 @@ namespace build2
ostream&
to_stream (ostream& os, const name& n, bool quote, char pair, bool escape)
{
- auto write_string = [quote, pair, escape, &os](const string& v, bool pat)
+ using pattern_type = name::pattern_type;
+
+ auto write_string = [&os, quote, pair, escape] (
+ const string& v,
+ optional<pattern_type> pat = nullopt,
+ bool curly = false)
{
+ // Special characters, path pattern characters, and regex pattern
+ // characters. The latter only need to be quoted in the first position
+ // and if followed by a non-alphanumeric delimiter. If that's the only
+ // special character, then we handle it with escaping rather than
+ // quoting (see the parsing logic for rationale). Additionally, we
+ // escape leading `+` in the curly braces which is also recognized as a
+ // path pattern.
+ //
char sc[] = {
'{', '}', '[', ']', '$', '(', ')', // Token endings.
' ', '\t', '\n', '#', // Spaces.
@@ -93,9 +106,24 @@ namespace build2
'\0'};
char pc[] = {
- '*', '?', // Wildcard characters.
+ '*', '?', // Path wildcard characters.
'\0'};
+ auto rc = [] (const string& v)
+ {
+ return (v[0] == '~' || v[0] == '^') && v[1] != '\0' && !alnum (v[1]);
+ };
+
+ if (pat)
+ {
+ switch (*pat)
+ {
+ case pattern_type::path: break;
+ case pattern_type::regex_pattern: os << '~'; break;
+ case pattern_type::regex_substitution: os << '^'; break;
+ }
+ }
+
if (quote && v.find ('\'') != string::npos)
{
// Quote the string with the double quotes rather than with the single
@@ -115,6 +143,11 @@ namespace build2
if (escape) os << '\\';
os << '"';
}
+ //
+ // Note that a regex pattern does not need to worry about special path
+ // pattern characters but not vice versa. See the parsing logic for
+ // details.
+ //
else if (quote && (v.find_first_of (sc) != string::npos ||
(!pat && v.find_first_of (pc) != string::npos)))
{
@@ -126,17 +159,32 @@ namespace build2
if (escape) os << '\\';
os << '\'';
}
+ // Note that currently we do not preserve a leading `+` as a pattern
+ // unless it has other wildcard characters (see the parsing code for
+ // details). So we escape it both if it's not a pattern or is a path
+ // pattern.
+ //
+ else if (quote && ((!pat || *pat == pattern_type::path) &&
+ ((v[0] == '+' && curly) || rc (v))))
+ {
+ if (escape) os << '\\';
+ os << '\\' << v;
+ }
else
os << v;
};
uint16_t dv (stream_verb (os).path); // Directory verbosity.
- auto write_dir = [dv, quote, &os, &write_string] (const dir_path& d,
- bool pat)
+ auto write_dir = [&os, quote, &write_string, dv] (
+ const dir_path& d,
+ optional<pattern_type> pat = nullopt,
+ bool curly = false)
{
if (quote)
- write_string (dv < 1 ? diag_relative (d) : d.representation (), pat);
+ write_string (dv < 1 ? diag_relative (d) : d.representation (),
+ pat,
+ curly);
else
os << d;
};
@@ -151,7 +199,7 @@ namespace build2
if (n.proj)
{
- write_string (n.proj->string (), false);
+ write_string (n.proj->string ());
os << '%';
}
@@ -171,29 +219,34 @@ namespace build2
dir_path ());
if (!pd.empty ())
- write_dir (pd, false);
+ write_dir (pd);
- if (t || (!d && !v))
+ bool curly;
+ if ((curly = t || (!d && !v)))
{
if (t)
- write_string (n.type, false);
+ write_string (n.type);
os << '{';
}
if (v)
- write_string (n.value, n.pattern);
+ write_string (n.value, n.pattern, curly);
else if (d)
{
+ // A directory pattern cannot be regex.
+ //
+ assert (!n.pattern || *n.pattern == pattern_type::path);
+
if (rd.empty ())
- write_string (dir_path (".").representation (), false);
+ write_string (dir_path (".").representation (), nullopt, curly);
else if (!pd.empty ())
- write_string (rd.leaf ().representation (), n.pattern);
+ write_string (rd.leaf ().representation (), n.pattern, curly);
else
- write_dir (rd, n.pattern);
+ write_dir (rd, n.pattern, curly);
}
- if (t || (!d && !v))
+ if (curly)
os << '}';
return os;
diff --git a/libbuild2/name.hxx b/libbuild2/name.hxx
index 5c76d07..216f207 100644
--- a/libbuild2/name.hxx
+++ b/libbuild2/name.hxx
@@ -33,16 +33,21 @@ namespace build2
// If pair is not '\0', then this name and the next in the list form a
// pair. Can be used as a bool flag.
//
- // If pattern is true then this is a name pattern (e.g., file{*.txt}).
+ // If pattern is present then this is a name pattern (e.g., file{*.txt},
+ // file{~'/(.+)\.txt/i'}, file{^'/\1/'}). A directory name cannot be a regex
+ // pattern (since we would need to store it in dir_path and a regex is not
+ // necessarily a valid path).
//
struct name
{
+ enum class pattern_type: uint8_t {path, regex_pattern, regex_substitution};
+
optional<project_name> proj;
dir_path dir;
string type;
string value;
char pair = '\0';
- bool pattern = false;
+ optional<pattern_type> pattern;
name () {} // = default; Clang needs this to initialize const object.
name (string v): value (move (v)) {}
@@ -57,13 +62,16 @@ namespace build2
: proj (project_name (move (p))), dir (move (d)), type (move (t)),
value (move (v)) {}
- name (optional<project_name> p,
- dir_path d,
- string t,
- string v,
- bool pat = false)
+ name (optional<project_name> p, dir_path d, string t, string v)
+ : proj (move (p)), dir (move (d)), type (move (t)), value (move (v)) {}
+
+ name (optional<project_name> p,
+ dir_path d,
+ string t,
+ string v,
+ optional<pattern_type> pt)
: proj (move (p)), dir (move (d)), type (move (t)), value (move (v)),
- pattern (pat) {}
+ pattern (pt) {}
bool
qualified () const {return proj.has_value ();}
@@ -162,7 +170,8 @@ namespace build2
cs.append (n.type);
cs.append (n.value);
cs.append (n.pair);
- cs.append (n.pattern);
+ if (n.pattern)
+ cs.append (static_cast<uint8_t> (*n.pattern));
}
// Store a string in a name in a reversible way. If the string ends with a
@@ -173,14 +182,21 @@ namespace build2
to_name (string);
// Serialize the name to the stream. If requested, the name components
- // containing special characters are quoted. The special characters are:
+ // containing special characters are quoted and/or escaped. The special
+ // characters are:
//
// {}[]$() \t\n#\"'%
//
- // And additionally, if name is not a pattern:
+ // And additionally, unless name is a pattern:
//
// *?
//
+ // As well as leading and if followed by a non-alphanumeric delimiter:
+ //
+ // ~^
+ //
+ // As well as leading `+` if in the curly braces.
+ //
// If the pair argument is not '\0', then it is added to the above special
// characters set. If the quote character is present in the component then
// it is double quoted rather than single quoted. In this case the following
diff --git a/libbuild2/name.ixx b/libbuild2/name.ixx
index 80a097e..a3ee94b 100644
--- a/libbuild2/name.ixx
+++ b/libbuild2/name.ixx
@@ -21,7 +21,20 @@ namespace build2
r = pair < x.pair ? -1 : (pair > x.pair ? 1 : 0);
if (r == 0)
- r = pattern == x.pattern ? 0 : (!pattern && x.pattern ? -1 : 1);
+ {
+ bool p (pattern);
+ bool xp (x.pattern);
+
+ r = p == xp ? 0 : (p ? 1 : -1);
+
+ if (r == 0 && p)
+ {
+ auto p (static_cast<uint8_t> (*pattern));
+ auto xp (static_cast<uint8_t> (*x.pattern));
+
+ r = p < xp ? -1 : (p > xp ? 1 : 0);
+ }
+ }
return r;
}
diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx
index a9646d5..120d6ab 100644
--- a/libbuild2/parser.cxx
+++ b/libbuild2/parser.cxx
@@ -638,7 +638,9 @@ namespace build2
// sensitive to the target context in which they are evaluated. The
// function signature is:
//
- // void (token& t, type& tt, const target_type* type, string pat)
+ // void (token& t, type& tt,
+ // optional<pattern_type>, const target_type* pat_tt, string pat,
+ // const location& pat_loc)
//
// Note that the target and its ad hoc members are inserted implied
// but this flag can be cleared and default_target logic applied if
@@ -695,42 +697,51 @@ namespace build2
//
// foo*/dir{*/} -- foo*/*/dir{}
//
- if (n.value.empty () && !n.dir.empty ())
+ // Note also that none of this applies to regex patterns (see
+ // the parsing code for details).
+ //
+ if (*n.pattern == pattern_type::path)
{
- // Note that we use string and not the representation: in a
- // sense the trailing slash in the pattern is subsumed by the
- // target type.
- //
- if (n.dir.simple ())
- n.value = move (n.dir).string ();
- else
+ if (n.value.empty () && !n.dir.empty ())
{
- n.value = n.dir.leaf ().string ();
- n.dir.make_directory ();
- }
+ // Note that we use string and not the representation: in a
+ // sense the trailing slash in the pattern is subsumed by
+ // the target type.
+ //
+ if (n.dir.simple ())
+ n.value = move (n.dir).string ();
+ else
+ {
+ n.value = n.dir.leaf ().string ();
+ n.dir.make_directory ();
+ }
- // Treat directory as type dir{} similar to other places.
- //
- if (n.untyped ())
- n.type = "dir";
- }
- else
- {
- // Move the directory part, if any, from value to dir.
- //
- try
- {
- n.canonicalize ();
- }
- catch (const invalid_path& e)
- {
- fail (nloc) << "invalid path '" << e.path << "'";
+ // Treat directory as type dir{} similar to other places.
+ //
+ if (n.untyped ())
+ n.type = "dir";
}
- catch (const invalid_argument&)
+ else
{
- fail (nloc) << "invalid pattern '" << n.value << "'";
+ // Move the directory part, if any, from value to dir.
+ //
+ try
+ {
+ n.canonicalize ();
+ }
+ catch (const invalid_path& e)
+ {
+ fail (nloc) << "invalid path '" << e.path << "'";
+ }
+ catch (const invalid_argument&)
+ {
+ fail (nloc) << "invalid pattern '" << n.value << "'";
+ }
}
}
+ else if (*n.pattern == pattern_type::regex_substitution)
+ fail (nloc) << "regex substitution " << n << " without "
+ << "regex pattern";
// If we have the directory, then it is the scope.
//
@@ -760,7 +771,7 @@ namespace build2
if (ti == nullptr)
fail (nloc) << "unknown target type " << n.type;
- f (t, tt, ti, move (n.value));
+ f (t, tt, n.pattern, ti, move (n.value), nloc);
}
else
{
@@ -781,7 +792,7 @@ namespace build2
enter_adhoc_members (move (ans[i]), true /* implied */);
}
- f (t, tt, nullptr, string ());
+ f (t, tt, nullopt, nullptr, string (), location ());
}
if (++i != e)
@@ -832,7 +843,8 @@ namespace build2
st = token (t), // Save start token (will be gone on replay).
recipes = small_vector<shared_ptr<adhoc_rule>, 1> ()]
(token& t, type& tt,
- const target_type* type, string pat) mutable
+ optional<pattern_type> pt, const target_type* ptt, string pat,
+ const location& ploc) mutable
{
token rt; // Recipe start token.
@@ -842,7 +854,7 @@ namespace build2
{
next (t, tt); // Newline.
next (t, tt); // First token inside the variable block.
- parse_variable_block (t, tt, type, move (pat));
+ parse_variable_block (t, tt, pt, ptt, move (pat), ploc);
if (tt != type::rcbrace)
fail (t) << "expected '}' instead of " << t;
@@ -858,7 +870,7 @@ namespace build2
else
rt = st;
- if (type != nullptr)
+ if (pt)
fail (rt) << "recipe in target type/pattern";
parse_recipe (t, tt, rt, recipes);
@@ -921,17 +933,19 @@ namespace build2
// Parse the assignment for each target.
//
- for_each ([this, &var, akind, &aloc] (token& t, type& tt,
- const target_type* type,
- string pat)
- {
- if (type == nullptr)
- parse_variable (t, tt, var, akind);
- else
- parse_type_pattern_variable (t, tt,
- *type, move (pat),
- var, akind, aloc);
- });
+ for_each (
+ [this, &var, akind, &aloc] (
+ token& t, type& tt,
+ optional<pattern_type> pt, const target_type* ptt, string pat,
+ const location& ploc)
+ {
+ if (pt)
+ parse_type_pattern_variable (t, tt,
+ *pt, *ptt, move (pat), ploc,
+ var, akind, aloc);
+ else
+ parse_variable (t, tt, var, akind);
+ });
next_after_newline (t, tt);
}
@@ -1110,7 +1124,8 @@ namespace build2
void parser::
parse_variable_block (token& t, type& tt,
- const target_type* type, string pat)
+ optional<pattern_type> pt, const target_type* ptt,
+ string pat, const location& ploc)
{
// Parse a target or prerequisite-specific variable block. If type is not
// NULL, then this is a target type/pattern-specific block.
@@ -1148,12 +1163,12 @@ namespace build2
<< " visibility but is assigned on a target";
}
- if (type == nullptr)
- parse_variable (t, tt, var, tt);
- else
+ if (pt)
parse_type_pattern_variable (t, tt,
- *type, pat, // Note: can't move.
+ *pt, *ptt, pat, ploc, // Note: can't move.
var, tt, get_location (t));
+ else
+ parse_variable (t, tt, var, tt);
if (tt != type::newline)
fail (t) << "expected newline instead of " << t;
@@ -3835,24 +3850,34 @@ namespace build2
}
void parser::
- parse_type_pattern_variable (token& t, token_type& tt,
- const target_type& type, string pat,
- const variable& var, token_type kind,
- const location& loc)
+ parse_type_pattern_variable (
+ token& t, token_type& tt,
+ pattern_type pt, const target_type& ptt, string pat, const location& ploc,
+ const variable& var, token_type kind, const location& loc)
{
// Parse target type/pattern-specific variable assignment.
//
- // See old-tests/variable/type-pattern.
// Note: expanding the value in the current scope context.
//
value rhs (parse_variable_value (t, tt));
- // Leave the value untyped unless we are assigning.
- //
- pair<reference_wrapper<value>, bool> p (
- scope_->target_vars[type][move (pat)].insert (
- var, kind == type::assign));
+ pair<reference_wrapper<value>, bool> p (rhs /* dummy */, false);
+ try
+ {
+ // Leave the value untyped unless we are assigning.
+ //
+ // Note that the pattern is preserved if insert fails with regex_error.
+ //
+ p = scope_->target_vars[ptt].insert (pt, move (pat)).insert (
+ var, kind == type::assign);
+ }
+ catch (const regex_error& e)
+ {
+ // Print regex_error description if meaningful (no space).
+ //
+ fail (ploc) << "invalid regex pattern '" << pat << "'" << e;
+ }
value& lhs (p.first);
@@ -4676,7 +4701,7 @@ namespace build2
dir_path d,
string t,
string v,
- bool pat,
+ optional<name::pattern_type> pat,
const location& loc)
{
// The directory/value must not be empty if we have a type.
@@ -4792,8 +4817,9 @@ namespace build2
}
name& r (
- append_name (
- ns, move (p), move (d), move (t), move (v), cn.pattern, loc));
+ append_name (ns,
+ move (p), move (d), move (t), move (v), cn.pattern,
+ loc));
r.pair = cn.pair;
}
@@ -5492,6 +5518,7 @@ namespace build2
// Return '+' or '-' if a token can start an inclusion or exclusion
// (pattern or group), '\0' otherwise. The result can be used as bool.
+ // Note that token::qfirst covers both quoting and escaping.
//
auto pattern_prefix = [] (const token& t) -> char
{
@@ -5758,9 +5785,9 @@ namespace build2
// Find a separator (slash or %).
//
- string::size_type p (separators != nullptr
- ? val.find_last_of (*separators)
- : string::npos);
+ string::size_type pos (separators != nullptr
+ ? val.find_last_of (*separators)
+ : string::npos);
// First take care of project. A project-qualified name is not very
// common, so we can afford some copying for the sake of simplicity.
@@ -5768,10 +5795,10 @@ namespace build2
optional<project_name> p1;
const optional<project_name>* pp1 (&pp);
- if (p != string::npos)
+ if (pos != string::npos)
{
- bool last (val[p] == '%');
- string::size_type q (last ? p : val.rfind ('%', p - 1));
+ bool last (val[pos] == '%');
+ string::size_type q (last ? pos : val.rfind ('%', pos - 1));
for (; q != string::npos; ) // Breakout loop.
{
@@ -5801,13 +5828,13 @@ namespace build2
// Now fix the rest of the name.
//
val.erase (0, q + 1);
- p = last ? string::npos : p - (q + 1);
+ pos = last ? string::npos : pos - (q + 1);
break;
}
}
- string::size_type n (p != string::npos ? val.size () - 1 : 0);
+ size_t size (pos != string::npos ? val.size () - 1 : 0);
// See if this is a type name, directory prefix, or both. That
// is, it is followed by an un-separated '{'.
@@ -5834,7 +5861,7 @@ namespace build2
}
}
- if (p != n && tp != nullptr && !pinc)
+ if (pos != size && tp != nullptr && !pinc)
fail (loc) << "nested type name " << val;
dir_path d1;
@@ -5845,9 +5872,9 @@ namespace build2
try
{
- if (p == string::npos) // type
+ if (pos == string::npos) // type
tp1 = &val;
- else if (p == n) // directory
+ else if (pos == size) // directory
{
if (dp == nullptr)
d1 = dir_path (val);
@@ -5858,12 +5885,12 @@ namespace build2
}
else // both
{
- t1.assign (val, p + 1, n - p);
+ t1.assign (val, pos + 1, size - pos);
if (dp == nullptr)
- d1 = dir_path (val, 0, p + 1);
+ d1 = dir_path (val, 0, pos + 1);
else
- d1 = *dp / dir_path (val, 0, p + 1);
+ d1 = *dp / dir_path (val, 0, pos + 1);
dp1 = &d1;
tp1 = &t1;
@@ -5893,102 +5920,212 @@ namespace build2
continue;
}
- // See if this is a wildcard pattern.
+ // See if this is a pattern, path or regex.
+ //
+ // A path pattern either contains an unquoted wildcard character or,
+ // in the curly context, starts with unquoted/unescaped `+`.
+ //
+ // A regex pattern starts with unquoted/unescaped `~` followed by a
+ // non-alphanumeric delimiter and has the following form:
+ //
+ // ~/<pat>/[<flags>]
+ //
+ // A regex substitution starts with unquoted/unescaped '^' followed by
+ // a non-alphanumeric delimiter and has the following form:
+ //
+ // ^/<sub>/[<flags>]
+ //
+ // Any non-alphanumeric character other than `/` can be used as a
+ // delimiter but escaping of the delimiter character is not supported
+ // (one benefit of this is that we can store and print the pattern as
+ // is without worrying about escaping; the non-alphanumeric part is to
+ // allow values like ~host and ^cat).
//
- // It should either contain a wildcard character or, in a curly
- // context, start with unquoted '+'.
+ // The following pattern flags are recognized:
//
- // Note that in the general case we need to convert it to a path prior
- // to testing for being a pattern (think of b[a/r] that is not a
- // pattern). If the conversion fails then this is not a path pattern.
+ // i -- match ignoring case
+ // e -- match including extension
//
- auto pattern = [&val, &loc, this] ()
+ // Note that we cannot express certain path patterns that start with
+ // the regex introducer using quoting (for example, `~*`) since
+ // quoting prevents the whole from being recognized as a path
+ // pattern. However, we can achieve this with escaping (for example,
+ // \~*). This works automatically since we treat (at the lexer level)
+ // escaped first characters as quoted without treating the whole thing
+ // as quoted. Note that there is also the corresponding logic in
+ // to_stream(name).
+ //
+ // A pattern cannot be project-qualified.
+ //
+ optional<pattern_type> pat;
+
+ if (pmode != pattern_mode::ignore && !*pp1)
{
- // Let's optimize it a bit for the common cases.
+ // Note that in the general case we need to convert it to a path
+ // prior to testing for being a pattern (think of b[a/r] that is not
+ // a pattern).
//
- if (val.find_first_of ("*?[") == string::npos)
- return false;
+ auto path_pattern = [&val, &loc, this] ()
+ {
+ // Let's optimize it a bit for the common cases.
+ //
+ if (val.find_first_of ("*?[") == string::npos)
+ return false;
- if (path::traits_type::find_separator (val) == string::npos)
- return path_pattern (val);
+ if (path_traits::find_separator (val) == string::npos)
+ return build2::path_pattern (val);
- try
- {
- return path_pattern (path (val));
- }
- catch (const invalid_path& e)
+ try
+ {
+ return build2::path_pattern (path (val));
+ }
+ catch (const invalid_path& e)
+ {
+ fail (loc) << "invalid path '" << e.path << "'" << endf;
+ }
+ };
+
+ auto regex_pattern = [&val] ()
{
- fail (loc) << "invalid path '" << e.path << "'" << endf;
- }
- };
+ return ((val[0] == '~' || val[0] == '^') &&
+ val[1] != '\0' && !alnum (val[1]));
+ };
- bool pat (false);
- if (pmode == pattern_mode::expand || pmode == pattern_mode::detect)
- {
- if (!*pp1 && // Cannot be project-qualified.
- !quoted && // Cannot be quoted.
- ((dp != nullptr && dp->absolute ()) || pbase_ != nullptr) &&
- (pattern () || (curly && val[0] == '+')))
+ if (pmode != pattern_mode::preserve)
{
- // Resolve the target type if there is one. If we fail, then this
- // is not a pattern.
+ // Note that if we have no base directory or cannot resolve the
+ // target type, then this effectively becomes the ignore mode.
//
- const target_type* ttp (tp != nullptr && scope_ != nullptr
- ? scope_->find_target_type (*tp)
- : nullptr);
-
- if (tp == nullptr || ttp != nullptr)
+ if (pbase_ != nullptr || (dp != nullptr && dp->absolute ()))
{
- if (pmode == pattern_mode::detect)
+ // Note that we have to check for regex patterns first since
+ // they may also be detected as path patterns.
+ //
+ if (!quoted_first && regex_pattern ())
{
- // Strip the literal unquoted plus character for the first
- // pattern in the group.
+ // Note: we may decide to support regex-based name generation
+ // some day (though a substitution won't make sense here).
//
- if (ppat)
- {
- assert (val[0] == '+');
+ fail (loc) << "regex pattern-based name generation" <<
+ info << "quote '" << val << "' (or escape first character) "
+ << "to treat it as literal name (or path pattern)";
+ }
+ else if ((!quoted && path_pattern ()) ||
+ (!quoted_first && curly && val[0] == '+'))
+ {
+ // Resolve the target type if there is one.
+ //
+ const target_type* ttp (tp != nullptr && scope_ != nullptr
+ ? scope_->find_target_type (*tp)
+ : nullptr);
- val.erase (0, 1);
- ppat = pinc = false;
+ if (tp == nullptr || ttp != nullptr)
+ {
+ if (pmode == pattern_mode::detect)
+ {
+ // Strip the literal unquoted plus character for the first
+ // pattern in the group.
+ //
+ if (ppat)
+ {
+ assert (val[0] == '+');
+ val.erase (0, 1);
+ ppat = pinc = false;
+ }
+
+ // Set the detect pattern mode to expand if the pattern is
+ // not followed by the inclusion/exclusion pattern/match.
+ // Note that if it is '}' (i.e., the end of the group),
+ // then it is a single pattern and the expansion is what
+ // we want.
+ //
+ if (!pattern_prefix (peeked ()))
+ pmode = pattern_mode::expand;
+ }
+
+ if (pmode == pattern_mode::expand)
+ {
+ count = expand_name_pattern (get_location (t),
+ names {name (move (val))},
+ ns,
+ what,
+ pairn,
+ dp, tp, ttp);
+ continue;
+ }
+
+ pattern_detected (ttp);
+
+ // Fall through.
}
+ }
+ }
+ }
+ else
+ {
+ // For the preserve mode we treat it as a pattern if it looks like
+ // one syntactically. For now we also don't treat leading `+` in
+ // the curly context as an indication of a path pattern (since
+ // there isn't any good reason to; see also to_stream(name) for
+ // the corresponding serialization logic).
+ //
+ if (!quoted_first && regex_pattern ())
+ {
+ const char* w;
+ if (val[0] == '~')
+ {
+ w = "regex pattern";
+ pat = pattern_type::regex_pattern;
+ }
+ else
+ {
+ w = "regex substitution";
+ pat = pattern_type::regex_substitution;
+ }
- // Reset the detect pattern mode to expand if the pattern is
- // not followed by the inclusion/exclusion pattern/match. Note
- // that if it is '}' (i.e., the end of the group), then it is
- // a single pattern and the expansion is what we want.
- //
- if (!pattern_prefix (peeked ()))
- pmode = pattern_mode::expand;
+ size_t n (val.size ());
+
+ // Verify delimiters and find the position of the flags.
+ //
+ char d (val[1]);
+ size_t p (val.rfind (d));
+
+ if (p == 1)
+ {
+ fail (loc) << "no trailing delimiter '" << d << "' in "
+ << w << " '" << val << "'" <<
+ info << "quote '" << val << "' (or escape first character) "
+ << "to treat it as literal name (or path pattern)";
}
- if (pmode == pattern_mode::expand)
+ // Verify flags.
+ //
+ for (size_t i (++p); i != n; ++i)
{
- count = expand_name_pattern (get_location (t),
- names {name (move (val))},
- ns,
- what,
- pairn,
- dp, tp, ttp);
- continue;
+ char f (val[i]);
+
+ if (*pat == pattern_type::regex_pattern)
+ {
+ if (f == 'i' || f == 'e')
+ continue;
+ }
+
+ fail (loc) << "unknown flag '" << f << "' in " << w << " '"
+ << val << "'";
}
- pattern_detected (ttp);
+ val.erase (0, 1); // Remove `~` or `^`.
- // Fall through.
+ // Make sure we don't treat something like `~/.../` as a
+ // directory.
+ //
+ pos = string::npos;
+ size = 0;
}
+ else if (!quoted && path_pattern ())
+ pat = pattern_type::path;
}
}
- else if (pmode == pattern_mode::preserve)
- {
- // For the preserve mode we treat it as a pattern if it look like
- // one syntactically. For now we also don't treat leading `+` in the
- // curly context as an indication of a pattern.
- //
- if (!*pp1 && // Cannot be project-qualified.
- !quoted && // Cannot be quoted.
- pattern ())
- pat = true;
- }
// If we are a second half of a pair, add another first half
// unless this is the first instance.
@@ -6006,7 +6143,9 @@ namespace build2
// in scope::find_target_type(). This would also mess up
// reversibility to simple name.
//
- if (p == n)
+ // Note: a regex pattern cannot be a directory (see above).
+ //
+ if (pos == size)
{
// For reversibility to simple name, only treat it as a directory
// if the string is an exact representation.
@@ -6021,8 +6160,7 @@ namespace build2
append_name (
ns,
*pp1, move (dir), (tp != nullptr ? *tp : string ()), string (),
- pat,
- loc);
+ pat, loc);
continue;
}
@@ -6568,7 +6706,7 @@ namespace build2
(dp != nullptr ? *dp : dir_path ()),
(tp != nullptr ? *tp : string ()),
string (),
- false /* pattern */,
+ nullopt, /* pattern */
get_location (t));
count = 1;
}
@@ -6589,7 +6727,7 @@ namespace build2
(dp != nullptr ? *dp : dir_path ()),
(tp != nullptr ? *tp : string ()),
string (),
- false /* pattern */,
+ nullopt, /* pattern */
get_location (t));
count = 0;
}
@@ -6617,7 +6755,7 @@ namespace build2
(dp != nullptr ? *dp : dir_path ()),
(tp != nullptr ? *tp : string ()),
string (),
- false /* pattern */,
+ nullopt, /* pattern */
get_location (t));
break;
}
@@ -6636,7 +6774,7 @@ namespace build2
(dp != nullptr ? *dp : dir_path ()),
(tp != nullptr ? *tp : string ()),
string (),
- false /* pattern */,
+ nullopt, /* pattern */
get_location (t));
}
diff --git a/libbuild2/parser.hxx b/libbuild2/parser.hxx
index 889d339..007e508 100644
--- a/libbuild2/parser.hxx
+++ b/libbuild2/parser.hxx
@@ -103,6 +103,7 @@ namespace build2
// Recursive descent parser.
//
protected:
+ using pattern_type = name::pattern_type;
// Pattern expansion mode.
//
@@ -129,8 +130,10 @@ namespace build2
void
parse_variable_block (token&, token_type&,
- const target_type* = nullptr,
- string = string ());
+ optional<pattern_type> = {},
+ const target_type* = nullptr,
+ string = {},
+ const location& = {});
void
parse_recipe (token&, token_type&,
@@ -223,9 +226,10 @@ namespace build2
parse_variable (token&, token_type&, const variable&, token_type);
void
- parse_type_pattern_variable (token&, token_type&,
- const target_type&, string,
- const variable&, token_type, const location&);
+ parse_type_pattern_variable (
+ token&, token_type&,
+ pattern_type, const target_type&, string, const location&,
+ const variable&, token_type, const location&);
const variable&
parse_variable_name (names&&, const location&);
diff --git a/libbuild2/scope.cxx b/libbuild2/scope.cxx
index 46e3dcd..f2700c4 100644
--- a/libbuild2/scope.cxx
+++ b/libbuild2/scope.cxx
@@ -49,7 +49,7 @@ namespace build2
const scope* s,
const target_key* tk,
const target_key* gk,
- optional<string> n)
+ string n)
{
const value& v (*l);
assert ((v.extra == 1 || v.extra == 2) && v.type == nullptr);
@@ -70,7 +70,7 @@ namespace build2
pair<value&, ulock> entry (
s->target_vars.cache.insert (
ctx,
- make_tuple (&v, tk->type, n && !n->empty () ? move (*n) : *tk->name),
+ make_tuple (&v, tk->type, !n.empty () ? move (n) : *tk->name),
stem,
static_cast<const variable_map::value_data&> (v).version,
var));
@@ -146,7 +146,7 @@ namespace build2
if (l.defined ())
{
if (l->extra != 0) // Prepend/append?
- pre_app (l, s, tk, gk, move (tn));
+ pre_app (l, s, tk, gk, move (*tn));
return make_pair (move (l), d);
}
@@ -164,7 +164,7 @@ namespace build2
if (l.defined ())
{
if (l->extra != 0) // Prepend/append?
- pre_app (l, s, gk, nullptr, move (gn));
+ pre_app (l, s, gk, nullptr, move (*gn));
return make_pair (move (l), d);
}
diff --git a/libbuild2/token.hxx b/libbuild2/token.hxx
index faae466..fca888c 100644
--- a/libbuild2/token.hxx
+++ b/libbuild2/token.hxx
@@ -120,7 +120,8 @@ namespace build2
// Quoting can be complete, where the token starts and ends with the quote
// characters and quoting is contiguous or partial where only some part(s)
// of the token are quoted or quoting continues to the next token. We also
- // keep track whether the first character of a token is quoted.
+ // keep track whether the first character of a token is quoted (we also
+ // treat escaped first character as quoted).
//
quote_type qtype;
bool qcomp;
diff --git a/libbuild2/types.hxx b/libbuild2/types.hxx
index dd82ef1..8dfda6e 100644
--- a/libbuild2/types.hxx
+++ b/libbuild2/types.hxx
@@ -16,6 +16,7 @@
#include <map>
#include <array>
#include <tuple>
+#include <regex>
#include <vector>
#include <string>
#include <memory> // unique_ptr, shared_ptr
@@ -44,6 +45,7 @@
#include <libbutl/path.mxx>
#include <libbutl/path-map.mxx>
+#include <libbutl/regex.mxx>
#include <libbutl/sha256.mxx>
#include <libbutl/process.mxx>
#include <libbutl/fdstream.mxx>
@@ -105,6 +107,14 @@ namespace build2
using std::endl;
using std::streamsize; // C++'s ssize_t.
+ // Regex.
+ //
+ // Note that <libbutl/regex.mxx> includes an ostream insertion operator for
+ // regex_error which prints cleaned up message, if any.
+ //
+ using std::regex;
+ using std::regex_error;
+
// Concurrency.
//
using std::atomic;
diff --git a/libbuild2/variable.cxx b/libbuild2/variable.cxx
index 3e3bf05..1855f3e 100644
--- a/libbuild2/variable.cxx
+++ b/libbuild2/variable.cxx
@@ -1796,6 +1796,51 @@ namespace build2
return m_.erase (var) != 0;
}
+ // variable_pattern_map
+ //
+ variable_map& variable_pattern_map::
+ insert (pattern_type type, string&& text)
+ {
+ auto r (map_.emplace (pattern {type, false, move (text), {}},
+ variable_map (ctx, global_)));
+
+ // Compile the regex.
+ //
+ if (r.second && type == pattern_type::regex_pattern)
+ {
+ // On exception restore the text argument (so that it's available for
+ // diagnostics) and remove the element from the map.
+ //
+ auto eg (make_exception_guard (
+ [&text, &r, this] ()
+ {
+ text = r.first->first.text;
+ map_.erase (r.first);
+ }));
+
+ const string& t (r.first->first.text);
+ size_t n (t.size ()), p (t.rfind (t[0]));
+
+ // Convert flags.
+ //
+ regex::flag_type f (regex::ECMAScript);
+ for (size_t i (p + 1); i != n; ++i)
+ {
+ switch (t[i])
+ {
+ case 'i': f |= regex::icase; break;
+ case 'e': r.first->first.match_ext = true; break;
+ }
+ }
+
+ // Skip leading delimiter as well as trailing delimiter and flags.
+ //
+ r.first->first.regex = regex (t.c_str () + 1, p - 1, f);
+ }
+
+ return r.first->second;
+ }
+
// variable_type_map
//
lookup variable_type_map::
@@ -1805,6 +1850,8 @@ namespace build2
{
// Compute and cache "effective" name that we will be matching.
//
+ // See also the additional match_ext logic below.
+ //
auto name = [&tk, &oname] () -> const string&
{
if (!oname)
@@ -1856,24 +1903,40 @@ namespace build2
if (i == end ())
continue;
- // Try to match the pattern, starting from the longest values
- // so that the more "specific" patterns (i.e., those that cover
- // fewer characters with the wildcard) take precedence. See
- // tests/variable/type-pattern.
+ // Try to match the pattern, starting from the longest values.
//
const variable_pattern_map& m (i->second);
-
for (auto j (m.rbegin ()); j != m.rend (); ++j)
{
- const string& pat (j->first);
+ using pattern = variable_pattern_map::pattern;
+ using pattern_type = variable_pattern_map::pattern_type;
- //@@ TODO: should we detect ambiguity? 'foo-*' '*-foo' and 'foo-foo'?
- // Right now the last defined will be used.
- //
- if (pat != "*")
+ const pattern& pat (j->first);
+
+ bool r, e (false);
+ if (pat.type == pattern_type::path)
+ {
+ r = pat.text == "*" || butl::path_match (name (), pat.text);
+ }
+ else
{
- if (!butl::path_match (name (), pat))
- continue;
+ const string& n (name ());
+
+ // Deal with match_ext: first see if the extension would be added by
+ // default. If not, then temporarily add it in oname and then clean
+ // it up if there is no match (to prevent another pattern from using
+ // it). While we may keep adding it if there are multiple patterns
+ // with such a flag, we will at least reuse the buffer in oname.
+ //
+ e = pat.match_ext && tk.ext && !tk.ext->empty () && oname->empty ();
+ if (e)
+ {
+ *oname = *tk.name;
+ *oname += '.';
+ *oname += *tk.ext;
+ }
+
+ r = regex_match (e ? *oname : n, *pat.regex);
}
// Ok, this pattern matches. But is there a variable?
@@ -1882,8 +1945,9 @@ namespace build2
// to automatically type it. And if it is assignment, then typify it
// ourselves.
//
- const variable_map& vm (j->second);
+ if (r)
{
+ const variable_map& vm (j->second);
auto p (vm.lookup (var, false));
if (const variable_map::value_data* v = p.first)
{
@@ -1895,12 +1959,15 @@ namespace build2
// Make sure the effective name is computed if this is
// append/prepend (it is used as a cache key).
//
- if (v->extra != 0)
+ if (v->extra != 0 && !oname)
name ();
return lookup (*v, p.second, vm);
}
}
+
+ if (e)
+ oname->clear ();
}
}
diff --git a/libbuild2/variable.hxx b/libbuild2/variable.hxx
index a272013..573f968 100644
--- a/libbuild2/variable.hxx
+++ b/libbuild2/variable.hxx
@@ -1740,17 +1740,69 @@ namespace build2
class variable_pattern_map
{
public:
- using map_type = map<string, variable_map>;
+ using pattern_type = name::pattern_type;
+
+ // We use the map to keep the patterns in the shortest-first order. This
+ // is used during match where we start from the longest values so that
+ // the more "specific" patterns (i.e., those that cover fewer characters
+ // with the wildcard) take precedence.
+ //
+ // Note that this is only an approximation (e.g., `*[0-9]` vs `[0-9]`) but
+ // it's sufficient in practice (e.g., `*` vs `*.txt`). We also have the
+ // ambiguity problem (e.g., `foo-foo` matching both `foo-*` and `*-foo`).
+ //
+ // And, of course, this doesn't apply across pattern types so we always
+ // treat regex patterns as more specific than path patterns.
+ //
+ // While it feels like this should be a union (with pattern_type as the
+ // discriminator), we need to keep the original regex text for dumping.
+ // So we just keep optional<regex> which is absent for path patterns (it's
+ // optional since a default-constructed regex has a pattern). BTW, the
+ // size of std::regex object ranges between 32 and 64 bytes, depending on
+ // the implementation.
+ //
+ struct pattern
+ {
+ pattern_type type;
+ mutable bool match_ext; // Match extension flag.
+ string text;
+ mutable optional<build2::regex> regex;
+ };
+
+ struct pattern_compare
+ {
+ bool operator() (const pattern& x, const pattern& y) const
+ {
+ return x.type != y.type
+ ? x.type == pattern_type::path
+ : (x.text.size () != y.text.size ()
+ ? x.text.size () < y.text.size ()
+ : x.text < y.text);
+ }
+ };
+
+ using map_type = map<pattern, variable_map, pattern_compare>;
using const_iterator = map_type::const_iterator;
using const_reverse_iterator = map_type::const_reverse_iterator;
variable_pattern_map (context& c, bool global)
: ctx (c), global_ (global) {}
+ // Note that here we assume the "outer" pattern format (delimiters, flags,
+ // etc) is valid.
+ //
+ // Note: may throw regex_error in which case text is preserved.
+ //
+ variable_map&
+ insert (pattern_type type, string&& text);
+
+ // Convenience shortcut for path patterns.
+ //
variable_map&
- operator[] (const string& v)
+ operator[] (string text)
{
- return map_.emplace (v, variable_map (ctx, global_)).first->second;
+ return map_.emplace (pattern {pattern_type::path, false, move (text), {}},
+ variable_map (ctx, global_)).first->second;
}
const_iterator begin () const {return map_.begin ();}
@@ -1769,7 +1821,7 @@ namespace build2
{
public:
using map_type = map<reference_wrapper<const target_type>,
- variable_pattern_map>;
+ variable_pattern_map>;
using const_iterator = map_type::const_iterator;
variable_type_map (context& c, bool global): ctx (c), global_ (global) {}
@@ -1785,6 +1837,10 @@ namespace build2
const_iterator end () const {return map_.end ();}
bool empty () const {return map_.empty ();}
+ // If found append/prepend then name is guaranteed to either contain the
+ // full name that was used for the match or be empty in which case the
+ // original target name was used.
+ //
lookup
find (const target_key&, const variable&, optional<string>& name) const;
diff --git a/old-tests/variable/type-pattern/buildfile b/old-tests/variable/type-pattern/buildfile
deleted file mode 100644
index dd218ac..0000000
--- a/old-tests/variable/type-pattern/buildfile
+++ /dev/null
@@ -1,39 +0,0 @@
-#dir/foo{*}: x = y # directory
-#foo{*.*}: x = y # multiple wildcards
-#foo{*}: x = y # unknown target type
-#file{*}: x += y # append
-
-# Use --verbose 6 to examine.
-#
-
-dir{*}: x = y
-
-x = z
-dir{*-foo}: x = $x # 'z'
-
-x = G
-file{*-foo}: x = x
-file{xfoo}: x = $x # 'G'
-file{-foo}: x = $x # 'x'
-file{x-foo}: x = $x # 'x'
-file{bar-*-foo}: x = X
-file{bar-x}: x = $x # 'G'
-file{bar--foo}: x = $x # 'X'
-file{bar-x-foo}: x = $x # 'X'
-
-file{*-fox}: x = 1
-file{fox-*}: x = 2
-file{fox-fox}: x = $x # '2'
-file{*-fox}: x = 3
-file{fox-x-fox}: x = $x # still '2'!
-
-*-foz: x = z # any target
-file{x-foz}: x = $x # 'z'
-
-# These should all be the same.
-#
-*: x1 = X1
-{*}: x2 = X2
-*{*}: x3 = X3
-
-./:
diff --git a/tests/variable/target-type-pattern-specific/testscript b/tests/variable/target-type-pattern-specific/testscript
index 1a3e98a..9962342 100644
--- a/tests/variable/target-type-pattern-specific/testscript
+++ b/tests/variable/target-type-pattern-specific/testscript
@@ -19,6 +19,65 @@ X
y Y
EOO
+: old-tests-type-pattern
+:
+$* <<EOI >>EOO
+dir{*}: x = y
+
+x = z
+dir{*-foo}: x = $x # 'z'
+print $(bar-foo/: x)
+
+x = G
+file{*-foo}: x = x
+file{xfoo}: x = $x # 'G'
+print $(file{xfoo}: x)
+file{-foo}: x = $x # 'x'
+print $(file{-foo}: x)
+file{x-foo}: x = $x # 'x'
+print $(file{x-foo}: x)
+file{bar-*-foo}: x = X
+file{bar-x}: x = $x # 'G'
+print $(file{bar-x}: x)
+file{bar--foo}: x = $x # 'X'
+print $(file{bar--foo}: x)
+file{bar-x-foo}: x = $x # 'X'
+print $(file{bar-x-foo}: x)
+
+file{*-fox}: x = 1
+file{fox-*}: x = 2
+file{fox-fox}: x = $x # '2'
+print $(file{fox-fox}: x)
+file{*-fox}: x = 3
+file{fox-x-fox}: x = $x # still '2'!
+print $(file{fox-fox}: x)
+
+*-foz: x = z # any target
+file{x-foz}: x = $x # 'z'
+print $(file{x-foz}: x)
+
+*: x1 = X1
+{*}: x2 = X2
+*{*}: x3 = X3
+print $(file{x}: x1)
+print $(file{x}: x2)
+print $(file{x}: x3)
+EOI
+z
+G
+x
+x
+G
+X
+X
+2
+2
+z
+X1
+X2
+X3
+EOO
+
: block
:
$* <<EOI >>EOO
@@ -55,3 +114,71 @@ EOI
X
y Y
EOO
+
+: regex
+:
+{
+ : flag-icase
+ :
+ $* <<EOI >>EOO
+ file{~/'.+\.txt'/i}: x = 1
+
+ print $(file{foo.txt}: x)
+ print $(file{foo.TXT}: x)
+ EOI
+ 1
+ 1
+ EOO
+
+ : flag-match-ext
+ :
+ $* <<EOI >>EOO
+ define txt: file
+
+ txt{*}: x = 0
+ txt{~/'[^.]+'/}: x = 1
+ txt{~/'.+\.tx'/e}: x = 2
+ txt{~/'.+\.txt'/e}: x = 3
+
+ print $(txt{foo.x}: x)
+ print $(txt{foo.tx}: x)
+ print $(txt{foo.txt}: x)
+ print $(txt{foo.bar...}: x)
+ EOI
+ 1
+ 2
+ 3
+ 0
+ EOO
+
+ : backref
+ :
+ $* <<EOI >>EOO
+ x = 0
+ file{~/'(.+)-\1'/}: x = 1
+
+ print $(file{foo-foo}: x)
+ print $(file{foo-bar}: x)
+ EOI
+ 1
+ 0
+ EOO
+
+ : dir
+ :
+ $* <<EOI >>EOO
+ foo/dir{~/b.+/}: x = 1
+
+ print $(foo/dir{bar}: x)
+ EOI
+ 1
+ EOO
+
+ : invalid
+ :
+ $* <<EOI 2>>~/EOE/ != 0
+ file{~/'(.+'/}: x = 1
+ EOI
+ /<stdin>:1:1: error: invalid regex pattern .+/
+ EOE
+}