aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2020-11-12 14:38:12 +0200
committerKaren Arutyunov <karen@codesynthesis.com>2020-11-17 16:57:00 +0300
commita68fa2f5c22cbbfc099dd77cccaf44db4cf85730 (patch)
treea6faf9babcb8e637d09c8061253d8cd52e670d53
parent8cc7acfc647ab61eecb8feddbd0cbf5ae270e41e (diff)
Generalize dot escaping in target name rules
Now triple dot and escape sequence can appear almost anywhere in the target name (see target::split_name() for details).
-rw-r--r--libbuild2/parser.cxx2
-rw-r--r--libbuild2/target.cxx316
-rw-r--r--libbuild2/target.hxx7
-rw-r--r--tests/name/extension.testscript243
4 files changed, 499 insertions, 69 deletions
diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx
index 033395d..8676c9d 100644
--- a/libbuild2/parser.cxx
+++ b/libbuild2/parser.cxx
@@ -4924,7 +4924,7 @@ namespace build2
// Post-process the result: remove extension, reverse target type-specific
// pattern/match amendments (essentially: cxx{*} -> *.cxx -> foo.cxx ->
- // cxx{foo}), and recombined the result.
+ // cxx{foo}), and recombine the result.
//
for (name& n: r)
{
diff --git a/libbuild2/target.cxx b/libbuild2/target.cxx
index df03128..6647f75 100644
--- a/libbuild2/target.cxx
+++ b/libbuild2/target.cxx
@@ -250,74 +250,279 @@ namespace build2
{
assert (!v.empty ());
- // We treat a single trailing dot as "specified no extension", double dots
- // as a single trailing dot (that is, an escape sequence which can be
- // repeated any number of times; in such cases we naturally assume there
- // is no default extension) and triple dots as "unspecified (default)
- // extension" (used when the extension in the name is not "ours", for
- // example, cxx{foo.test...} for foo.test.cxx). An odd number of dots
- // other than one or three is invalid.
+ // Normally, we treat the rightmost dot as an extension separator (but see
+ // find_extension() for the exact semantics) and if none exists, then we
+ // assume the extension is not specified. There are, however, special
+ // cases that override this rule:
//
- optional<string> r;
+ // - We treat triple dots as the "chosen extension separator" (used to
+ // resolve ambiguity as to which dot is the separator, for example,
+ // libfoo...u.a). If they are trailing triple dots, then this signifies
+ // the "unspecified (default) extension" (used when the extension in the
+ // name is not "ours", for example, cxx{foo.test...} for foo.test.cxx).
+ // Having multiple triple dots is illegal.
+ //
+ // - Otherwise, we treat a single trailing dot as the "specified no
+ // extension".
+ //
+ // - Finally, double dots are used as an escape sequence to make sure the
+ // dot is not treated as an extension separator (or as special by any of
+ // the above rules, for example, libfoo.u..a). In case of trailing
+ // double dots, we naturally assume there is no default extension.
+ //
+ // An odd number of dots other than one or three is illegal. This means,
+ // in particular, that it's impossible to specify a base/extension pair
+ // where either the base ends with a dot or the extension begins with one
+ // (or both). We are ok with that.
+ //
+ // Dot-only sequences are illegal. Note though, that dir{.} and dir{..}
+ // are handled ad hoc outside this function and are valid.
+
+ // Note that we cannot unescape dots in-place before we validate the name
+ // since it can be required for diagnostics. Thus, the plan is as follows:
+ //
+ // - Iterate right to left, searching for the extension dot, validating
+ // the name, and checking if any dots are escaped.
+ //
+ // - Split the name.
+ //
+ // - Unescape the dots in the name and/or extension, if required.
+
+ // Search for an extension dot, validate the name, and check for escape
+ // sequences.
+ //
+ optional<size_t> edp; // Extension dot position.
+ size_t edn (0); // Extension dot representation length (1 or 3).
- size_t p;
- if (v.back () != '.')
+ bool escaped (false);
+ bool dot_only (true);
+ size_t n (v.size ());
+
+ // Iterate right to left until the beginning of the string or a directory
+ // separator is encountered.
+ //
+ // At the end of the loop p will point to the beginning of the leaf.
+ //
+ size_t p (n - 1);
+
+ for (;; --p)
{
- if ((p = path::traits_type::find_extension (v)) != string::npos)
- r = string (v.c_str () + p + 1);
+ char c (v[p]);
+
+ if (c == '.')
+ {
+ // Find the first dot in the sequence.
+ //
+ size_t i (p);
+ for (; i != 0 && v[i - 1] == '.'; --i) ;
+
+ size_t sn (p - i + 1); // Sequence length.
+
+ if (sn == 3) // Triple dots?
+ {
+ if (edp && edn == 3)
+ fail (loc) << "multiple triple dots in target name '" << v << "'";
+
+ edp = i;
+ edn = 3;
+ }
+ else if (sn == 1) // Single dot?
+ {
+ if (!edp)
+ {
+ edp = i;
+ edn = 1;
+ }
+ }
+ else if (sn % 2 == 0) // Escape sequence?
+ escaped = true;
+ else
+ fail (loc) << "invalid dot sequence in target name '" << v << "'";
+
+ p = i; // Position to the first dot in the sequence.
+ }
+ else if (path::traits_type::is_separator (c))
+ {
+ // Position to the beginning of the leaf and bail out.
+ //
+ ++p;
+ break;
+ }
+ else
+ dot_only = false;
+
+ if (p == 0)
+ break;
}
- else
+
+ if (dot_only)
+ fail (loc) << "invalid target name '" << v << "'";
+
+ // The leading dot cannot be an extension dot. Thus, the leading triple
+ // dots are invalid and the leading single dot is not considered as such.
+ //
+ if (edp && *edp == p)
{
- if ((p = v.find_last_not_of ('.')) == string::npos)
- fail (loc) << "invalid target name '" << v << "'";
+ if (edn == 3)
+ fail (loc) << "leading triple dots in target name '" << v << "'";
- p++; // Position of the first trailing dot.
- size_t n (v.size () - p); // Number of the trailing dots.
+ edp = nullopt;
+ }
+
+ // Split the name.
+ //
+ optional<string> r;
- if (n == 1)
+ if (edp)
+ {
+ if (*edp != n - edn) // Non-trailing dot?
+ r = string (v, *edp + edn);
+ else if (edn == 1) // Trailing single dot?
r = string ();
- else if (n == 3)
- ;
- else if (n % 2 == 0)
+ //else if (edn == 3) // Trailing triple dots?
+ // r = nullopt;
+
+ v.resize (*edp);
+ }
+ else if (v.back () == '.') // Trailing escaped dot?
+ r = string ();
+
+ if (!escaped)
+ return r;
+
+ // Unescape the dots.
+ //
+ auto unescape = [] (string& s, size_t b = 0)
+ {
+ size_t n (s.size ());
+ for (size_t i (b); i != n; ++i)
{
- p += n / 2; // Keep half of the dots.
- r = string ();
+ if (s[i] == '.')
+ {
+ // Find the end of the dot sequence.
+ //
+ size_t j (i + 1);
+ for (; j != n && s[j] == '.'; ++j) ;
+
+ size_t sn (j - i); // Sequence length.
+
+ // Multiple dots can only represent an escape sequence now.
+ //
+ if (sn != 1)
+ {
+ assert (sn % 2 == 0);
+
+ size_t dn (sn / 2); // Number of dots to remove.
+ s.erase (i + dn, dn);
+
+ i += dn - 1; // Position to the last dot in the sequence.
+ n -= dn; // Adjust string size counter.
+ }
+ }
}
- else
- fail (loc) << "invalid trailing dot sequence in target name '"
- << v << "'";
- }
+ };
- if (p != string::npos)
- v.resize (p);
+ unescape (v, p);
+
+ if (r)
+ unescape (*r);
return r;
}
+ // Escape the name according to the rules described in split_name(). The
+ // idea is that we should be able to roundtrip things.
+ //
+ // Note though, that multiple representations can end up with the same
+ // name, for example libfoo.u..a and libfoo...u.a. We will always resolve
+ // ambiguity with the triple dot and only escape those dots that otherwise
+ // can be misinterpreted (dot sequences, etc).
+ //
void target::
combine_name (string& v, const optional<string>& e, bool de)
{
- if (v.back () == '.')
+ // Escape all dot sequences since they can be misinterpreted as escape
+ // sequences and return true if the result contains an unescaped dot that
+ // can potentially be considered an extension dot.
+ //
+ // In the name mode only consider the basename, escape the trailing dot
+ // (since it can be misinterpreted as the 'no extension' case), and don't
+ // treat the basename leading dot as the potential extension dot.
+ //
+ auto escape = [] (string& s, bool name) -> bool
{
- assert (e && e->empty ());
+ if (s.empty ())
+ return false;
- size_t p (v.find_last_not_of ('.'));
- assert (p != string::npos);
+ bool r (false);
+ size_t n (s.size ());
- p++; // Position of the first trailing dot.
- size_t n (v.size () - p); // Number of the trailing dots.
- v.append (n, '.'); // Double them.
- }
- else if (e)
+ // Iterate right to left until the beginning of the string or a
+ // directory separator is encountered.
+ //
+ for (size_t p (n - 1);; --p)
+ {
+ char c (s[p]);
+
+ if (c == '.')
+ {
+ // Find the first dot in the sequence.
+ //
+ size_t i (p);
+ for (; i != 0 && s[i - 1] == '.'; --i) ;
+
+ size_t sn (p - i + 1); // Sequence length.
+
+ bool esc (sn != 1); // Escape the sequence.
+ bool ext (sn == 1); // An extension dot, potentially.
+
+ if (name)
+ {
+ if (i == n - 1)
+ esc = true;
+
+ if (ext && (i == 0 || path::traits_type::is_separator (s[i - 1])))
+ ext = false;
+ }
+
+ if (esc)
+ s.insert (p + 1, sn, '.'); // Double them.
+
+ if (ext)
+ r = true;
+
+ p = i; // Position to the first dot in the sequence.
+ }
+ else if (path::traits_type::is_separator (c))
+ {
+ assert (name);
+ break;
+ }
+
+ if (p == 0)
+ break;
+ }
+
+ return r;
+ };
+
+ bool ed (escape (v, true /* name */));
+
+ if (v.back () == '.') // Name had (before escaping) trailing dot.
{
- v += '.';
- v += *e; // Empty or not.
+ assert (e && e->empty ());
}
- else if (de)
+ else if (e)
{
- if (path::traits_type::find_extension (v) != string::npos)
- v += "...";
+ // Separate the name and extension with the triple dots if the extension
+ // contains potential extension dots.
+ //
+ string ext (*e);
+ v += escape (ext, false /* name */) ? "..." : ".";
+ v += ext; // Empty or not.
}
+ else if (de && ed)
+ v += "...";
}
// include()
@@ -499,6 +704,8 @@ namespace build2
return pair<target&, ulock> (*t, ulock ());
}
+ static const optional<string> unknown_ext ("?");
+
ostream&
to_stream (ostream& os, const target_key& k, optional<stream_verbosity> osv)
{
@@ -530,7 +737,7 @@ namespace build2
if (n)
{
- os << *k.name;
+ const optional<string>* ext (nullptr); // NULL or present.
// If the extension derivation functions are NULL, then it means this
// target type doesn't use extensions.
@@ -543,11 +750,30 @@ namespace build2
//
if (ev > 0 && (ev > 1 || (k.ext && !k.ext->empty ())))
{
- os << '.' << (k.ext ? *k.ext : "?");
+ ext = k.ext ? &k.ext : &unknown_ext;
}
}
else
assert (!k.ext || k.ext->empty ()); // Unspecified or none.
+
+ // Escape dots in the name/extension to resolve potential ambiguity.
+ //
+ if (k.name->find ('.') == string::npos &&
+ (ext == nullptr || (*ext)->find ('.') == string::npos))
+ {
+ os << *k.name;
+
+ if (ext != nullptr)
+ os << '.' << **ext;
+ }
+ else
+ {
+ string n (*k.name);
+ target::combine_name (n,
+ ext != nullptr ? *ext : nullopt_string,
+ false /* default_extension */);
+ os << n;
+ }
}
else
to_stream (os,
diff --git a/libbuild2/target.hxx b/libbuild2/target.hxx
index 73363ae..d6e128e 100644
--- a/libbuild2/target.hxx
+++ b/libbuild2/target.hxx
@@ -767,13 +767,14 @@ namespace build2
}
public:
- // Split the name leaf into target name (in place) and extension
- // (returned).
+ // Split the name (not necessarily a simple path) into target name (in
+ // place) and extension (returned).
//
static optional<string>
split_name (string&, const location&);
- // Combine the target name and extension into the name leaf.
+ // Combine the target name (not necessarily a simple path) and
+ // extension.
//
// If the target type has the default extension, then "escape" the
// existing extension if any.
diff --git a/tests/name/extension.testscript b/tests/name/extension.testscript
index 72721c2..1583109 100644
--- a/tests/name/extension.testscript
+++ b/tests/name/extension.testscript
@@ -16,46 +16,238 @@ EOI
: unspecified
:
touch foo.txt;
- $* <'./: txt{foo}'
+ $* <<EOI >>EOO
+ ./: txt{foo}
+ print $name(txt{foo})
+ print $extension(txt{foo})
+ print txt{fo?}
+ EOI
+ foo
+ [null]
+ txt{foo}
+ EOO
: specified
:
touch foo.text;
- $* <'./: txt{foo.text}'
+ $* <<EOI >>EOO
+ ./: txt{foo.text}
+ print $name(txt{foo.text})
+ print $extension(txt{foo.text})
+ print txt{fo?.text}
+ EOI
+ foo
+ text
+ txt{foo.text}
+ EOO
- : specified-none
+ : specified-triple
:
- touch foo;
- $* <'./: txt{foo.}'
+ touch fo.o.text;
+ $* <<EOI >>EOO
+ ./: txt{fo...o.text}
+ print $name(txt{fo...o.text})
+ print $extension(txt{fo...o.text})
+ print txt{f?...o.text}
+ EOI
+ fo
+ o.text
+ txt{fo...o.text}
+ EOO
- : specified-default
+ : multiple-triples
:
- touch foo.testscript.txt;
- $* <'./: txt{foo.testscript...}'
+ $* <'./: txt{f...o...o}' 2>>EOE != 0
+ <stdin>:1:5: error: multiple triple dots in target name 'f...o...o'
+ EOE
- # Trailing dots are not allowed on Windows.
- #
+ : multiple-singles
+ :
+ touch fo.o.text;
+ $* <<EOI >>EOO
+ ./: txt{fo.o.text}
+ print $name(txt{fo.o.text})
+ print $extension(txt{fo.o.text})
+ print txt{f?.o.text}
+ EOI
+ fo.o
+ text
+ txt{fo.o.text}
+ EOO
- : specified-escape-one
+ : escape-basename
+ :
+ touch fo.o.text;
+ $* <<EOI >>EOO
+ ./: txt{fo..o.text}
+ print $name(txt{fo..o.text})
+ print $extension(txt{fo..o.text})
+ print txt{f?..o.text}
+ EOI
+ fo.o
+ text
+ txt{fo.o.text}
+ EOO
+
+ : escape-extension
+ :
+ touch fo.o.text;
+ $* <<EOI >>EOO
+ ./: txt{fo.o..text}
+ print $name(txt{fo.o..text})
+ print $extension(txt{fo.o..text})
+ print txt{f?.o..text}
+ EOI
+ fo
+ o.text
+ txt{fo...o.text}
+ EOO
+
+ : trailing
:
- if ($cxx.target.class != 'windows')
{
- touch foo.;
- $* <'./: txt{foo..}'
+ : no-extension
+ :
+ touch foo;
+ $* <<EOI >>EOO
+ ./: txt{foo.}
+ print $name(txt{foo.})
+ print $extension(txt{foo.})
+ print txt{fo?.}
+ EOI
+ foo
+
+ txt{foo.}
+ EOO
+
+ : dot-no-extension
+ :
+ touch f.oo;
+ $* <<EOI >>EOO
+ ./: txt{f.oo.}
+ print $name(txt{f.oo.})
+ print $extension(txt{f.oo.})
+ print txt{f.o?.}
+ EOI
+ f.oo
+
+ txt{f.oo.}
+ EOO
+
+ : default-extension
+ :
+ touch foo.testscript.txt;
+ $* <<EOI >>EOO
+ ./: txt{foo.testscript...}
+ print $name(txt{foo.testscript...})
+ print $extension(txt{foo.testscript...})
+ print txt{fo?.testscript...}
+ EOI
+ foo.testscript
+ [null]
+ txt{foo.testscript...}
+ EOO
+
+ # Trailing dots are not allowed on Windows.
+ #
+
+ : escape-one
+ :
+ if ($cxx.target.class != 'windows')
+ {
+ touch foo.;
+ $* <<EOI >>EOO
+ ./: txt{foo..}
+ print $name(txt{foo..})
+ print $extension(txt{foo..})
+ print txt{fo?..}
+ EOI
+ foo.
+
+ txt{foo..}
+ EOO
+ }
+
+ : escape-two
+ :
+ if ($cxx.target.class != 'windows')
+ {
+ touch foo..;
+ $* <<EOI >>EOO
+ ./: txt{foo....}
+ print $name(txt{foo....})
+ print $extension(txt{foo....})
+ print txt{fo?....}
+ EOI
+ foo..
+
+ txt{foo....}
+ EOO
+ }
}
- : specified-escape-two
+ : leading
:
- if ($cxx.target.class != 'windows')
{
- touch foo..;
- $* <'./: txt{foo....}'
+ : single
+ :
+ touch .foo.txt;
+ $* <<EOI >>EOO
+ ./: txt{.foo}
+ print $name(txt{.foo})
+ print $extension(txt{.foo})
+ print txt{.fo?}
+ EOI
+ .foo
+ [null]
+ txt{.foo}
+ EOO
+
+ : triple
+ :
+ $* <'./: txt{...foo}' 2>>EOE != 0
+ <stdin>:1:5: error: leading triple dots in target name '...foo'
+ EOE
+
+ : escape-one
+ :
+ touch .foo.txt;
+ $* <<EOI >>EOO
+ ./: txt{..foo}
+ print $name(txt{..foo})
+ print $extension(txt{..foo})
+ print txt{..fo?}
+ EOI
+ .foo
+ [null]
+ txt{.foo}
+ EOO
+
+ : escape-two
+ :
+ touch ..foo.txt;
+ $* <<EOI >>EOO
+ ./: txt{....foo}
+ print $name(txt{....foo})
+ print $extension(txt{....foo})
+ print txt{....fo?}
+ EOI
+ ..foo
+ [null]
+ txt{....foo}
+ EOO
}
: specified-invalid
:
- $* <'./: txt{foo.....}' 2>>EOE != 0
- <stdin>:1:5: error: invalid trailing dot sequence in target name 'foo.....'
+ $* <'./: txt{fo.....o}' 2>>EOE != 0
+ <stdin>:1:5: error: invalid dot sequence in target name 'fo.....o'
+ EOE
+
+ : specified-dot-only
+ :
+ $* <'./: txt{..}' 2>>EOE != 0
+ <stdin>:1:5: error: invalid target name '..'
EOE
}
@@ -72,6 +264,17 @@ EOI
touch foo.testscript.txt;
$* <'print txt{fo?.testscript...}' >'txt{foo.testscript...}'
+ : dir
+ :
+ touch foo.txt;
+ $* <'print txt{./f*}' >'txt{./foo}'
+
+ : dir-leading-triple
+ :
+ $* <'print txt{./...f*}' 2>>EOE != 0
+ <stdin>:1:11: error: leading triple dots in target name './...f*'
+ EOE
+
# Trailing dots are not allowed on Windows.
#