diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2020-11-12 14:38:12 +0200 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2020-11-17 16:57:00 +0300 |
commit | a68fa2f5c22cbbfc099dd77cccaf44db4cf85730 (patch) | |
tree | a6faf9babcb8e637d09c8061253d8cd52e670d53 | |
parent | 8cc7acfc647ab61eecb8feddbd0cbf5ae270e41e (diff) |
Generalize dot escaping in target name rules
Triple dots and escape sequences (double dots) can now appear almost anywhere
in the target name (see target::split_name() for details).
-rw-r--r-- | libbuild2/parser.cxx | 2 | ||||
-rw-r--r-- | libbuild2/target.cxx | 316 | ||||
-rw-r--r-- | libbuild2/target.hxx | 7 | ||||
-rw-r--r-- | tests/name/extension.testscript | 243 |
4 files changed, 499 insertions, 69 deletions
diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx index 033395d..8676c9d 100644 --- a/libbuild2/parser.cxx +++ b/libbuild2/parser.cxx @@ -4924,7 +4924,7 @@ namespace build2 // Post-process the result: remove extension, reverse target type-specific // pattern/match amendments (essentially: cxx{*} -> *.cxx -> foo.cxx -> - // cxx{foo}), and recombined the result. + // cxx{foo}), and recombine the result. // for (name& n: r) { diff --git a/libbuild2/target.cxx b/libbuild2/target.cxx index df03128..6647f75 100644 --- a/libbuild2/target.cxx +++ b/libbuild2/target.cxx @@ -250,74 +250,279 @@ namespace build2 { assert (!v.empty ()); - // We treat a single trailing dot as "specified no extension", double dots - // as a single trailing dot (that is, an escape sequence which can be - // repeated any number of times; in such cases we naturally assume there - // is no default extension) and triple dots as "unspecified (default) - // extension" (used when the extension in the name is not "ours", for - // example, cxx{foo.test...} for foo.test.cxx). An odd number of dots - // other than one or three is invalid. + // Normally, we treat the rightmost dot as an extension separator (but see + // find_extension() for the exact semantics) and if none exists, then we + // assume the extension is not specified. There are, however, special + // cases that override this rule: // - optional<string> r; + // - We treat triple dots as the "chosen extension separator" (used to + // resolve ambiguity as to which dot is the separator, for example, + // libfoo...u.a). If they are trailing triple dots, then this signifies + // the "unspecified (default) extension" (used when the extension in the + // name is not "ours", for example, cxx{foo.test...} for foo.test.cxx) + // Having multiple triple dots is illegal. + // + // - Otherwise, we treat a single trailing dot as the "specified no + // - extension". 
+ // + // - Finally, double dots are used as an escape sequence to make sure the + // dot is not treated as an extension separator (or as special by any of + // the above rules, for example, libfoo.u..a). In case of trailing + // double dots, we naturally assume there is no default extension. + // + // An odd number of dots other than one or three is illegal. This means, + // in particular, that it's impossible to specify a base/extension pair + // where either the base ends with a dot or the extension begins with one + // (or both). We are ok with that. + // + // Dot-only sequences are illegal. Note though, that dir{.} and dir{..} + // are handled ad hoc outside this function and are valid. + + // Note that we cannot unescape dots in-place before we validate the name + // since it can be required for diagnostics. Thus, the plan is as follows: + // + // - Iterate right to left, searching for the extension dot, validating + // the name, and checking if any dots are escaped. + // + // - Split the name. + // + // - Unescape the dots in the name and/or extension, if required. + + // Search for an extension dot, validate the name, and check for escape + // sequences. + // + optional<size_t> edp; // Extension dot position. + size_t edn (0); // Extension dot representation length (1 or 3). - size_t p; - if (v.back () != '.') + bool escaped (false); + bool dot_only (true); + size_t n (v.size ()); + + // Iterate right to left until the beginning of the string or a directory + // separator is encountered. + // + // At the end of the loop p will point to the beginning of the leaf. + // + size_t p (n - 1); + + for (;; --p) { - if ((p = path::traits_type::find_extension (v)) != string::npos) - r = string (v.c_str () + p + 1); + char c (v[p]); + + if (c == '.') + { + // Find the first dot in the sequence. + // + size_t i (p); + for (; i != 0 && v[i - 1] == '.'; --i) ; + + size_t sn (p - i + 1); // Sequence length. + + if (sn == 3) // Triple dots? 
+ { + if (edp && edn == 3) + fail (loc) << "multiple triple dots in target name '" << v << "'"; + + edp = i; + edn = 3; + } + else if (sn == 1) // Single dot? + { + if (!edp) + { + edp = i; + edn = 1; + } + } + else if (sn % 2 == 0) // Escape sequence? + escaped = true; + else + fail (loc) << "invalid dot sequence in target name '" << v << "'"; + + p = i; // Position to the first dot in the sequence. + } + else if (path::traits_type::is_separator (c)) + { + // Position to the beginning of the leaf and bail out. + // + ++p; + break; + } + else + dot_only = false; + + if (p == 0) + break; } - else + + if (dot_only) + fail (loc) << "invalid target name '" << v << "'"; + + // The leading dot cannot be an extension dot. Thus, the leading triple + // dots are invalid and the leading single dot is not considered as such. + // + if (edp && *edp == p) { - if ((p = v.find_last_not_of ('.')) == string::npos) - fail (loc) << "invalid target name '" << v << "'"; + if (edn == 3) + fail (loc) << "leading triple dots in target name '" << v << "'"; - p++; // Position of the first trailing dot. - size_t n (v.size () - p); // Number of the trailing dots. + edp = nullopt; + } + + // Split the name. + // + optional<string> r; - if (n == 1) + if (edp) + { + if (*edp != n - edn) // Non-trailing dot? + r = string (v, *edp + edn); + else if (edn == 1) // Trailing single dot? r = string (); - else if (n == 3) - ; - else if (n % 2 == 0) + //else if (edn == 3) // Trailing triple dots? + // r = nullopt; + + v.resize (*edp); + } + else if (v.back () == '.') // Trailing escaped dot? + r = string (); + + if (!escaped) + return r; + + // Unescape the dots. + // + auto unescape = [] (string& s, size_t b = 0) + { + size_t n (s.size ()); + for (size_t i (b); i != n; ++i) { - p += n / 2; // Keep half of the dots. - r = string (); + if (s[i] == '.') + { + // Find the end of the dot sequence. + // + size_t j (i + 1); + for (; j != n && s[j] == '.'; ++j) ; + + size_t sn (j - i); // Sequence length. 
+ + // Multiple dots can only represent an escape sequence now. + // + if (sn != 1) + { + assert (sn % 2 == 0); + + size_t dn (sn / 2); // Number of dots to remove. + s.erase (i + dn, dn); + + i += dn - 1; // Position to the last dot in the sequence. + n -= dn; // Adjust string size counter. + } + } } - else - fail (loc) << "invalid trailing dot sequence in target name '" - << v << "'"; - } + }; - if (p != string::npos) - v.resize (p); + unescape (v, p); + + if (r) + unescape (*r); return r; } + // Escape the name according to the rules described in split_name(). The + // idea is that we should be able to roundtrip things. + // + // Note though, that multiple representations can end up with the same + // name, for example libfoo.u..a and libfoo...u.a. We will always resolve + // ambiguity with the triple dot and only escape those dots that otherwise + // can be misinterpreted (dot sequences, etc). + // void target:: combine_name (string& v, const optional<string>& e, bool de) { - if (v.back () == '.') + // Escape all dot sequences since they can be misinterpreted as escape + // sequences and return true if the result contains an unescaped dot that + // can potentially be considered an extension dot. + // + // In the name mode only consider the basename, escape the trailing dot + // (since it can be misinterpreted as the 'no extension' case), and don't + // treat the basename leading dot as the potential extension dot. + // + auto escape = [] (string& s, bool name) -> bool { - assert (e && e->empty ()); + if (s.empty ()) + return false; - size_t p (v.find_last_not_of ('.')); - assert (p != string::npos); + bool r (false); + size_t n (s.size ()); - p++; // Position of the first trailing dot. - size_t n (v.size () - p); // Number of the trailing dots. - v.append (n, '.'); // Double them. - } - else if (e) + // Iterate right to left until the beginning of the string or a + // directory separator is encountered. 
+ // + for (size_t p (n - 1);; --p) + { + char c (s[p]); + + if (c == '.') + { + // Find the first dot in the sequence. + // + size_t i (p); + for (; i != 0 && s[i - 1] == '.'; --i) ; + + size_t sn (p - i + 1); // Sequence length. + + bool esc (sn != 1); // Escape the sequence. + bool ext (sn == 1); // An extension dot, potentially. + + if (name) + { + if (i == n - 1) + esc = true; + + if (ext && (i == 0 || path::traits_type::is_separator (s[i - 1]))) + ext = false; + } + + if (esc) + s.insert (p + 1, sn, '.'); // Double them. + + if (ext) + r = true; + + p = i; // Position to the first dot in the sequence. + } + else if (path::traits_type::is_separator (c)) + { + assert (name); + break; + } + + if (p == 0) + break; + } + + return r; + }; + + bool ed (escape (v, true /* name */)); + + if (v.back () == '.') // Name had (before escaping) trailing dot. { - v += '.'; - v += *e; // Empty or not. + assert (e && e->empty ()); } - else if (de) + else if (e) { - if (path::traits_type::find_extension (v) != string::npos) - v += "..."; + // Separate the name and extension with the triple dots if the extension + // contains potential extension dots. + // + string ext (*e); + v += escape (ext, false /* name */) ? "..." : "."; + v += ext; // Empty or not. } + else if (de && ed) + v += "..."; } // include() @@ -499,6 +704,8 @@ namespace build2 return pair<target&, ulock> (*t, ulock ()); } + static const optional<string> unknown_ext ("?"); + ostream& to_stream (ostream& os, const target_key& k, optional<stream_verbosity> osv) { @@ -530,7 +737,7 @@ namespace build2 if (n) { - os << *k.name; + const optional<string>* ext (nullptr); // NULL or present. // If the extension derivation functions are NULL, then it means this // target type doesn't use extensions. @@ -543,11 +750,30 @@ namespace build2 // if (ev > 0 && (ev > 1 || (k.ext && !k.ext->empty ()))) { - os << '.' << (k.ext ? *k.ext : "?"); + ext = k.ext ? 
&k.ext : &unknown_ext; } } else assert (!k.ext || k.ext->empty ()); // Unspecified or none. + + // Escape dots in the name/extension to resolve potential ambiguity. + // + if (k.name->find ('.') == string::npos && + (ext == nullptr || (*ext)->find ('.') == string::npos)) + { + os << *k.name; + + if (ext != nullptr) + os << '.' << **ext; + } + else + { + string n (*k.name); + target::combine_name (n, + ext != nullptr ? *ext : nullopt_string, + false /* default_extension */); + os << n; + } } else to_stream (os, diff --git a/libbuild2/target.hxx b/libbuild2/target.hxx index 73363ae..d6e128e 100644 --- a/libbuild2/target.hxx +++ b/libbuild2/target.hxx @@ -767,13 +767,14 @@ namespace build2 } public: - // Split the name leaf into target name (in place) and extension - // (returned). + // Split the name (not necessarily a simple path) into target name (in + // place) and extension (returned). // static optional<string> split_name (string&, const location&); - // Combine the target name and extension into the name leaf. + // Combine the target name (not necessarily a simple path) and + // extension. // // If the target type has the default extension, then "escape" the // existing extension if any. 
diff --git a/tests/name/extension.testscript b/tests/name/extension.testscript index 72721c2..1583109 100644 --- a/tests/name/extension.testscript +++ b/tests/name/extension.testscript @@ -16,46 +16,238 @@ EOI : unspecified : touch foo.txt; - $* <'./: txt{foo}' + $* <<EOI >>EOO + ./: txt{foo} + print $name(txt{foo}) + print $extension(txt{foo}) + print txt{fo?} + EOI + foo + [null] + txt{foo} + EOO : specified : touch foo.text; - $* <'./: txt{foo.text}' + $* <<EOI >>EOO + ./: txt{foo.text} + print $name(txt{foo.text}) + print $extension(txt{foo.text}) + print txt{fo?.text} + EOI + foo + text + txt{foo.text} + EOO - : specified-none + : specified-triple : - touch foo; - $* <'./: txt{foo.}' + touch fo.o.text; + $* <<EOI >>EOO + ./: txt{fo...o.text} + print $name(txt{fo...o.text}) + print $extension(txt{fo...o.text}) + print txt{f?...o.text} + EOI + fo + o.text + txt{fo...o.text} + EOO - : specified-default + : multiple-triples : - touch foo.testscript.txt; - $* <'./: txt{foo.testscript...}' + $* <'./: txt{f...o...o}' 2>>EOE != 0 + <stdin>:1:5: error: multiple triple dots in target name 'f...o...o' + EOE - # Trailing dots are not allowed on Windows. 
- # + : multiple-singles + : + touch fo.o.text; + $* <<EOI >>EOO + ./: txt{fo.o.text} + print $name(txt{fo.o.text}) + print $extension(txt{fo.o.text}) + print txt{f?.o.text} + EOI + fo.o + text + txt{fo.o.text} + EOO - : specified-escape-one + : escape-basename + : + touch fo.o.text; + $* <<EOI >>EOO + ./: txt{fo..o.text} + print $name(txt{fo..o.text}) + print $extension(txt{fo..o.text}) + print txt{f?..o.text} + EOI + fo.o + text + txt{fo.o.text} + EOO + + : escape-extension + : + touch fo.o.text; + $* <<EOI >>EOO + ./: txt{fo.o..text} + print $name(txt{fo.o..text}) + print $extension(txt{fo.o..text}) + print txt{f?.o..text} + EOI + fo + o.text + txt{fo...o.text} + EOO + + : trailing : - if ($cxx.target.class != 'windows') { - touch foo.; - $* <'./: txt{foo..}' + : no-extension + : + touch foo; + $* <<EOI >>EOO + ./: txt{foo.} + print $name(txt{foo.}) + print $extension(txt{foo.}) + print txt{fo?.} + EOI + foo + + txt{foo.} + EOO + + : dot-no-extension + : + touch f.oo; + $* <<EOI >>EOO + ./: txt{f.oo.} + print $name(txt{f.oo.}) + print $extension(txt{f.oo.}) + print txt{f.o?.} + EOI + f.oo + + txt{f.oo.} + EOO + + : default-extension + : + touch foo.testscript.txt; + $* <<EOI >>EOO + ./: txt{foo.testscript...} + print $name(txt{foo.testscript...}) + print $extension(txt{foo.testscript...}) + print txt{fo?.testscript...} + EOI + foo.testscript + [null] + txt{foo.testscript...} + EOO + + # Trailing dots are not allowed on Windows. + # + + : escape-one + : + if ($cxx.target.class != 'windows') + { + touch foo.; + $* <<EOI >>EOO + ./: txt{foo..} + print $name(txt{foo..}) + print $extension(txt{foo..}) + print txt{fo?..} + EOI + foo. + + txt{foo..} + EOO + } + + : escape-two + : + if ($cxx.target.class != 'windows') + { + touch foo..; + $* <<EOI >>EOO + ./: txt{foo....} + print $name(txt{foo....}) + print $extension(txt{foo....}) + print txt{fo?....} + EOI + foo.. 
+ + txt{foo....} + EOO + } } - : specified-escape-two + : leading : - if ($cxx.target.class != 'windows') { - touch foo..; - $* <'./: txt{foo....}' + : single + : + touch .foo.txt; + $* <<EOI >>EOO + ./: txt{.foo} + print $name(txt{.foo}) + print $extension(txt{.foo}) + print txt{.fo?} + EOI + .foo + [null] + txt{.foo} + EOO + + : triple + : + $* <'./: txt{...foo}' 2>>EOE != 0 + <stdin>:1:5: error: leading triple dots in target name '...foo' + EOE + + : escape-one + : + touch .foo.txt; + $* <<EOI >>EOO + ./: txt{..foo} + print $name(txt{..foo}) + print $extension(txt{..foo}) + print txt{..fo?} + EOI + .foo + [null] + txt{.foo} + EOO + + : escape-two + : + touch ..foo.txt; + $* <<EOI >>EOO + ./: txt{....foo} + print $name(txt{....foo}) + print $extension(txt{....foo}) + print txt{....fo?} + EOI + ..foo + [null] + txt{....foo} + EOO } : specified-invalid : - $* <'./: txt{foo.....}' 2>>EOE != 0 - <stdin>:1:5: error: invalid trailing dot sequence in target name 'foo.....' + $* <'./: txt{fo.....o}' 2>>EOE != 0 + <stdin>:1:5: error: invalid dot sequence in target name 'fo.....o' + EOE + + : specified-dot-only + : + $* <'./: txt{..}' 2>>EOE != 0 + <stdin>:1:5: error: invalid target name '..' EOE } @@ -72,6 +264,17 @@ EOI touch foo.testscript.txt; $* <'print txt{fo?.testscript...}' >'txt{foo.testscript...}' + : dir + : + touch foo.txt; + $* <'print txt{./f*}' >'txt{./foo}' + + : dir-leading-triple + : + $* <'print txt{./...f*}' 2>>EOE != 0 + <stdin>:1:11: error: leading triple dots in target name './...f*' + EOE + # Trailing dots are not allowed on Windows. # |