From d115cc49a5f91c9c547a7a4d27323f6ccc959da3 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 6 Jun 2019 12:22:33 +0200 Subject: Add path::abnormalities(), note on normalization and symlinks --- libbutl/path.ixx | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ libbutl/path.mxx | 81 +++++++++++++++++++++------------- 2 files changed, 181 insertions(+), 29 deletions(-) diff --git a/libbutl/path.ixx b/libbutl/path.ixx index 9622a81..7786fbc 100644 --- a/libbutl/path.ixx +++ b/libbutl/path.ixx @@ -4,6 +4,120 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. { + // path_abnormality + // + + inline path_abnormality + operator& (path_abnormality x, path_abnormality y) + { + return x &= y; + } + + inline path_abnormality + operator| (path_abnormality x, path_abnormality y) + { + return x |= y; + } + + inline path_abnormality + operator&= (path_abnormality& x, path_abnormality y) + { + return x = static_cast ( + static_cast (x) & + static_cast (y)); + } + + inline path_abnormality + operator|= (path_abnormality& x, path_abnormality y) + { + return x = static_cast ( + static_cast (x) | + static_cast (y)); + } + + // path_traits + // + template + inline bool path_traits:: + normalized (const C* s, size_type n, bool sep) + { + // An early-return version of abnormalities(). + // + size_t j (0); // Beginning of path component. + + for (size_t i (0); i != n; ++i) + { + char c (s[i]); + + if (is_separator (c)) + { + if (sep && c != directory_separator) + return false; + + const char* p (s + j); + size_t m (i - j); + j = i + 1; + + if (j != n && is_separator (s[j])) + return false; + + if (parent (p, m) || current (p, m)) + return false; + } + } + + // Last component. 
+ // + const char* p (s + j); + size_t m (n - j); + + return !(parent (p, m) || current (p, m)); + } + + template + inline path_abnormality path_traits:: + abnormalities (const C* s, size_type n) + { + path_abnormality r (path_abnormality::none); + + size_t j (0); // Beginning of path component. + + for (size_t i (0); i != n; ++i) + { + char c (s[i]); + + if (is_separator (c)) + { + if (c != directory_separator) + r |= path_abnormality::separator; + + const char* p (s + j); + size_t m (i - j); + j = i + 1; + + if (j != n && is_separator (s[j])) + r |= path_abnormality::separator; + + if (parent (p, m)) + r |= path_abnormality::parent; + else if (current (p, m)) + r |= path_abnormality::current; + } + } + + // Last component. + // + const char* p (s + j); + size_t m (n - j); + + if (parent (p, m)) + r |= path_abnormality::parent; + else if (current (p, m)) + r |= path_abnormality::current; + + return r; + } + #ifdef _WIN32 template <> inline char path_traits:: @@ -20,6 +134,9 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. } #endif + // path + // + template inline basic_path path_cast_impl (const basic_path& p, basic_path*) @@ -92,6 +209,18 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason. 
} template + inline path_abnormality basic_path:: + abnormalities () const + { + path_abnormality r (traits_type::abnormalities (this->path_)); + + if (this->tsep_ > 1) + r |= path_abnormality::separator; + + return r; + } + + template inline bool basic_path:: root () const { diff --git a/libbutl/path.mxx b/libbutl/path.mxx index 2de85e9..685e15a 100644 --- a/libbutl/path.mxx +++ b/libbutl/path.mxx @@ -11,6 +11,7 @@ #ifndef __cpp_lib_modules_ts #include #include // ptrdiff_t +#include // uint16_t #include // move(), swap() #include #include // invalid_argument @@ -76,6 +77,19 @@ LIBBUTL_MODEXPORT namespace butl invalid_basic_path (const string_type& p): path (p) {} }; + enum class path_abnormality: std::uint16_t + { + none = 0x00, // Path is normal. + separator = 0x01, // Wrong or multiple consecutive directory separators. + current = 0x02, // Contains current directory (`.`) component. + parent = 0x04 // Contains parent directory (`..`) component. + }; + + inline path_abnormality operator& (path_abnormality, path_abnormality); + inline path_abnormality operator| (path_abnormality, path_abnormality); + inline path_abnormality operator&= (path_abnormality&, path_abnormality); + inline path_abnormality operator|= (path_abnormality&, path_abnormality); + // The only currently available specialization is for the char type. // template @@ -201,39 +215,23 @@ LIBBUTL_MODEXPORT namespace butl } static bool - normalized (const C* s, size_type n, bool sep) - { - size_t j (0); // Beginning of path component. - - for (size_t i (0); i != n; ++i) - { - char c (s[i]); - - if (is_separator (c)) - { - if (sep && c != directory_separator) - return false; - - const char* p (s + j); - size_t m (i - j); - j = i + 1; - - if (j != n && is_separator (s[j])) - return false; - - if (current (p, m) || parent (p, m)) - return false; - } - } + normalized (const C*, size_type, bool); - // Last component. 
- // - const char* p (s + j); - size_t m (n - j); + static path_abnormality + abnormalities (const string_type& s) + { + return abnormalities (s.c_str (), s.size ()); + } - return !(current (p, m) || parent (p, m)); + static path_abnormality + abnormalities (const C* s) + { + return abnormalities (s, char_traits_type::length (s)); } + static path_abnormality + abnormalities (const C*, size_type); + static bool root (const string_type& s) { @@ -802,6 +800,12 @@ LIBBUTL_MODEXPORT namespace butl bool normalized (bool sep = true) const; + // Similar to normalized() but return details on what renders the path + // abnormal. + // + path_abnormality + abnormalities () const; + // Test, based on the presence/absence of the trailing separator, if the // path is to a directory. // @@ -1058,6 +1062,25 @@ LIBBUTL_MODEXPORT namespace butl // normalized() will still return false (for example, ../foo/../ which // will be normalized to ../). // + // Note also that on POSIX the parent directory ('..') components are + // resolved relative to a symlink target. As a result, it's possible to + // construct a valid path that this function will either consider as + // invalid or produce a path that points to an incorrect filesystem entry + // (it's also possible that it returns the correct path by accident). For + // example: + // + // /tmp/sym/../../../ -> <invalid> (should be /tmp) + // | + // /tmp/sub1/sub2/tgt + // + // /tmp/sym/../../ -> / (should be /tmp/sub1) + // | + // /tmp/sub1/sub2/tgt + // + // The common property of such paths is '..' crossing symlink boundaries + // and it's impossible to normalize them without touching the filesystem + // *and* resolving their symlink components (see realize() below). + // basic_path& normalize (bool actual = false, bool cur_empty = false); -- cgit v1.1