diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2016-07-26 15:12:54 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2016-07-28 13:51:29 +0200 |
commit | b6f166c4ed98f94bdd2cc82885d61173a101abfd (patch) | |
tree | c6b75cf2efc98624760050173219e977f8620608 /butl | |
parent | 098559ca3552ebd8f80a6d28254f4fa58913b751 (diff) |
Redesign path to store trailing slash for directories
Diffstat (limited to 'butl')
-rw-r--r-- | butl/filesystem.cxx | 2 | ||||
-rw-r--r-- | butl/path | 593 | ||||
-rw-r--r-- | butl/path.cxx | 32 | ||||
-rw-r--r-- | butl/path.ixx | 352 | ||||
-rw-r--r-- | butl/path.txx | 263 | ||||
-rw-r--r-- | butl/process.cxx | 4 |
6 files changed, 817 insertions, 429 deletions
diff --git a/butl/filesystem.cxx b/butl/filesystem.cxx index cabe306..aa9319f 100644 --- a/butl/filesystem.cxx +++ b/butl/filesystem.cxx @@ -146,7 +146,7 @@ namespace butl path ep (p / de.path ()); //@@ Would be good to reuse the buffer. if (de.ltype () == entry_type::directory) - rmdir_r (path_cast<dir_path> (ep), true, ignore_error); + rmdir_r (path_cast<dir_path> (move (ep)), true, ignore_error); else try_rmfile (ep, ignore_error); } @@ -7,7 +7,7 @@ #include <string> #include <cstddef> // ptrdiff_t -#include <utility> // move() +#include <utility> // move(), swap() #include <iterator> #include <exception> #include <functional> // hash @@ -16,63 +16,68 @@ namespace butl { - // Wish list/ideas for improvements. // - // Ability to convert to directory/leaf/base in-place, without dynamic - // allocation. One idea is something like this: + // - posix_representation() in addition to posix_string() + // + // - Ability to convert to directory/leaf/base in-place, without dynamic + // allocation. One idea is something like this: + // + // p -= "/*"; // directory + // p -= "*/"; // leaf + // p -= ".*"; // base // - // p -= "/*"; // directory - // p -= "*/"; // leaf - // p -= ".*"; // base + // - Faster normalize() implementation. + // + // - We duplicate the interface for path and dir_path while most of it + // is common. Also, we can implicit-cast dir_path& to path& and use + // non-dir-adapted implementation (see where we call K::cast()). 
// - class LIBBUTL_EXPORT invalid_path_base: std::exception + struct LIBBUTL_EXPORT invalid_path_base: public std::exception { - public: virtual char const* what () const throw (); }; template <typename C> - class invalid_basic_path: public invalid_path_base + struct invalid_basic_path: invalid_path_base { - public: - typedef std::basic_string<C> string_type; + using string_type = std::basic_string<C>; - invalid_basic_path (C const* p): path_ (p) {} - invalid_basic_path (string_type const& p): path_ (p) {} - ~invalid_basic_path () throw () {} + string_type path; - string_type const& - path () const - { - return path_; - } - - private: - string_type path_; + invalid_basic_path (const C* p): path (p) {} + invalid_basic_path (const string_type& p): path (p) {} }; template <typename C> struct path_traits { - typedef std::basic_string<C> string_type; - typedef typename string_type::size_type size_type; + using string_type = std::basic_string<C>; + using size_type = typename string_type::size_type; // Canonical directory and path seperators. // #ifdef _WIN32 - static C const directory_separator = '\\'; - static C const path_separator = ';'; + static const C directory_separator = '\\'; + static const C path_separator = ';'; #else static C const directory_separator = '/'; static C const path_separator = ':'; #endif - // Directory separator tests. On some platforms there - // could be multiple seperators. For example, on Windows - // we check for both '/' and '\'. + // Canonical and alternative directory separators. Canonical should be + // first. + // +#ifdef _WIN32 + static constexpr const char* const directory_separators = "\\/"; +#else + static constexpr const char* const directory_separators = "/"; +#endif + + // Directory separator tests. On some platforms there could be multiple + // seperators. For example, on Windows we check for both '/' and '\'. 
// static bool is_separator (C c) @@ -84,10 +89,28 @@ namespace butl #endif } + // Return 1-based index in directory_separators string or 0 if not a + // separator. + // + static size_type + separator_index (C c) + { +#ifdef _WIN32 + return c == '\\' ? 1 : c == '/' ? 2 : 0; +#else + return c == '/' ? 1 : 0; +#endif + } + static size_type - find_separator (string_type const& s, size_type pos = 0) + find_separator (string_type const& s, + size_type pos = 0, + size_type n = string_type::npos) { - const C* r (find_separator (s.c_str () + pos, s.size () - pos)); + if (n == string_type::npos) + n = s.size (); + + const C* r (find_separator (s.c_str () + pos, n - pos)); return r != nullptr ? r - s.c_str () : string_type::npos; } @@ -223,116 +246,216 @@ namespace butl #endif }; - template <typename C> - class invalid_basic_path; - + // This implementation of a filesystem path has two types: path, which can + // represent any path (file, directory, etc.) and dir_path, which is derived + // from path. The internal representation of directories maintains a + // trailing slash. However, it is ignored in path comparison, size, and + // string spelling. For example: + // + // path p1 ("foo"); // File path. + // path p2 ("bar/"); // Directory path. + // + // path p3 (p1 / p2); // Throw: p1 is not a directory. + // path p4 (p2 / p1); // Ok, file "bar/foo". + // path p5 (p2 / p2); // Ok, directory "bar/bar/". + // + // dir_path d1 ("foo"); // Directory path "foo/". + // dir_path d2 ("bar\\"); // Directory path "bar\". + // + // dir_path d3 (d2 / d1); // "bar\\foo/" + // + // (p4 == d3); // true + // d3.string (); // "bar\\foo" + // d3.representation (); // "bar\\foo/" + // template <typename C, typename K> class basic_path; - // Cast from one path kind to another without any checking or - // processing. 
+ template <typename C> struct any_path_kind; + template <typename C> struct dir_path_kind; + + using path = basic_path<char, any_path_kind<char>>; + using dir_path = basic_path<char, dir_path_kind<char>>; + using invalid_path = invalid_basic_path<char>; + + // Cast from one path kind to another. Note that no checking is performed + // (e.g., that there is a trailing slash if casting to dir_path) but the + // representation is adjusted if necessary (e.g., the trailing slash is + // added to dir_path if missing). // template <class P, class C, class K> P path_cast (const basic_path<C, K>&); template <class P, class C, class K> P path_cast (basic_path<C, K>&&); + // Low-level path data storage. It is also by the implementation to pass + // around initialized/valid paths. + // template <typename C> - class path_data; + struct path_data + { + using string_type = std::basic_string<C>; + using size_type = typename string_type::size_type; + using difference_type = typename string_type::difference_type; - template <typename C> - struct dir_path_kind; + // The idea is as follows: path_ is always the "traditional" form; that + // is, "/" for the root directory and "/tmp" (no trailing slash) for the + // rest. This means we can return/store references to path_. + // + // Then we have diff_ which is the size difference between path_ and its + // "pure" part, that is, without any trailing slashes, even for "/". So: + // + // diff_ == -1 -- trailing slash in path_ (the "/" case) + // diff_ == 0 -- no trailing slash + // + // Finally, to represent non-root ("/") trailing slashes we use positive + // diff_ values. In this case diff_ is interpreted as a 1-based index in + // the path_traits::directory_separators string. + // + // Notes: + // - If path_ is empty, then diff_ can only be 0. + // - We could have used a much narrower integer for diff_. 
+ // + string_type path_; + difference_type diff_; - template <typename C> - struct any_path_kind - { - typedef path_data<C> base_type; - typedef basic_path<C, dir_path_kind<C>> dir_type; + size_type + _size () const {return path_.size () + (diff_ < 0 ? -1 : 0);} + + void + _swap (path_data& d) {path_.swap (d.path_); std::swap (diff_, d.diff_);} + + void + _clear () {path_.clear (); diff_ = 0;} + + // Constructors. + // + path_data (): diff_ (0) {} + + path_data (string_type&& p, difference_type d) + : path_ (std::move (p)), diff_ (path_.empty () ? 0 : d) {} + + explicit + path_data (string_type&& p) + : path_ (std::move (p)), diff_ (0) + { + size_type n (path_.size ()), i; + + if (n != 0 && (i = path_traits<C>::separator_index (path_[n - 1])) != 0) + { + if (n == 1) // The "/" case. + diff_ = -1; + else + { + diff_ = i; + path_.pop_back (); + } + } + } }; template <typename C> - struct dir_path_kind + struct any_path_kind { - typedef basic_path<C, any_path_kind<C>> base_type; - typedef basic_path<C, dir_path_kind<C>> dir_type; - }; + class base_type: protected path_data<C> // In essence protected path_data. + { + protected: + using path_data<C>::path_data; - typedef basic_path<char, any_path_kind<char>> path; - typedef basic_path<char, dir_path_kind<char>> dir_path; - typedef invalid_basic_path<char> invalid_path; + base_type () = default; + base_type (path_data<C>&& d): path_data<C> (std::move (d)) {} + }; - typedef basic_path<wchar_t, any_path_kind<wchar_t>> wpath; - typedef basic_path<wchar_t, dir_path_kind<wchar_t>> dir_wpath; - typedef invalid_basic_path<wchar_t> invalid_wpath; + using dir_type = basic_path<C, dir_path_kind<C>>; + + // Init and cast. + // + // If exact is true, return the path if the initialization was successful, + // that is, the passed string is a valid path and no modifications were + // necessary. Otherwise, return the empty object and leave the passed + // string untouched. 
+ // + // If extact is false, throw invalid_path if the string is not a valid + // path (e.g., uses an unsupported path notation on Windows). + // + using data_type = path_data<C>; + using string_type = std::basic_string<C>; + + static data_type + init (string_type&&, bool exact = false); + + static void + cast (data_type&) {} + }; template <typename C> - class path_data + struct dir_path_kind { - public: - typedef std::basic_string<C> string_type; + using base_type = basic_path<C, any_path_kind<C>>; + using dir_type = basic_path<C, dir_path_kind<C>>; - path_data () = default; + // Init and cast. + // + using data_type = path_data<C>; + using string_type = std::basic_string<C>; - explicit - path_data (string_type s): path_ (std::move (s)) {} + static data_type + init (string_type&&, bool exact = false); - protected: - string_type path_; + static void + cast (data_type&); }; template <typename C, typename K> class basic_path: public K::base_type { public: - typedef std::basic_string<C> string_type; - typedef typename string_type::size_type size_type; - - typedef typename K::base_type base_type; - typedef typename K::dir_type dir_type; - - typedef path_traits<C> traits; + using string_type = std::basic_string<C>; + using size_type = typename string_type::size_type; + using difference_type = typename string_type::difference_type; + using traits = path_traits<C>; struct iterator; - typedef std::reverse_iterator<iterator> reverse_iterator; + using reverse_iterator = std::reverse_iterator<iterator>; - // Create a special empty path. Note that we have to provide our - // own implementation rather than using '=default' to make clang - // allow default-initialized const instances of this type. + using base_type = typename K::base_type; + using dir_type = typename K::dir_type; + + // Create a special empty path. Note that we have to provide our own + // implementation rather than using '=default' to make clang allow + // default-initialized const instances of this type. 
// - basic_path () {}; + basic_path () {} // Constructors that initialize a path from a string argument throw the // invalid_path exception if the string is not a valid path (e.g., uses // unsupported path notations on Windows). // explicit - basic_path (C const* s): base_type (s) {init (this->path_);} + basic_path (C const* s): base_type (K::init (s)) {} basic_path (C const* s, size_type n) - : base_type (string_type (s, n)) {init (this->path_);} + : base_type (K::init (string_type (s, n))) {} explicit - basic_path (string_type s): base_type (std::move (s)) {init (this->path_);} + basic_path (string_type s): base_type (K::init (std::move (s))) {} basic_path (const string_type& s, size_type n) - : base_type (string_type (s, 0, n)) {init (this->path_);} + : base_type (K::init (string_type (s, 0, n))) {} basic_path (const string_type& s, size_type p, size_type n) - : base_type (string_type (s, p, n)) {init (this->path_);} + : base_type (K::init (string_type (s, p, n))) {} // Create a path using the exact string representation. If the string is // not a valid path or if it would require a modification, then empty path // is created instead and the passed string rvalue-reference is left // untouched. Note that no exception is thrown if the path is invalid. See - // also string()&& below. + // also representation()&& below. // enum exact_type {exact}; basic_path (string_type&& s, exact_type) - { - if (init (s, true)) - this->path_ = std::move (s); - } + : base_type (K::init (std::move (s), true)) {} - // Create a path as a sub-path identified by the [begin, end) - // range of components. + // Create a path as a sub-path identified by the [begin, end) range of + // components. 
// basic_path (const iterator& begin, const iterator& end); @@ -340,13 +463,13 @@ namespace butl : basic_path (rend.base (), rbegin.base ()) {} void - swap (basic_path& p) {this->path_.swap (p.path_);} + swap (basic_path& p) {this->_swap (p);} void - clear () {this->path_.clear ();} + clear () {this->_clear ();} - // Get/set current working directory. Throw std::system_error - // to report the underlying OS errors. + // Get/set current working directory. Throw std::system_error to report + // the underlying OS errors. // static dir_type current () {return dir_type (traits::current ());} @@ -375,19 +498,22 @@ namespace butl static basic_path temp_path (const string_type& prefix) { - return temp_directory () / basic_path (traits::temp_name (prefix)); + return temp_directory () / traits::temp_name (prefix); } public: bool empty () const {return this->path_.empty ();} + // Note that size does not include the trailing separator except for + // the root case. + // size_type size () const {return this->path_.size ();} - // Return true if this path doesn't have any directories. Note - // that "/foo" is not a simple path (it is "foo" in root directory) - // while "/" is (it is the root directory). + // Return true if this path doesn't have any directories. Note that "/foo" + // is not a simple path (it is "foo" in root directory) while "/" is (it + // is the root directory). // bool simple () const; @@ -396,10 +522,7 @@ namespace butl absolute () const; bool - relative () const - { - return !absolute (); - } + relative () const {return !absolute ();} bool root () const; @@ -421,33 +544,35 @@ namespace butl sup (const basic_path&) const; public: - // Return the path without the directory part. + // Return the path without the directory part. Leaf of a directory is + // itself a directory (contains trailing slash). Leaf of a root is the + // path itself. // basic_path leaf () const; // Return the path without the specified directory part. 
Throws - // invalid_path if the directory is not a prefix of *this. Expects - // both paths to be normalized. + // invalid_path if the directory is not a prefix of *this. Expects both + // paths to be normalized. // basic_path leaf (basic_path const&) const; - // Return the directory part of the path or empty path if - // there is no directory. + // Return the directory part of the path or empty path if there is no + // directory. Directory of a root is an empty path. // dir_type directory () const; - // Return the directory part of the path without the specified - // leaf part. Throws invalid_path if the leaf is not a suffix of - // *this. Expects both paths to be normalized. + // Return the directory part of the path without the specified leaf part. + // Throws invalid_path if the leaf is not a suffix of *this. Expects both + // paths to be normalized. // dir_type directory (basic_path const&) const; - // Return the root directory of the path or empty path if - // the directory is not absolute. + // Return the root directory of the path or empty path if the directory is + // not absolute. 
// dir_type root_directory () const; @@ -476,25 +601,33 @@ namespace butl public: struct iterator { - typedef string_type value_type; - typedef string_type* pointer; - typedef string_type reference; - typedef std::ptrdiff_t difference_type; - typedef std::bidirectional_iterator_tag iterator_category; + using value_type = string_type ; + using pointer = string_type*; + using reference = string_type ; + using size_type = typename string_type::size_type; + using difference_type = std::ptrdiff_t ; + using iterator_category = std::bidirectional_iterator_tag ; - typedef typename string_type::size_type size_type; + using data_type = path_data<C>; iterator (): p_ (nullptr) {} - iterator (const string_type& p, size_type b, size_type e) - : p_ (&p), b_ (b), e_ (e) {} + iterator (const data_type* p, size_type b, size_type e) + : p_ (p), b_ (b), e_ (e) {} iterator& operator++ () { - b_ = e_; + const string_type& s (p_->path_); - if (b_ != string_type::npos) - e_ = traits::find_separator (*p_, ++b_); + // Position past trailing separator, if any. + // + b_ = e_ != string_type::npos && ++e_ != s.size () + ? e_ + : string_type::npos; + + // Find next trailing separator. + // + e_ = b_ != string_type::npos ? traits::find_separator (s, b_) : b_; return *this; } @@ -502,13 +635,21 @@ namespace butl iterator& operator-- () { - e_ = b_; - - b_ = e_ == string_type::npos // Last component? - ? traits::rfind_separator (*p_) - : (--e_ == 0 // First empty component? - ? string_type::npos - : traits::rfind_separator (*p_, e_ - 1)); + const string_type& s (p_->path_); + + // Find the new end. + // + e_ = b_ == string_type::npos // Past end? + ? (traits::is_separator (s.back ()) // Have trailing slash? + ? s.size () - 1 + : string_type::npos) + : b_ - 1; + + // Find the new begin. + // + b_ = e_ == 0 // Empty component? + ? string_type::npos + : traits::rfind_separator (s, e_ != string_type::npos ? e_ - 1 : e_); b_ = b_ == string_type::npos // First component? ? 
0 @@ -523,9 +664,26 @@ namespace butl iterator operator-- (int) {iterator r (*this); operator-- (); return r;} - string_type operator* () const + string_type + operator* () const + { + return string_type (p_->path_, + b_, + e_ != string_type::npos ? e_ - b_ : e_); + } + + // Return the directory separator after this component or '\0' if there + // is none. This, for example, can be used to determine if the last + // component is a directory. + // + C + separator () const { - return string_type (*p_, b_, (e_ != string_type::npos ? e_ - b_ : e_)); + return e_ != string_type::npos + ? p_->path_[e_] + : (p_->diff_ > 0 + ? path_traits<C>::directory_separators[p_->diff_ - 1] + : 0); } pointer operator-> () const = delete; @@ -542,10 +700,11 @@ namespace butl private: friend class basic_path; - // b != npos && e == npos - last component + // b - first character of component + // e - separator after component (or npos if none) // b == npos && e == npos - one past last component (end) // - const string_type* p_; + const data_type* p_; size_type b_; size_type e_; }; @@ -558,15 +717,15 @@ namespace butl public: // Normalize the path. This includes collapsing the '.' and '..' - // directories if possible, collapsing multiple directory - // separators, and converting all directory separators to the - // canonical form. Return *this. + // directories if possible, collapsing multiple directory separators, and + // converting all directory separators to the canonical form. Return + // *this. // basic_path& normalize (); - // Make the path absolute using the current directory unless - // it is already absolute. Return *this. + // Make the path absolute using the current directory unless it is already + // absolute. Return *this. 
// basic_path& complete (); @@ -582,7 +741,7 @@ namespace butl basic_path& operator/= (basic_path const&); - // Append a single path component (must not contain directory separators) + // Combine a single path component (must not contain directory separators) // as a string, without first constructing the path object. // basic_path& @@ -591,47 +750,20 @@ namespace butl basic_path& operator/= (const C*); - basic_path - operator+ (string_type const& s) const - { - return basic_path (this->path_ + s); - } - - basic_path - operator+ (const C* s) const - { - return basic_path (this->path_ + s); - } - - basic_path - operator+ (C c) const - { - return basic_path (this->path_ + c); - } - + // Append to the end of the path (normally an extension, etc). + // basic_path& - operator+= (string_type const& s) - { - this->path_ += s; - return *this; - } + operator+= (string_type const&); basic_path& - operator+= (const C* s) - { - this->path_ += s; - return *this; - } + operator+= (const C*); basic_path& - operator+= (C c) - { - this->path_ += c; - return *this; - } + operator+= (C); - // Note that comparison is case-insensitive if the filesystem is - // not case-sensitive (e.g., Windows). + // Note that comparison is case-insensitive if the filesystem is not + // case-sensitive (e.g., Windows). And it ignored trailing slashes + // except for the root case. // template <typename K1> int @@ -639,56 +771,88 @@ namespace butl return traits::compare (this->path_, x.path_);} public: + // Path string and representation. The string does not contain the + // trailing slash except for the root case. In other words, it is the + // "traditional" spelling of the path that can be passed to system calls, + // etc. Representation, on the other hand is the "precise" spelling that + // includes the trailing slash, if any. One cannot always round-trip a + // path using string() but can using representation(). 
Note also that + // representation() returns a copy while string() returns a (tracking) + // reference. + // const string_type& string () const& {return this->path_;} - // Moves the underlying path string out of the path object. The - // path object becomes empty. Usage: std::move (p).string (). + string_type + representation () const&; + + // Moves the underlying path string out of the path object. The path + // object becomes empty. Usage: std::move (p).string (). // string_type string () && {string_type r; r.swap (this->path_); return r;} - // If possible, return a POSIX representation of the path. For example, - // for a Windows path in the form foo\bar this function will return - // foo/bar. If it is not possible to create a POSIX representation for - // this path (e.g., c:\foo), this function will throw the invalid_path - // exception. + string_type + representation () &&; + + // Trailing directory separator or '\0' if there is none. + // + C + separator () const; + + // As above but return it as a (potentially empty) string. + // + string_type + separator_string () const; + + // If possible, return a POSIX version of the path. For example, for a + // Windows path in the form foo\bar this function will return foo/bar. If + // it is not possible to create a POSIX version for this path (e.g., + // c:\foo), this function will throw the invalid_path exception. // string_type posix_string () const; + // Implementation details. + // protected: - basic_path (string_type s, bool i): base_type (std::move (s)) - { - if (i) - init (this->path_); - } + using data_type = path_data<C>; + + // Direct initialization without init()/cast(). + // + explicit + basic_path (data_type&& d): base_type (std::move (d)) {} + + using base_type::_size; - // Common implementation for operator=/(). + // Common implementation for operator/= and operator+=. 
// void - combine (const C*, size_type); + combine (const C*, size_type, difference_type); - private: - template <class P, class C1, class K1> - friend P butl::path_cast (const basic_path<C1, K1>&); + void + combine (const C*, size_type); - template <class P, class C1, class K1> - friend P butl::path_cast (basic_path<C1, K1>&&); + void + append (const C*, size_type); - // If exact is true, return whether the initialization was successful, - // that is, the passed string is a valid path and no modifications were - // necessary. Otherwise (extact is false), throw invalid_path if the - // string is not a valid path (e.g., uses an unsupported path notation on - // Windows). + // Friends. // - bool - init (string_type& s, bool exact = false); + template <class C1, class K1> + friend class basic_path; + + template <class C1, class K1, class K2> + friend basic_path<C1, K1> + path_cast_impl (const basic_path<C1, K2>&, basic_path<C1, K1>*); + + template <class C1, class K1, class K2> + friend basic_path<C1, K1> + path_cast_impl (basic_path<C1, K2>&&, basic_path<C1, K1>*); }; template <typename C, typename K> inline basic_path<C, K> - operator/ (basic_path<C, K> const& x, basic_path<C, K> const& y) + operator/ (const basic_path<C, K>& x, const basic_path<C, K>& y) { basic_path<C, K> r (x); r /= y; @@ -697,7 +861,7 @@ namespace butl template <typename C, typename K> inline basic_path<C, K> - operator/ (basic_path<C, K> const& x, std::basic_string<C> const& y) + operator/ (const basic_path<C, K>& x, const std::basic_string<C>& y) { basic_path<C, K> r (x); r /= y; @@ -706,13 +870,40 @@ namespace butl template <typename C, typename K> inline basic_path<C, K> - operator/ (basic_path<C, K> const& x, const C* y) + operator/ (const basic_path<C, K>& x, const C* y) { basic_path<C, K> r (x); r /= y; return r; } + template <typename C, typename K> + inline basic_path<C, K> + operator+ (const basic_path<C, K>& x, const std::basic_string<C>& y) + { + basic_path<C, K> r (x); + r += y; + 
return r; + } + + template <typename C, typename K> + inline basic_path<C, K> + operator+ (const basic_path<C, K>& x, const C* y) + { + basic_path<C, K> r (x); + r += y; + return r; + } + + template <typename C, typename K> + inline basic_path<C, K> + operator+ (const basic_path<C, K>& x, C y) + { + basic_path<C, K> r (x); + r += y; + return r; + } + template <typename C, typename K1, typename K2> inline bool operator== (const basic_path<C, K1>& x, const basic_path<C, K2>& y) diff --git a/butl/path.cxx b/butl/path.cxx index 46e36fd..9325c1e 100644 --- a/butl/path.cxx +++ b/butl/path.cxx @@ -60,8 +60,6 @@ namespace butl LIBBUTL_EXPORT path_traits<char>::string_type path_traits<char>:: current () { - // @@ throw system_error (and in the other current() versions). - #ifdef _WIN32 char cwd[_MAX_PATH]; if (_getcwd (cwd, _MAX_PATH) == 0) @@ -72,7 +70,7 @@ namespace butl throw system_error (errno, system_category ()); #endif - return string_type (cwd); + return cwd; } template <> @@ -147,17 +145,15 @@ namespace butl { #ifdef _WIN32 char d[_MAX_PATH + 1]; - DWORD r (GetTempPathA (_MAX_PATH + 1, d)); - - if (r == 0) + if (GetTempPathA (_MAX_PATH + 1, d) == 0) { string e (last_error_msg ()); throw system_error (ENOTDIR, system_category (), e); } - return string_type (d); + return d; #else - return string_type (butl::temp_directory ()); + return butl::temp_directory (); #endif } @@ -177,12 +173,12 @@ namespace butl home () { #ifndef _WIN32 - return string_type (butl::home ()); + return butl::home (); #else // Could be set by, e.g., MSYS and Cygwin shells. 
// if (const char* h = getenv ("HOME")) - return string_type (h); + return h; char h[_MAX_PATH]; HRESULT r (SHGetFolderPathA (NULL, CSIDL_PROFILE, NULL, 0, h)); @@ -193,7 +189,7 @@ namespace butl throw system_error (ENOTDIR, system_category (), e); } - return string_type (h); + return h; #endif } @@ -240,7 +236,7 @@ namespace butl throw system_error (EINVAL, system_category ()); #endif - return string_type (wcwd); + return wcwd; } template <> @@ -269,9 +265,7 @@ namespace butl { #ifdef _WIN32 wchar_t d[_MAX_PATH + 1]; - DWORD r (GetTempPathW (_MAX_PATH + 1, d)); - - if (r == 0) + if (GetTempPathW (_MAX_PATH + 1, d) == 0) { string e (last_error_msg ()); throw system_error (ENOTDIR, system_category (), e); @@ -293,7 +287,7 @@ namespace butl throw system_error (ENOTSUP, system_category ()); #endif - return string_type (d); + return d; } template <> @@ -319,12 +313,12 @@ namespace butl if (r == PATH_MAX) throw system_error (ENOTSUP, system_category ()); - return string_type (d); + return d; #else // Could be set by, e.g., MSYS and Cygwin shells. // if (const wchar_t* h = _wgetenv (L"HOME")) - return string_type (h); + return h; wchar_t h[_MAX_PATH]; HRESULT r (SHGetFolderPathW (NULL, CSIDL_PROFILE, NULL, 0, h)); @@ -335,7 +329,7 @@ namespace butl throw system_error (ENOTDIR, system_category (), e); } - return string_type (h); + return h; #endif } diff --git a/butl/path.ixx b/butl/path.ixx index 48d6576..3d1f20c 100644 --- a/butl/path.ixx +++ b/butl/path.ixx @@ -25,41 +25,57 @@ namespace butl } #endif - // @@ Should only enable_if P is basic_path<C, K1>. 
- // + template <class C, class K1, class K2> + inline basic_path<C, K1> + path_cast_impl (const basic_path<C, K2>& p, basic_path<C, K1>*) + { + typename basic_path<C, K1>::data_type d ( + typename basic_path<C, K1>::string_type (p.path_), p.diff_); + K1::cast (d); + return basic_path<C, K1> (std::move (d)); + } + + template <class C, class K1, class K2> + inline basic_path<C, K1> + path_cast_impl (basic_path<C, K2>&& p, basic_path<C, K1>*) + { + typename basic_path<C, K1>::data_type d (std::move (p.path_), p.diff_); + K1::cast (d); + return basic_path<C, K1> (std::move (d)); + } + template <class P, class C, class K> inline P path_cast (const basic_path<C, K>& p) { - return P (p.path_, false); + return path_cast_impl (p, static_cast<P*> (nullptr)); } template <class P, class C, class K> inline P path_cast (basic_path<C, K>&& p) { - return P (std::move (p.path_), false); + return path_cast_impl (std::move (p), static_cast<P*> (nullptr)); } template <typename C, typename K> inline bool basic_path<C, K>:: simple () const { - return -#ifndef _WIN32 - root () || -#endif - traits::find_separator (this->path_) == string_type::npos; + return empty () || + traits::rfind_separator (this->path_, _size () - 1) == string_type::npos; } template <typename C, typename K> inline bool basic_path<C, K>:: absolute () const { + const string_type& s (this->path_); + #ifdef _WIN32 - return this->path_.size () > 1 && this->path_[1] == ':'; + return s.size () > 1 && s[1] == ':'; #else - return !this->path_.empty () && traits::is_separator (this->path_[0]); + return s.size () != 0 && traits::is_separator (s[0]); #endif } @@ -67,10 +83,12 @@ namespace butl inline bool basic_path<C, K>:: root () const { + const string_type& s (this->path_); + #ifdef _WIN32 - return this->path_.size () == 2 && this->path_[1] == ':'; + return s.size () == 2 && s[1] == ':'; #else - return this->path_.size () == 1 && traits::is_separator (this->path_[0]); + return s.size () == 1 && traits::is_separator (s[0]); 
#endif } @@ -78,95 +96,116 @@ namespace butl inline bool basic_path<C, K>:: sub (const basic_path& p) const { - size_type n (p.path_.size ()); + // The thinking here is that we can use the full string representations + // (including the trailing slash in "/"). + // + const string_type& ps (p.path_); + size_type pn (ps.size ()); - if (n == 0) + if (pn == 0) return true; - size_type m (this->path_.size ()); + const string_type& s (this->path_); + size_type n (s.size ()); // The second condition guards against the /foo-bar vs /foo case. // - return m >= n && - traits::compare (this->path_.c_str (), n, p.path_.c_str (), n) == 0 && - (traits::is_separator (p.path_.back ()) || // p ends with a separator - m == n || // *this == p - traits::is_separator (this->path_[n])); // next char is a separator + return n >= pn && + traits::compare (s.c_str (), pn, ps.c_str (), pn) == 0 && + (traits::is_separator (ps.back ()) || // p ends with a separator + n == pn || // *this == p + traits::is_separator (s[pn])); // next char is a separator } template <typename C, typename K> inline bool basic_path<C, K>:: sup (const basic_path& p) const { - size_type n (p.path_.size ()); + // The thinking here is that we can use the full string representations + // (including the trailing slash in "/"). + // + const string_type& ps (p.path_); + size_type pn (ps.size ()); - if (n == 0) + if (pn == 0) return true; - size_type m (this->path_.size ()); + const string_type& s (this->path_); + size_type n (s.size ()); // The second condition guards against the /foo-bar vs bar case. 
// - return m >= n && - traits::compare ( - this->path_.c_str () + m - n, n, p.path_.c_str (), n) == 0 && - (m == n || // *this == p - traits::is_separator (this->path_[m - n - 1])); // prev char separator + return n >= pn && + traits::compare (s.c_str () + n - pn, pn, ps.c_str (), pn) == 0 && + (n == pn || // *this == p + traits::is_separator (s[n - pn - 1])); // previous char is a separator + } + + template <typename C, typename K> + inline basic_path<C, K> basic_path<C, K>:: + leaf () const + { + const string_type& s (this->path_); + size_type n (_size ()); + + size_type p (n != 0 + ? traits::rfind_separator (s, n - 1) + : string_type::npos); + + return p != string_type::npos + ? basic_path (data_type (string_type (s, p + 1), this->diff_)) + : *this; + } + + template <typename C, typename K> + inline typename basic_path<C, K>::dir_type basic_path<C, K>:: + directory () const + { + const string_type& s (this->path_); + size_type n (_size ()); + + size_type p (n != 0 + ? traits::rfind_separator (s, n - 1) + : string_type::npos); + + return p != string_type::npos + ? dir_type (data_type (string_type (s, 0, p + 1))) // Include slash. + : dir_type (); } template <typename C, typename K> inline auto basic_path<C, K>:: begin () const -> iterator { - size_type b, e; + const string_type& s (this->path_); - if (this->path_.empty ()) - b = e = string_type::npos; + size_type b (s.empty () ? string_type::npos : 0); + size_type e (b == 0 ? traits::find_separator (s) : b); -#ifndef _WIN32 - else if (root ()) - { - // We want to return a single empty component. Here we return - // the begin position one past the end. Not sure if this legal. 
- // - b = 1; - e = string_type::npos; - } -#endif - else - { - b = 0; - e = traits::find_separator (this->path_); - } - - return iterator (this->path_, b, e); + return iterator (this, b, e); } template <typename C, typename K> inline auto basic_path<C, K>:: end () const -> iterator { - return iterator (this->path_, string_type::npos, string_type::npos); + return iterator (this, string_type::npos, string_type::npos); } template <typename C, typename K> inline basic_path<C, K>:: basic_path (const iterator& b, const iterator& e) + : base_type ( + b == e + ? data_type () + // We need to include the trailing separator but it is implied if + // e == end(). + // + : (e.b_ != string_type::npos + ? data_type (string_type (b.p_->path_, b.b_, e.b_ - b.b_)) + : data_type (string_type (b.p_->path_, b.b_), b.p_->diff_))) { //assert (b.p_ == e.p_); - - if (b != e) - { - this->path_.assign ( - *b.p_, b.b_, (e.b_ != string_type::npos ? e.b_ - b.b_ - 1 : e.b_)); - -#ifndef _WIN32 - if (this->path_.empty ()) - this->path_ = '/'; -#endif - - // No init() should be necessary. - } } template <typename C, typename K> @@ -187,7 +226,7 @@ namespace butl complete (); normalize (); #else - traits::realize (this->path_); + traits::realize (this->path_); // Note: we retain trailing slash. #endif return *this; } @@ -196,24 +235,34 @@ namespace butl inline typename basic_path<C, K>::dir_type basic_path<C, K>:: root_directory () const { - return absolute () #ifdef _WIN32 - // Disambiguate with dir_type(string_type, bool). - // - ? dir_type (this->path_, static_cast<size_type> (2)) + // Note: on Windows we may have "c:" but still need to return "c:\". + // + const string_type& s (this->path_); + + return absolute () + ? dir_type ( + s.size () > 2 + ? data_type (string_type (s, 0, 3)) + : data_type (string_type (s), this->diff_ != 0 ? this->diff_ : 1)) + : dir_type (); #else - ? dir_type ("/") -#endif + return absolute () + ?
dir_type (data_type ("/", -1)) : dir_type (); +#endif + } template <typename C, typename K> inline basic_path<C, K> basic_path<C, K>:: base () const { - size_type p (traits::find_extension (this->path_)); + const string_type& s (this->path_); + size_type p (traits::find_extension (s)); + return p != string_type::npos - ? basic_path (this->path_.c_str (), p) + ? basic_path (data_type (string_type (s, 0, p), this->diff_)) : *this; } @@ -221,8 +270,9 @@ namespace butl inline const C* basic_path<C, K>:: extension () const { - size_type p (traits::find_extension (this->path_)); - return p != string_type::npos ? this->path_.c_str () + p + 1 : nullptr; + const string_type& s (this->path_); + size_type p (traits::find_extension (s)); + return p != string_type::npos ? s.c_str () + p + 1 : nullptr; } #ifndef _WIN32 @@ -236,16 +286,160 @@ namespace butl template <typename C, typename K> inline void basic_path<C, K>:: - combine (const C* r, size_type rn) + combine (const C* r, size_type rn, difference_type rd) { - size_type ln (this->path_.size ()); + //assert (rn != 0); - if (ln != 0 && rn != 0) + string_type& l (this->path_); + difference_type& d (this->diff_); + + // Handle the separator. LHS should be empty or already have one. + // + switch (d) { - if (!traits::is_separator (this->path_[ln - 1])) - this->path_ += traits::directory_separator; + case 0: if (!l.empty ()) throw invalid_basic_path<C> (l); break; + case -1: break; // Already in the string. + default: l += path_traits<C>::directory_separators[d - 1]; } + l.append (r, rn); + d = rd; // New trailing separator from RHS. + } + + template <typename C, typename K> + inline void basic_path<C, K>:: + combine (const C* r, size_type rn) + { + // If we do (dir_path / path) then we will end up with path. What should + // we end up if we do (dir_path / "foo") vs (dir_path / "foo/")? We cannot + // choose at runtime what kind of path to return. 
One (elaborate) option + // would be to handle the trailing slash but also call K::cast() so that + // dir_path gets the canonical trailing slash if one wasn't there. + // + // For now we won't allow the slash and will always add the canonical one + // for dir_path (via cast()). + // + if (traits::find_separator (r, rn) != nullptr) + throw invalid_basic_path<C> (r); + + combine (r, rn, 0); + K::cast (*this); + } + + template <typename C, typename K> + inline basic_path<C, K>& basic_path<C, K>:: + operator/= (basic_path<C, K> const& r) + { + if (r.absolute () && !empty ()) // Allow ('' / '/foo'). + throw invalid_basic_path<C> (r.path_); + + if (!r.empty ()) + combine (r.path_.c_str (), r.path_.size (), r.diff_); + + return *this; + } + + template <typename C, typename K> + inline basic_path<C, K>& basic_path<C, K>:: + operator/= (string_type const& r) + { + if (size_type rn = r.size ()) + combine (r.c_str (), rn); + + return *this; + } + + template <typename C, typename K> + inline basic_path<C, K>& basic_path<C, K>:: + operator/= (const C* r) + { + if (size_type rn = string_type::traits_type::length (r)) + combine (r, rn); + + return *this; + } + + template <typename C, typename K> + inline void basic_path<C, K>:: + append (const C* r, size_type rn) + { + //assert (this->diff_ != -1); // Append to root? 
this->path_.append (r, rn); } + + template <typename C, typename K> + inline basic_path<C, K>& basic_path<C, K>:: + operator+= (string_type const& s) + { + append (s.c_str (), s.size ()); + return *this; + } + + template <typename C, typename K> + inline basic_path<C, K>& basic_path<C, K>:: + operator+= (const C* s) + { + append (s, string_type::traits_type::length (s)); + return *this; + } + + template <typename C, typename K> + inline basic_path<C, K>& basic_path<C, K>:: + operator+= (C c) + { + append (&c, 1); + return *this; + } + + template <typename C, typename K> + inline auto basic_path<C, K>:: + representation () const& -> string_type + { + string_type r (this->path_); + + if (this->diff_ > 0) + r += path_traits<C>::directory_separators[this->diff_ - 1]; + + return r; + } + + template <typename C, typename K> + inline auto basic_path<C, K>:: + representation () && -> string_type + { + string_type r; + r.swap (this->path_); + + if (this->diff_ > 0) + r += path_traits<C>::directory_separators[this->diff_ - 1]; + + return r; + } + + template <typename C, typename K> + inline C basic_path<C, K>:: + separator () const + { + return (this->diff_ == 0 ? 0 : + this->diff_ == -1 ? this->path_[0] : + path_traits<C>::directory_separators[this->diff_ - 1]); + } + + template <typename C, typename K> + inline auto basic_path<C, K>:: + separator_string () const -> string_type + { + C c (separator ()); + return c == 0 ? string_type () : string_type (1, c); + } + + template <typename C> + inline void dir_path_kind<C>:: + cast (data_type& d) + { + // Add trailing slash if one isn't already there. + // + if (!d.path_.empty () && d.diff_ == 0) + d.diff_ = 1; // Canonical separator is always first. 
+ } } diff --git a/butl/path.txx b/butl/path.txx index 94fbd90..1d6995e 100644 --- a/butl/path.txx +++ b/butl/path.txx @@ -8,30 +8,51 @@ namespace butl { template <typename C, typename K> basic_path<C, K> basic_path<C, K>:: - leaf () const + leaf (basic_path<C, K> const& d) const { - size_type p (traits::rfind_separator (this->path_)); + size_type dn (d.path_.size ()); + + if (dn == 0) + return *this; + + const string_type& s (this->path_); - return p != string_type::npos - ? basic_path (this->path_.c_str () + p + 1, this->path_.size () - p - 1) - : *this; + if (!sub (d)) + throw invalid_basic_path<C> (s); + + // If there is implied trailing slash, add it to count. Unless it is + // "matched" by the implied slash on the other side. + // + if (d.diff_ > 0 && dn < s.size ()) + dn++; + + // Preserve trailing slash. + // + return basic_path (data_type (string_type (s, dn, s.size () - dn), + this->diff_)); } template <typename C, typename K> typename basic_path<C, K>::dir_type basic_path<C, K>:: - directory () const + directory (basic_path<C, K> const& l) const { - if (root ()) - return dir_type (); + size_type ln (l.path_.size ()); - size_type p (traits::rfind_separator (this->path_)); + const string_type& s (this->path_); - // Include the trailing slash so that we get correct behavior - // if directory is root. - // - return p != string_type::npos - ? dir_type (this->path_.c_str (), p + 1) - : dir_type (); + if (ln == 0) + { + if (this->diff_ == 0) // Must be a directory. + throw invalid_basic_path<C> (s); + + return dir_type (data_type (string_type (s), this->diff_)); + } + + if (!sup (l)) + throw invalid_basic_path<C> (s); + + return dir_type ( + data_type (string_type (s, 0, s.size () - ln))); // Include slash. } #ifdef _WIN32 @@ -55,85 +76,6 @@ namespace butl #endif template <typename C, typename K> - basic_path<C, K>& basic_path<C, K>:: - operator/= (basic_path<C, K> const& r) - { - if (r.absolute () && !this->path_.empty ()) // Allow ('' / '/foo'). 
- throw invalid_basic_path<C> (r.path_); - - combine (r.path_.c_str (), r.path_.size ()); - return *this; - } - - template <typename C, typename K> - basic_path<C, K>& basic_path<C, K>:: - operator/= (string_type const& r) - { - if (traits::find_separator (r) != string_type::npos) - throw invalid_basic_path<C> (r); - - combine (r.c_str (), r.size ()); - return *this; - } - - template <typename C, typename K> - basic_path<C, K>& basic_path<C, K>:: - operator/= (const C* r) - { - size_type rn (string_type::traits_type::length (r)); - - if (traits::find_separator (r, rn) != nullptr) - throw invalid_basic_path<C> (r); - - combine (r, rn); - return *this; - } - - template <typename C, typename K> - basic_path<C, K> basic_path<C, K>:: - leaf (basic_path<C, K> const& d) const - { - size_type n (d.path_.size ()); - - if (n == 0) - return *this; - - if (!sub (d)) - throw invalid_basic_path<C> (this->path_); - - size_type m (this->path_.size ()); - - if (n != m -#ifndef _WIN32 - && !d.root () -#endif - ) - n++; // Skip the directory separator (unless it is POSIX root). - - return basic_path (this->path_.c_str () + n, m - n); - } - - template <typename C, typename K> - typename basic_path<C, K>::dir_type basic_path<C, K>:: - directory (basic_path<C, K> const& l) const - { - size_type n (l.path_.size ()); - - if (n == 0) - return dir_type (this->path_); - - if (!sup (l)) - throw invalid_basic_path<C> (this->path_); - - size_type m (this->path_.size ()); - - if (n != m) - n++; // Skip the directory separator. - - return dir_type (this->path_.c_str (), m - n); - } - - template <typename C, typename K> basic_path<C, K> basic_path<C, K>:: relative (basic_path<C, K> d) const { @@ -144,7 +86,7 @@ namespace butl if (sub (d)) break; - r /= basic_path (".."); + r /= basic_path ("../"); // Roots of the paths do not match. 
// @@ -162,40 +104,61 @@ namespace butl if (empty ()) return *this; + string_type& s (this->path_); + difference_type& d (this->diff_); + bool abs (absolute ()); typedef std::vector<string_type> paths; paths ps; - for (size_type b (0), e (traits::find_separator (this->path_)), - n (this->path_.size ());; - e = traits::find_separator (this->path_, b)) + bool tsep (d != 0); // Trailing directory separator. { - string_type s (this->path_, b, e == string_type::npos ? e : e - b); - ps.push_back (s); + size_type n (_size ()); - if (e == string_type::npos) - break; + for (size_type b (0), e (traits::find_separator (s, 0, n)); + ; + e = traits::find_separator (s, b, n)) + { + ps.push_back ( + string_type (s, b, (e == string_type::npos ? n : e) - b)); - ++e; + if (e == string_type::npos) + break; - while (e < n && traits::is_separator (this->path_[e])) ++e; - if (e == n) - break; + // Skip consecutive directory separators. + // + while (e != n && traits::is_separator (s[e])) + ++e; + + if (e == n) + break; - b = e; + b = e; + } + + // If the last component is "." or ".." then this is a directory. + // + if (!tsep) + { + const string_type& l (ps.back ()); + size_type ln (l.size ()); + + if ((ln == 1 && l[0] == '.') || + (ln == 2 && l[0] == '.' && l[1] == '.')) + tsep = true; + } } - // First collapse '.' and '..'. + // Collapse "." and "..". // paths r; - for (typename paths::const_iterator i (ps.begin ()), e (ps.end ()); - i != e; ++i) + for (typename paths::iterator i (ps.begin ()), e (ps.end ()); i != e; ++i) { - string_type const& s (*i); + string_type& s (*i); size_type n (s.size ()); if (n == 1 && s[0] == '.') @@ -222,7 +185,7 @@ namespace butl } } - r.push_back (s); + r.push_back (std::move (s)); } // Reassemble the path. @@ -238,10 +201,20 @@ namespace butl p += traits::directory_separator; } - if (p.empty () && !r.empty ()) - p += traits::directory_separator; // Root directory. + if (tsep && (!p.empty () || abs)) // Distinguish "/"-empty and "."-empty. 
+ { + if (p.empty ()) + { + p += traits::directory_separator; + d = -1; + } + else + d = 1; // Canonical separator is always first. + } + else + d = 0; - this->path_.swap (p); + s.swap (p); return *this; } @@ -257,10 +230,13 @@ namespace butl traits::current (s); } - template <typename C, typename K> - bool basic_path<C, K>:: - init (string_type& s, bool exact) + template <typename C> + auto any_path_kind<C>:: + init (string_type&& s, bool exact) -> data_type { + using size_type = typename string_type::size_type; + using difference_type = typename string_type::difference_type; + size_type n (s.size ()); #ifdef _WIN32 @@ -272,25 +248,58 @@ namespace butl (n > 1 && s[0] == '\\' && s[1] == '\\')) { if (exact) - return false; + return data_type (); else throw invalid_basic_path<C> (s); } #endif - // Strip trailing slashes except for the case where the single slash - // represents the root directory. + // Strip trailing slashes. // - for (; n > 1 && traits::is_separator (s[n - 1]); --n) ; + size_type m (n), di (0); + for (size_type i; + m != 0 && (i = path_traits<C>::separator_index (s[m - 1])) != 0; + --m) di = i; - if (n != s.size ()) + difference_type d (0); + if (size_t k = n - m) { - if (!exact) - this->path_.resize (n); + // We can only accommodate one trailing slash in the exact mode. + // + if (exact && k > 1) + return data_type (); - return !exact; + if (m == 0) // The "/" case. + { + ++m; // Keep one slash in the string. + d = -1; + } + else + d = di; + + s.resize (m); } - return true; + return data_type (std::move (s), d); + } + + template <typename C> + auto dir_path_kind<C>:: + init (string_type&& s, bool exact) -> data_type + { + // If we don't already have the separator then this can't be the exact + // initialization. + // + if (exact && !s.empty () && !path_traits<C>::is_separator (s.back ())) + return data_type (); + + data_type r (any_path_kind<C>::init (std::move (s), exact)); + + // Unless the result is empty, make sure we have the trailing slash.
+ // + if (!r.path_.empty () && r.diff_ == 0) + r.diff_ = 1; // Canonical separator is always first. + + return r; } } diff --git a/butl/process.cxx b/butl/process.cxx index 9bb0ea2..5c9a0f0 100644 --- a/butl/process.cxx +++ b/butl/process.cxx @@ -281,13 +281,13 @@ namespace butl for (size_t b (0), e (paths.find (traits::path_separator)); b != string::npos;) { - path p (string (paths, b, e != string::npos ? e - b : e)); + dir_path p (string (paths, b, e != string::npos ? e - b : e)); // Empty path (i.e., a double colon or a colon at the beginning or end // of PATH) means search in the current directory. // if (p.empty ()) - p = path ("."); + p = dir_path ("."); path dp (p / f); |