From e6cee3c2f9b03852ed4837f9be05e0a2fa4542a8 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Mon, 30 Sep 2019 13:48:28 +0300 Subject: Move path match to path-pattern.?xx --- libbutl/filesystem.mxx | 214 ++----------------------------------------------- 1 file changed, 7 insertions(+), 207 deletions(-) (limited to 'libbutl/filesystem.mxx') diff --git a/libbutl/filesystem.mxx b/libbutl/filesystem.mxx index 261b985..43c23b7 100644 --- a/libbutl/filesystem.mxx +++ b/libbutl/filesystem.mxx @@ -23,14 +23,12 @@ using mode_t = int; #endif -#include - #ifndef __cpp_lib_modules_ts #include -#include // ptrdiff_t, size_t +#include // ptrdiff_t #include // uint16_t, etc #include // move(), pair -#include +#include // input_iterator_tag #include #include //@@ MOD needed by timestamp module (no re-export). @@ -45,14 +43,14 @@ import std.core; #endif import butl.path; -import butl.optional; import butl.timestamp; +import butl.path_pattern; // path_match_flags -import butl.utility; +import butl.utility; // operator<<(ostream,exception), throw_generic_error() #else #include -#include #include +#include #include #endif @@ -717,79 +715,9 @@ LIBBUTL_MODEXPORT namespace butl inline dir_iterator begin (dir_iterator&&); #endif - // Wildcard pattern match and search (aka glob). - // - // The wildcard pattern contains the literal characters that match - // themselves and the wildcard characters that match a single or multiple - // characters. Currently the following wildcards are supported: - // - // * - match any number of characters (including zero) - // ? - match any single character - // [...] - match a character with a "bracket expression"; currently we only - // support literal characters and ranges (no character/equivalence - // classes, etc; see Pattern Matching Notation section of the Shell - // Command Language POSIX specification for details) - // - // Note also that currently we don't support the special characters - // backslash-escaping (as mandated by POSIX). 
- - // Path match/search flags. - // - enum class path_match_flags: std::uint16_t - { - // Follow symlinks. This only applies to symlinks that are matched against - // the rightmost component of the pattern. In particular, this mean that - // such symlinks will never match a directory pattern and some results can - // be missing for the recursive rightmost component. - // - follow_symlinks = 0x1, - - // Make wildcard-only pattern component (e.g., `*/...`, `.../*/...`, or - // `.../*`) match absent path component. For example, with this flag - // set, the `a/*/b` pattern matches not only `a/x/b` path, but also `a/b`. - // - // Note that this does not apply to single-component patterns and the - // pattern type is always preserved. In particular, the `a/*/` pattern - // matches `a/` but not `a`. - // - // Finally, keep in mind that only absent directory components can be - // matched this way. In particular, pattern `a*/*` does not match `ab` - // (but `a*/*/` matches `ab/`). - // - match_absent = 0x2, - - none = 0 - }; - - inline path_match_flags operator& (path_match_flags, path_match_flags); - inline path_match_flags operator| (path_match_flags, path_match_flags); - inline path_match_flags operator&= (path_match_flags&, path_match_flags); - inline path_match_flags operator|= (path_match_flags&, path_match_flags); - - // Return true if name matches pattern. Both must be single path components, - // possibly with a trailing directory separator to indicate a directory. - // - // If the pattern ends with a directory separator, then it only matches a - // directory name (i.e., ends with a directory separator, but potentially - // different). Otherwise, it only matches a non-directory name (no trailing - // directory separator). - // - LIBBUTL_SYMEXPORT bool - path_match (const std::string& name, const std::string& pattern); - - // Return true if path entry matches pattern. Note that the match is - // performed literally, with no paths normalization being performed. 
The - // start directory is used if the first pattern component is a self-matching - // wildcard (see below for the start directory and wildcard semantics). + // Wildcard pattern search (aka glob). // - // In addition to the wildcard characters, it also recognizes the ** and *** - // wildcard sequences (see path_search() for details). - // - LIBBUTL_SYMEXPORT bool - path_match (const path& entry, - const path& pattern, - const dir_path& start = dir_path (), - path_match_flags = path_match_flags::none); + // For details on the wildcard patterns see <libbutl/path-pattern.mxx> // Search for paths matching the pattern calling the specified function for // each matching path (see below for details). @@ -883,134 +811,6 @@ LIBBUTL_MODEXPORT namespace butl bool interm)>&, const dir_path& start = dir_path (), path_match_flags = path_match_flags::none); - - // Return true if a name contains the wildcard characters. - // - bool - path_pattern (const std::string&); - - // Return true if a name contains the ** wildcard sequences. - // - bool - path_pattern_recursive (const std::string&); - - // Return true if a name contains the *** wildcard sequences. - // - bool - path_pattern_self_matching (const std::string&); - - // Return true if a path contains the pattern components. - // - bool - path_pattern (const path&); - - // Return the number of recursive pattern components. - // - // Knowing the number of such components allows us to make some assumptions - // regarding the search result. For example, if it is zero or one, then the - // result contains no duplicates. - // - // Also note that the result can be used as bool. - // - size_t - path_pattern_recursive (const path&); - - // Return true if the path is not empty and its first component is a self- - // matching pattern. - // - bool - path_pattern_self_matching (const path&); - - // Iteration over pattern terminals. - // - enum class path_pattern_term_type - { - literal, // Literal character. - question, // Question mark wildcard. 
- star, // Star wildcard. - bracket // Bracket expression wildcard. - }; - - class path_pattern_term - { - public: - path_pattern_term_type type; - std::string::const_iterator begin; - std::string::const_iterator end; - - std::size_t - size () const {return end - begin;} - - // Predicates. - // - bool literal () const {return type == path_pattern_term_type::literal;} - bool question () const {return type == path_pattern_term_type::question;} - bool star () const {return type == path_pattern_term_type::star;} - bool bracket () const {return type == path_pattern_term_type::bracket;} - }; - - // Return the literal terminal character. - // - char - get_literal (const path_pattern_term&); - - // Match a character against the bracket expression terminal. - // - LIBBUTL_SYMEXPORT bool - match_bracket (char, const path_pattern_term&); - - class LIBBUTL_SYMEXPORT path_pattern_iterator - { - public: - using value_type = path_pattern_term; - using pointer = const path_pattern_term*; - using reference = const path_pattern_term&; - using difference_type = std::ptrdiff_t; - using iterator_category = std::input_iterator_tag; - - explicit - path_pattern_iterator (const std::string&); - - path_pattern_iterator (std::string::const_iterator begin, - std::string::const_iterator end); - - path_pattern_iterator () = default; // Create the end iterator. - - path_pattern_iterator& operator++ () {assert (t_); next (); return *this;} - - reference operator* () const {assert (t_); return *t_;} - pointer operator-> () const {assert (t_); return &*t_;} - - friend bool - operator== (const path_pattern_iterator&, const path_pattern_iterator&); - - friend bool - operator!= (const path_pattern_iterator&, const path_pattern_iterator&); - - private: - void - next (); - - private: - // nullopt denotes the end iterator. - // - // Note that the default-constructed i_ and e_ iterators (having singular - // values) may not represent the end iterator as are not comparable for - // equality. 
That's why we use an absent term to represent such an - // iterator. - // - optional<path_pattern_term> t_; - - std::string::const_iterator i_; - std::string::const_iterator e_; - }; - - // Range-based for loop support. - // - // for (const path_pattern_term& t: path_pattern_iterator (pattern)) ... - // - path_pattern_iterator begin (const path_pattern_iterator&); - path_pattern_iterator end (const path_pattern_iterator&); } #include -- cgit v1.1