From 61377c582e0f2675baa5f5e6e30a35d1a4164b33 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Mon, 1 May 2017 16:08:43 +0300 Subject: Add hxx extension for headers and lib prefix for library dir --- libbutl/.gitignore | 1 + libbutl/base64.cxx | 211 ++++++ libbutl/base64.hxx | 50 ++ libbutl/buildfile | 93 +++ libbutl/char-scanner.cxx | 91 +++ libbutl/char-scanner.hxx | 90 +++ libbutl/const-ptr.hxx | 80 +++ libbutl/curl.cxx | 166 +++++ libbutl/curl.hxx | 173 +++++ libbutl/curl.ixx | 29 + libbutl/curl.txx | 99 +++ libbutl/diagnostics.cxx | 80 +++ libbutl/diagnostics.hxx | 226 ++++++ libbutl/export.hxx | 41 ++ libbutl/fdstream.cxx | 1085 +++++++++++++++++++++++++++++ libbutl/fdstream.hxx | 643 +++++++++++++++++ libbutl/fdstream.ixx | 266 ++++++++ libbutl/filesystem.cxx | 1396 +++++++++++++++++++++++++++++++++++++ libbutl/filesystem.hxx | 567 +++++++++++++++ libbutl/filesystem.ixx | 144 ++++ libbutl/ft/exception.hxx | 39 ++ libbutl/ft/lang.hxx | 29 + libbutl/ft/shared_mutex.hxx | 58 ++ libbutl/manifest-forward.hxx | 15 + libbutl/manifest-parser.cxx | 387 +++++++++++ libbutl/manifest-parser.hxx | 94 +++ libbutl/manifest-serializer.cxx | 238 +++++++ libbutl/manifest-serializer.hxx | 75 ++ libbutl/multi-index.hxx | 59 ++ libbutl/optional.hxx | 96 +++ libbutl/pager.cxx | 207 ++++++ libbutl/pager.hxx | 88 +++ libbutl/path-io.hxx | 29 + libbutl/path-map.hxx | 120 ++++ libbutl/path.cxx | 395 +++++++++++ libbutl/path.hxx | 1136 ++++++++++++++++++++++++++++++ libbutl/path.ixx | 508 ++++++++++++++ libbutl/path.txx | 377 ++++++++++ libbutl/prefix-map.hxx | 138 ++++ libbutl/prefix-map.txx | 39 ++ libbutl/process-details.hxx | 49 ++ libbutl/process-io.hxx | 28 + libbutl/process-run.cxx | 32 + libbutl/process-run.txx | 226 ++++++ libbutl/process.cxx | 1440 +++++++++++++++++++++++++++++++++++++++ libbutl/process.hxx | 569 ++++++++++++++++ libbutl/process.ixx | 207 ++++++ libbutl/sendmail.cxx | 40 ++ libbutl/sendmail.hxx | 91 +++ libbutl/sendmail.ixx | 68 ++ libbutl/sha256.cxx | 
142 ++++ libbutl/sha256.hxx | 101 +++ libbutl/sha256c.c | 393 +++++++++++ libbutl/sha256c.c.orig | 316 +++++++++ libbutl/small-vector.hxx | 283 ++++++++ libbutl/standard-version.cxx | 632 +++++++++++++++++ libbutl/standard-version.hxx | 257 +++++++ libbutl/standard-version.ixx | 100 +++ libbutl/string-parser.cxx | 132 ++++ libbutl/string-parser.hxx | 56 ++ libbutl/string-table.hxx | 98 +++ libbutl/string-table.txx | 33 + libbutl/strptime.c | 629 +++++++++++++++++ libbutl/strptime.c.orig | 689 +++++++++++++++++++ libbutl/tab-parser.cxx | 88 +++ libbutl/tab-parser.hxx | 72 ++ libbutl/target-triplet.cxx | 145 ++++ libbutl/target-triplet.hxx | 155 +++++ libbutl/timelocal.c | 157 +++++ libbutl/timelocal.c.orig | 153 +++++ libbutl/timelocal.h | 65 ++ libbutl/timelocal.h.orig | 61 ++ libbutl/timestamp.cxx | 612 +++++++++++++++++ libbutl/timestamp.hxx | 146 ++++ libbutl/utility.cxx | 143 ++++ libbutl/utility.hxx | 264 +++++++ libbutl/utility.ixx | 136 ++++ libbutl/vector-view.hxx | 120 ++++ libbutl/version.hxx.in | 40 ++ libbutl/win32-utility.cxx | 54 ++ libbutl/win32-utility.hxx | 54 ++ 81 files changed, 18734 insertions(+) create mode 100644 libbutl/.gitignore create mode 100644 libbutl/base64.cxx create mode 100644 libbutl/base64.hxx create mode 100644 libbutl/buildfile create mode 100644 libbutl/char-scanner.cxx create mode 100644 libbutl/char-scanner.hxx create mode 100644 libbutl/const-ptr.hxx create mode 100644 libbutl/curl.cxx create mode 100644 libbutl/curl.hxx create mode 100644 libbutl/curl.ixx create mode 100644 libbutl/curl.txx create mode 100644 libbutl/diagnostics.cxx create mode 100644 libbutl/diagnostics.hxx create mode 100644 libbutl/export.hxx create mode 100644 libbutl/fdstream.cxx create mode 100644 libbutl/fdstream.hxx create mode 100644 libbutl/fdstream.ixx create mode 100644 libbutl/filesystem.cxx create mode 100644 libbutl/filesystem.hxx create mode 100644 libbutl/filesystem.ixx create mode 100644 libbutl/ft/exception.hxx create mode 100644 
libbutl/ft/lang.hxx create mode 100644 libbutl/ft/shared_mutex.hxx create mode 100644 libbutl/manifest-forward.hxx create mode 100644 libbutl/manifest-parser.cxx create mode 100644 libbutl/manifest-parser.hxx create mode 100644 libbutl/manifest-serializer.cxx create mode 100644 libbutl/manifest-serializer.hxx create mode 100644 libbutl/multi-index.hxx create mode 100644 libbutl/optional.hxx create mode 100644 libbutl/pager.cxx create mode 100644 libbutl/pager.hxx create mode 100644 libbutl/path-io.hxx create mode 100644 libbutl/path-map.hxx create mode 100644 libbutl/path.cxx create mode 100644 libbutl/path.hxx create mode 100644 libbutl/path.ixx create mode 100644 libbutl/path.txx create mode 100644 libbutl/prefix-map.hxx create mode 100644 libbutl/prefix-map.txx create mode 100644 libbutl/process-details.hxx create mode 100644 libbutl/process-io.hxx create mode 100644 libbutl/process-run.cxx create mode 100644 libbutl/process-run.txx create mode 100644 libbutl/process.cxx create mode 100644 libbutl/process.hxx create mode 100644 libbutl/process.ixx create mode 100644 libbutl/sendmail.cxx create mode 100644 libbutl/sendmail.hxx create mode 100644 libbutl/sendmail.ixx create mode 100644 libbutl/sha256.cxx create mode 100644 libbutl/sha256.hxx create mode 100644 libbutl/sha256c.c create mode 100644 libbutl/sha256c.c.orig create mode 100644 libbutl/small-vector.hxx create mode 100644 libbutl/standard-version.cxx create mode 100644 libbutl/standard-version.hxx create mode 100644 libbutl/standard-version.ixx create mode 100644 libbutl/string-parser.cxx create mode 100644 libbutl/string-parser.hxx create mode 100644 libbutl/string-table.hxx create mode 100644 libbutl/string-table.txx create mode 100644 libbutl/strptime.c create mode 100644 libbutl/strptime.c.orig create mode 100644 libbutl/tab-parser.cxx create mode 100644 libbutl/tab-parser.hxx create mode 100644 libbutl/target-triplet.cxx create mode 100644 libbutl/target-triplet.hxx create mode 100644 
libbutl/timelocal.c create mode 100644 libbutl/timelocal.c.orig create mode 100644 libbutl/timelocal.h create mode 100644 libbutl/timelocal.h.orig create mode 100644 libbutl/timestamp.cxx create mode 100644 libbutl/timestamp.hxx create mode 100644 libbutl/utility.cxx create mode 100644 libbutl/utility.hxx create mode 100644 libbutl/utility.ixx create mode 100644 libbutl/vector-view.hxx create mode 100644 libbutl/version.hxx.in create mode 100644 libbutl/win32-utility.cxx create mode 100644 libbutl/win32-utility.hxx (limited to 'libbutl') diff --git a/libbutl/.gitignore b/libbutl/.gitignore new file mode 100644 index 0000000..426db9e --- /dev/null +++ b/libbutl/.gitignore @@ -0,0 +1 @@ +version.hxx diff --git a/libbutl/base64.cxx b/libbutl/base64.cxx new file mode 100644 index 0000000..580090c --- /dev/null +++ b/libbutl/base64.cxx @@ -0,0 +1,211 @@ +// file : libbutl/base64.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include // size_t +#include +#include +#include // {istreambuf, ostreambuf, back_insert}_iterator +#include // invalid_argument + +using namespace std; + +namespace butl +{ + static const char codes[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + // base64-encode the data in the iterator range [i, e). Write the encoded + // data starting at the iterator position o. + // + template + static void + base64_encode (I& i, const I& e, O& o) + { + const size_t un (65); // Non-existing index of the codes string. + for (size_t n (0); i != e; ++n) + { + if (n && n % 19 == 0) + *o++ = '\n'; // Split into lines, like the base64 utility does. 
+ + char c (*i++); + size_t i1 ((c >> 2) & 0x3F); + size_t i2 ((c << 4) & 0x30); + + size_t i3 (un); + if (i != e) + { + c = *i++; + i2 |= (c >> 4) & 0xF; + i3 = (c << 2) & 0x3C; + } + + size_t i4 (un); + if (i != e) + { + c = *i++; + i3 |= (c >> 6) & 0x3; + i4 = c & 0x3F; + } + + *o++ = codes[i1]; + *o++ = codes[i2]; + *o++ = i3 == un ? '=' : codes[i3]; + *o++ = i4 == un ? '=' : codes[i4]; + } + } + + static char + index (char c) + { + switch (c) + { + case '/': return 63; + case '+': return 62; + default: + { + if (c >= 'A' && c <= 'Z') + return c - 'A'; + else if (c >= 'a' && c <= 'z') + return c - 'a' + 26; + else if (c >= '0' && c <= '9') + return c - '0' + 52; + else + throw invalid_argument ("invalid input"); + } + } + } + + // base64-decode the data in the iterator range [i, e). Write the decoded + // data starting at the iterator position o. Throw invalid_argument if the + // input data is invalid. + // + template + static void + base64_decode (I& i, const I& e, O& o) + { + auto bad = []() {throw invalid_argument ("invalid input");}; + + auto next = [&i, &e, &bad]() -> char + { + if (i == e) + bad (); + return *i++; + }; + + while (i != e) + { + char c (next ()); + if (c == '\n') // @@ Should we check for '\r' as well ? 
+ continue; + + char i1 = index (c); + char i2 = index (next ()); + *o++ = (i1 << 2) | (i2 >> 4); + + c = next (); + if (c == '=') + { + if (next () != '=' || i != e) + bad (); + } + else + { + char i3 = index (c); + *o++ = (i2 << 4) | (i3 >> 2); + + c = next (); + if (c == '=') + { + if (i != e) + bad (); + } + else + *o++ = (i3 << 6) | index (c); + } + } + } + + string + base64_encode (istream& is) + { + if (!is.good ()) + throw invalid_argument ("bad stream"); + + string r; + istreambuf_iterator i (is); + back_insert_iterator o (r); + + base64_encode (i, istreambuf_iterator (), o); + is.setstate (istream::eofbit); + return r; + } + + void + base64_encode (ostream& os, istream& is) + { + if (!os.good () || !is.good ()) + throw invalid_argument ("bad stream"); + + istreambuf_iterator i (is); + ostreambuf_iterator o (os); + base64_encode (i, istreambuf_iterator (), o); + + if (o.failed ()) + os.setstate (istream::badbit); + + is.setstate (istream::eofbit); + } + + string + base64_encode (const vector& v) + { + string r; + back_insert_iterator o (r); + auto i (v.begin ()); + base64_encode (i, v.end (), o); + return r; + } + + void + base64_decode (ostream& os, istream& is) + { + if (!os.good () || !is.good ()) + throw invalid_argument ("bad stream"); + + istreambuf_iterator i (is); + ostreambuf_iterator o (os); + base64_decode (i, istreambuf_iterator (), o); + + if (o.failed ()) + os.setstate (istream::badbit); + + is.setstate (istream::eofbit); + } + + void + base64_decode (ostream& os, const string& s) + { + if (!os.good ()) + throw invalid_argument ("bad stream"); + + ostreambuf_iterator o (os); + auto i (s.cbegin ()); + base64_decode (i, s.cend (), o); + + if (o.failed ()) + os.setstate (istream::badbit); + } + + vector + base64_decode (const string& s) + { + vector r; + back_insert_iterator> o (r); + auto i (s.cbegin ()); + base64_decode (i, s.cend (), o); + return r; + } +} diff --git a/libbutl/base64.hxx b/libbutl/base64.hxx new file mode 100644 index 
0000000..29c6198 --- /dev/null +++ b/libbutl/base64.hxx @@ -0,0 +1,50 @@ +// file : libbutl/base64.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_BASE64_HXX +#define LIBBUTL_BASE64_HXX + +#include +#include +#include + +#include + +namespace butl +{ + // Base64-encode a stream or a buffer. Split the output into 76 char-long + // lines (new line is the 77th). If reading from a stream, check if it has + // badbit, failbit, or eofbit set and throw invalid_argument if that's the + // case. Otherwise, set eofbit on completion. If writing to a stream, check + // if it has badbit, failbit, or eofbit set and throw invalid_argument if + // that's the case. Otherwise set badbit if the write operation fails. + // + LIBBUTL_EXPORT void + base64_encode (std::ostream&, std::istream&); + + LIBBUTL_EXPORT std::string + base64_encode (std::istream&); + + LIBBUTL_EXPORT std::string + base64_encode (const std::vector&); + + // Base64-decode a stream or a string. Throw invalid_argument if the input + // is not a valid base64 representation. If reading from a stream, check if + // it has badbit, failbit, or eofbit set and throw invalid_argument if + // that's the case. Otherwise, set eofbit on completion. If writing to a + // stream, check if it has badbit, failbit, or eofbit set and throw + // invalid_argument if that's the case. Otherwise set badbit if the write + // operation fails. 
+ // + LIBBUTL_EXPORT void + base64_decode (std::ostream&, std::istream&); + + LIBBUTL_EXPORT void + base64_decode (std::ostream&, const std::string&); + + LIBBUTL_EXPORT std::vector + base64_decode (const std::string&); +} + +#endif // LIBBUTL_BASE64_HXX diff --git a/libbutl/buildfile b/libbutl/buildfile new file mode 100644 index 0000000..a55311c --- /dev/null +++ b/libbutl/buildfile @@ -0,0 +1,93 @@ +# file : libbutl/buildfile +# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +lib{butl}: \ + {hxx cxx}{ base64 } \ + {hxx cxx}{ char-scanner } \ + {hxx }{ const-ptr } \ + {hxx ixx txx cxx}{ curl } \ + {hxx cxx}{ diagnostics } \ + {hxx }{ export } \ + {hxx ixx cxx}{ fdstream } \ + {hxx ixx cxx}{ filesystem } \ + {hxx }{ manifest-forward } \ + {hxx cxx}{ manifest-parser } \ + {hxx cxx}{ manifest-serializer } \ + {hxx }{ multi-index } \ + {hxx }{ optional } \ + {hxx cxx}{ pager } \ + {hxx ixx txx cxx}{ path } \ + {hxx }{ path-io } \ + {hxx }{ path-map } \ + {hxx txx }{ prefix-map } \ + {hxx ixx cxx}{ process } \ + {hxx }{ process-details } \ + {hxx }{ process-io } \ + { txx cxx}{ process-run } \ + {hxx ixx cxx}{ sendmail } \ + {hxx cxx}{ sha256 } \ + {hxx }{ small-vector } \ + {hxx ixx cxx}{ standard-version } \ + {hxx cxx}{ string-parser } \ + {hxx txx }{ string-table } \ + {hxx cxx}{ tab-parser } \ + {hxx cxx}{ target-triplet } \ + {hxx cxx}{ timestamp } \ + {hxx ixx cxx}{ utility } \ + {hxx }{ vector-view } \ + {hxx }{ version } \ +ft/{hxx }{ exception } \ +ft/{hxx }{ lang } \ +ft/{hxx }{ shared_mutex } + +# Exclude these from compilation on non-Windows targets. +# +if ($cxx.target.class == "windows") + lib{butl}: {hxx cxx}{ win32-utility } +else + lib{butl}: file{ win32-utility.hxx win32-utility.cxx } + +# This one is included into sha256.cxx so treat it as file to exclude +# from the compilation. 
+# +lib{butl}: file{sha256c.c} + +# These ones are included into timestamp.cxx so treat them as files to exclude +# from the compilation. +# +lib{butl}: file{strptime.c timelocal.h timelocal.c} + +hxx{version}: in{version} $src_root/file{manifest} +hxx{version}: dist = true + +# For pre-releases use the complete version to make sure they cannot be used +# in place of another pre-release or the final version. +# +if $version.pre_release + lib{butl}: bin.lib.version = @"-$version.project_id" +else + lib{butl}: bin.lib.version = @"-$version.major.$version.minor" + +cxx.poptions =+ "-I$out_root" "-I$src_root" +obja{*}: cxx.poptions += -DLIBBUTL_STATIC_BUILD +objs{*}: cxx.poptions += -DLIBBUTL_SHARED_BUILD + +lib{butl}: cxx.export.poptions = "-I$out_root" "-I$src_root" +liba{butl}: cxx.export.poptions += -DLIBBUTL_STATIC +libs{butl}: cxx.export.poptions += -DLIBBUTL_SHARED + +if ($cxx.target.class == "windows") +{ + if ($cxx.target.system == "mingw32") + cxx.libs += -lpsapi + else + cxx.libs += psapi.lib +} +else + cxx.libs += -lpthread + +# Install into the libbutl/ subdirectory of, say, /usr/include/. +# +install.include = $install.include/libbutl/ +install.include.subdirs = true # Recreate subdirectories. 
diff --git a/libbutl/char-scanner.cxx b/libbutl/char-scanner.cxx new file mode 100644 index 0000000..cbc2503 --- /dev/null +++ b/libbutl/char-scanner.cxx @@ -0,0 +1,91 @@ +// file : libbutl/char-scanner.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include + +using namespace std; + +namespace butl +{ + auto char_scanner:: + peek () -> xchar + { + if (unget_) + return buf_; + else + { + if (eos_) + return xchar (xchar::traits_type::eof (), line, column); + else + { + xchar::int_type v (is_.peek ()); + + if (v == xchar::traits_type::eof ()) + eos_ = true; + else if (crlf_ && v == 0x0D) + { + is_.get (); + xchar::int_type v1 (is_.peek ()); + + if (v1 != '\n') + { + unget_ = true; + buf_ = '\n'; + } + + v = '\n'; + } + + return xchar (v, line, column); + } + } + } + + auto char_scanner:: + get () -> xchar + { + if (unget_) + { + unget_ = false; + return buf_; + } + else + { + // When is_.get () returns eof, the failbit is also set (stupid, + // isn't?) which may trigger an exception. To work around this + // we will call peek() first and only call get() if it is not + // eof. But we can only call peek() on eof once; any subsequent + // calls will spoil the failbit (even more stupid). + // + xchar c (peek ()); + + if (!eos (c)) + { + is_.get (); + + if (c == '\n') + { + line++; + column = 1; + } + else + column++; + } + + return c; + } + } + + void char_scanner:: + unget (const xchar& c) + { + // Because iostream::unget cannot work once eos is reached, + // we have to provide our own implementation. 
+ // + buf_ = c; + unget_ = true; + } +} diff --git a/libbutl/char-scanner.hxx b/libbutl/char-scanner.hxx new file mode 100644 index 0000000..71f8313 --- /dev/null +++ b/libbutl/char-scanner.hxx @@ -0,0 +1,90 @@ +// file : libbutl/char-scanner.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_CHAR_SCANNER_HXX +#define LIBBUTL_CHAR_SCANNER_HXX + +#include // char_traits +#include +#include // uint64_t + +#include + +namespace butl +{ + // Low-level character stream scanner. Normally used as a base for + // higher-level lexers. + // + class LIBBUTL_EXPORT char_scanner + { + public: + // If the crlf argument is true, then recognize Windows newlines (0x0D + // 0x0A) and convert them to just '\n' (0x0A). Note that a standalone + // 0x0D is treated "as if" it was followed by 0x0A. + // + char_scanner (std::istream& is, bool crlf = true) + : is_ (is), crlf_ (crlf) {} + + char_scanner (const char_scanner&) = delete; + char_scanner& operator= (const char_scanner&) = delete; + + // Scanner interface. + // + public: + + // Extended character. It includes line/column information + // and is capable of representing EOF. + // + class xchar + { + public: + typedef std::char_traits traits_type; + typedef traits_type::int_type int_type; + typedef traits_type::char_type char_type; + + int_type value; + std::uint64_t line; + std::uint64_t column; + + operator char_type () const {return static_cast (value);} + + xchar (int_type v, std::uint64_t l = 0, std::uint64_t c = 0) + : value (v), line (l), column (c) {} + }; + + xchar + get (); + + void + unget (const xchar&); + + // Note that if there is an "ungot" character, peek() will return + // that. + // + xchar + peek (); + + // Tests. In the future we can add tests line alpha(), alnum(), + // etc. 
+ // + static bool + eos (const xchar& c) {return c.value == xchar::traits_type::eof ();} + + // Line and column of the furthest seen (either via get() or + // peek()) character. + // + std::uint64_t line {1}; + std::uint64_t column {1}; + + protected: + std::istream& is_; + bool crlf_; + + bool unget_ {false}; + xchar buf_ = '\0'; + bool eos_ {false}; + }; +} + +#endif // LIBBUTL_CHAR_SCANNER_HXX diff --git a/libbutl/const-ptr.hxx b/libbutl/const-ptr.hxx new file mode 100644 index 0000000..e50704d --- /dev/null +++ b/libbutl/const-ptr.hxx @@ -0,0 +1,80 @@ +// file : libbutl/const-ptr.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_CONST_PTR_HXX +#define LIBBUTL_CONST_PTR_HXX + +#include // nullptr_t + +namespace butl +{ + // Const-propagating pointer. + // + // It has the semantics of a raw pointer except that it passes on its own + // const-ness to the pointed-to object. In other words, if you have a const + // instance of this pointer, then you can only obtain a const raw pointer to + // the underlying object. It is normally used as a data member, for example: + // + // struct tree + // { + // const_ptr left; + // const_ptr right; + // + // void modify (); + // }; + // + // tree* x = ...; + // const tree* y = ...; + // + // x.left->modify (); // Ok. + // y.left->modify (); // Error. + // + // Note that due to this semantics, copy construction/assignment requires + // a non-const instance of const_ptr. + // + // Note that this type is standard layout (which means we can reinterpret + // it as a raw pointer). + // + // Known drawbacks/issues: + // + // 1. Cannot do static_cast (x.left). 
+ // + template + class const_ptr + { + public: + const_ptr () = default; + explicit const_ptr (T* p): p_ (p) {} + const_ptr (std::nullptr_t): p_ (nullptr) {} + + const_ptr& operator= (T* p) {p_ = p; return *this;} + const_ptr& operator= (std::nullptr_t) {p_ = nullptr; return *this;} + + template explicit const_ptr (T1* p): p_ (p) {} + template const_ptr (const_ptr& p): p_ (p.p_) {} + + template const_ptr& operator= (T1* p) {p_ = p; return *this;} + template const_ptr& operator= (const_ptr& p) { + p_ = p.p_; return *this;} + + T* operator-> () {return p_;} + const T* operator-> () const {return p_;} + + T& operator* () {return *p_;} + const T& operator* () const {return *p_;} + + operator T* () {return p_;} + operator const T* () const {return p_;} + + explicit operator bool () const {return p_ != nullptr;} + + T* get () {return p_;} + const T* get () const {return p_;} + + private: + T* p_; + }; +} + +#endif // LIBBUTL_CONST_PTR_HXX diff --git a/libbutl/curl.cxx b/libbutl/curl.cxx new file mode 100644 index 0000000..daa1fd3 --- /dev/null +++ b/libbutl/curl.cxx @@ -0,0 +1,166 @@ +// file : libbutl/curl.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include // move(), forward() +#include // invalid_argument + +#include // casecmp() + +using namespace std; + +namespace butl +{ + int curl:: + map_in (nullfd_t, method_proto mp, io_data& d) + { + switch (mp) + { + case ftp_put: + throw invalid_argument ("no input specified for PUT method"); + case http_post: + throw invalid_argument ("no input specified for POST method"); + case ftp_get: + case http_get: + { + d.pipe.in = fdnull (); // /dev/null + return d.pipe.in.get (); + } + } + + return -1; + } + + int curl:: + map_in (const path& f, method_proto mp, io_data& d) + { + switch (mp) + { + case ftp_put: + case http_post: + { + if (mp == ftp_put) + { + d.options.push_back ("--upload-file"); + d.options.push_back (f.string 
().c_str ()); + } + else + { + d.storage = '@' + f.string (); + + d.options.push_back ("--data-binary"); + d.options.push_back (d.storage.c_str ()); + } + + if (f.string () == "-") + { + d.pipe = fdopen_pipe (fdopen_mode::binary); + out.open (move (d.pipe.out)); + } + else + d.pipe.in = fdnull (); // /dev/null + + return d.pipe.in.get (); + } + case ftp_get: + case http_get: + { + throw invalid_argument ("file input specified for GET method"); + } + } + + return -1; + } + + int curl:: + map_out (nullfd_t, method_proto mp, io_data& d) + { + switch (mp) + { + case ftp_get: + case http_get: + throw invalid_argument ("no output specified for GET method"); + case ftp_put: + case http_post: // May or may not produce output. + { + d.pipe.out = fdnull (); + return d.pipe.out.get (); // /dev/null + } + } + + return -1; + } + + int curl:: + map_out (const path& f, method_proto mp, io_data& d) + { + switch (mp) + { + case ftp_get: + case http_get: + case http_post: + { + if (f.string () == "-") + { + // Note: no need for any options, curl writes to stdout by default. 
+ // + d.pipe = fdopen_pipe (fdopen_mode::binary); + in.open (move (d.pipe.in)); + } + else + { + d.options.push_back ("-o"); + d.options.push_back (f.string ().c_str ()); + d.pipe.out = fdnull (); // /dev/null + } + + return d.pipe.out.get (); + } + case ftp_put: + { + throw invalid_argument ("file output specified for PUT method"); + } + } + + return -1; + } + + curl::method_proto curl:: + translate (method_type m, const string& u, method_proto_options& o) + { + size_t n (u.find ("://")); + + if (n == string::npos) + throw invalid_argument ("no protocol in URL"); + + if (casecmp (u, "ftp", n) == 0 || + casecmp (u, "tftp", n) == 0) + { + switch (m) + { + case method_type::get: return method_proto::ftp_get; + case method_type::put: return method_proto::ftp_put; + case method_type::post: + throw invalid_argument ("POST method with FTP protocol"); + } + } + else if (casecmp (u, "http", n) == 0 || + casecmp (u, "https", n) == 0) + { + o.push_back ("--fail"); // Fail on HTTP errors (e.g., 404). + o.push_back ("--location"); // Follow redirects. + + switch (m) + { + case method_type::get: return method_proto::http_get; + case method_type::post: return method_proto::http_post; + case method_type::put: + throw invalid_argument ("PUT method with HTTP protocol"); + } + } + + throw invalid_argument ("unsupported protocol"); + } +} diff --git a/libbutl/curl.hxx b/libbutl/curl.hxx new file mode 100644 index 0000000..bc4eb0f --- /dev/null +++ b/libbutl/curl.hxx @@ -0,0 +1,173 @@ +// file : libbutl/curl.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_CURL_HXX +#define LIBBUTL_CURL_HXX + +#include +#include + +#include + +#include +#include +#include + +namespace butl +{ + // Perform a method (GET, POST, PUT) on a URL using the curl(1) program. + // Throw process_error and io_error (both derive from system_error) in case + // of errors. 
+ // + // The I (in) and O (out) can be of the following types/values: + // + // nullfd Signal that no input/output is expected. + // + // path Read input/write output from/to a file. If the special "-" + // value is used, then instead input is connected to the curl::out + // ofdstream member and output -- to the curl::in ifdstream member. + // Note that the argument type should be path, not string (i.e., + // pass path("-")). + // + // other Forwarded as is to process_start(). Normally either int or + // auto_fd. + // + // For example: + // + // curl (nullfd, // No input expected for GET. + // path ("-"), // Write response to curl::in. + // 2, + // curl::get, + // "http://example.org"); + // + // curl (path ("-"), // Read request from curl::out. + // path::temp_path (), // Write result to a file. + // 2, + // curl::post, + // "http://example.org"); + // + // curl (nullfd, + // fdnull (), // Write result to /dev/null. + // 2, + // curl::get, + // "tftp://localhost/foo"); + // + // Typical usage: + // + // try + // { + // curl c (nullfd, // No input expected. + // path ("-"), // Output to curl::in. + // 2, // Diagnostics to stderr. + // curl::get, // GET method. + // "https://example.org", + // "-A", "foobot/1.2.3"); // Additional curl(1) options. + // + // for (string s; getline (c.in, s); ) + // cout << s << endl; + // + // c.in.close (); + // + // if (!c.wait ()) + // ... // curl returned non-zero status. + // } + // catch (const std::system_error& e) + // { + // cerr << "curl error: " << e << endl; + // } + // + // Notes: + // + // 1. If opened, in/out streams are in the binary mode. + // + // 2. If opened, in/out must be explicitly closed before calling wait(). + // + // 3. Only binary data HTTP POST is currently supported (the --data-binary + // curl option). 
+ // + class LIBBUTL_EXPORT curl: public process + { + public: + enum method_type {get, put, post}; + + ifdstream in; + ofdstream out; + + template + curl (I&& in, + O&& out, + E&& err, + method_type, + const std::string& url, + A&&... options); + + // Version with the command line callback (see process_run() for details). + // + template + curl (const C&, + I&& in, + O&& out, + E&& err, + method_type, + const std::string& url, + A&&... options); + + private: + enum method_proto {ftp_get, ftp_put, http_get, http_post}; + using method_proto_options = small_vector; + + method_proto + translate (method_type, const std::string& url, method_proto_options&); + + private: + template + struct is_other + { + using type = typename std::remove_reference< + typename std::remove_cv::type>::type; + + static const bool value = !(std::is_same::value || + std::is_same::value); + }; + + struct io_data + { + fdpipe pipe; + method_proto_options options; + std::string storage; + }; + + int + map_in (nullfd_t, method_proto, io_data&); + + int + map_in (const path&, method_proto, io_data&); + + template + typename std::enable_if::value, I>::type + map_in (I&&, method_proto, io_data&); + + int + map_out (nullfd_t, method_proto, io_data&); + + int + map_out (const path&, method_proto, io_data&); + + template + typename std::enable_if::value, O>::type + map_out (O&&, method_proto, io_data&); + }; +} + +#include +#include + +#endif // LIBBUTL_CURL_HXX diff --git a/libbutl/curl.ixx b/libbutl/curl.ixx new file mode 100644 index 0000000..fcc1bab --- /dev/null +++ b/libbutl/curl.ixx @@ -0,0 +1,29 @@ +// file : libbutl/curl.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include // move(), forward() + +namespace butl +{ + template + inline curl:: + curl (I&& in, + O&& out, + E&& err, + method_type m, + const std::string& url, + A&&... 
options) + : curl ([] (const char* [], std::size_t) {}, + std::forward (in), + std::forward (out), + std::forward (err), + m, + url, + std::forward (options)...) + { + } +} diff --git a/libbutl/curl.txx b/libbutl/curl.txx new file mode 100644 index 0000000..5fd81dc --- /dev/null +++ b/libbutl/curl.txx @@ -0,0 +1,99 @@ +// file : libbutl/curl.txx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include // move(), forward() +#include // invalid_argument + +namespace butl +{ + template + typename std::enable_if::value, I>::type curl:: + map_in (I&& in, method_proto mp, io_data& d) + { + switch (mp) + { + case ftp_put: + { + d.options.push_back ("--upload-file"); + d.options.push_back ("-"); + break; + } + case http_post: + { + d.options.push_back ("--data-binary"); + d.options.push_back ("@-"); + break; + } + case ftp_get: + case http_get: + { + throw std::invalid_argument ("input specified for GET method"); + } + } + + return std::forward (in); + } + + template + typename std::enable_if::value, O>::type curl:: + map_out (O&& out, method_proto mp, io_data&) + { + switch (mp) + { + case ftp_get: + case http_get: + case http_post: + { + // Note: no need for any options, curl writes to stdout by default. + // + break; + } + case ftp_put: + { + throw std::invalid_argument ("output specified for PUT method"); + } + } + + return std::forward (out); + } + + template + curl:: + curl (const C& cmdc, + I&& in, + O&& out, + E&& err, + method_type m, + const std::string& url, + A&&... options) + { + method_proto_options mpo; + method_proto mp (translate (m, url, mpo)); + + io_data in_data; + io_data out_data; + + process& p (*this); + p = process_start ( + cmdc, + map_in (std::forward (in), mp, in_data), + map_out (std::forward (out), mp, out_data), + std::forward (err), + dir_path (), + "curl", + "-s", // Silent. + "-S", // But do show diagnostics. 
+ mpo, + in_data.options, + out_data.options, + std::forward (options)..., + url); + + // Note: leaving this scope closes any open ends of the pipes in io_data. + } +} diff --git a/libbutl/diagnostics.cxx b/libbutl/diagnostics.cxx new file mode 100644 index 0000000..e73412b --- /dev/null +++ b/libbutl/diagnostics.cxx @@ -0,0 +1,80 @@ +// file : libbutl/diagnostics.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include // cerr + +using namespace std; + +namespace butl +{ + ostream* diag_stream = &cerr; + + static mutex diag_mutex; + + diag_lock::diag_lock () + { + diag_mutex.lock (); + } + + diag_lock::~diag_lock () + { + diag_mutex.unlock (); + } + + void diag_record:: + flush () const + { + if (!empty_) + { + if (epilogue_ == nullptr) + { + os.put ('\n'); + + { + diag_lock l; + *diag_stream << os.str (); + } + + // We can endup flushing the result of several writes. The last one may + // possibly be incomplete, but that's not a problem as it will also be + // followed by the flush() call. + // + diag_stream->flush (); + + empty_ = true; + } + else + { + // Clear the epilogue in case it calls us back. + // + auto e (epilogue_); + epilogue_ = nullptr; + e (*this); // Can throw. + flush (); // Call ourselves to write the data in case it returns. + } + } + } + + diag_record:: + ~diag_record () noexcept (false) + { + // Don't flush the record if this destructor was called as part of the + // stack unwinding. + // +#ifdef __cpp_lib_uncaught_exceptions + if (uncaught_ == std::uncaught_exceptions ()) + flush (); +#else + // Fallback implementation. Right now this means we cannot use this + // mechanism in destructors, which is not a big deal, except for one + // place: exception_guard. Thus the ugly special check. 
+ // + if (!std::uncaught_exception () || exception_unwinding_dtor ()) + flush (); +#endif + } +} diff --git a/libbutl/diagnostics.hxx b/libbutl/diagnostics.hxx new file mode 100644 index 0000000..cc83aef --- /dev/null +++ b/libbutl/diagnostics.hxx @@ -0,0 +1,226 @@ +// file : libbutl/diagnostics.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_DIAGNOSTICS_HXX +#define LIBBUTL_DIAGNOSTICS_HXX + +#include +#include +#include +#include // move(), forward() +#include // uncaught_exceptions +#include // uncaught_exception(s)() + +#include +#include + +namespace butl +{ + // Diagnostic facility base infrastructure. + // + + // Diagnostics destination stream (std::cerr by default). Note that its + // modification is not MT-safe. Also note that concurrent writing to the + // stream from multiple threads can result in interleaved characters. To + // prevent this an object of diag_lock type (see below) must be created prior + // to write operation. + // + LIBBUTL_EXPORT extern std::ostream* diag_stream; + + // Acquire the diagnostics exclusive access mutex in ctor, release in dtor. + // An object of the type must be created prior to writing to diag_stream (see + // above). 
+ // + struct LIBBUTL_EXPORT diag_lock + { + diag_lock (); + ~diag_lock (); + }; + + struct diag_record; + template struct diag_prologue; + template struct diag_mark; + + using diag_epilogue = void (const diag_record&); + + struct LIBBUTL_EXPORT diag_record + { + template + friend const diag_record& + operator<< (const diag_record& r, const T& x) + { + r.os << x; + return r; + } + + diag_record () + : +#ifdef __cpp_lib_uncaught_exceptions + uncaught_ (std::uncaught_exceptions ()), +#endif + empty_ (true), + epilogue_ (nullptr) {} + + template + explicit + diag_record (const diag_prologue& p): diag_record () { *this << p;} + + template + explicit + diag_record (const diag_mark& m): diag_record () { *this << m;} + + ~diag_record () noexcept (false); + + bool + empty () const {return empty_;} + + bool + full () const {return !empty_;} + + void + flush () const; + + void + append (const char* indent, diag_epilogue* e) const + { + // Ignore subsequent epilogues (e.g., from nested marks, etc). + // + if (empty_) + { + epilogue_ = e; + empty_ = false; + } + else if (indent != nullptr) + os << indent; + } + + // Move constructible-only type. + // + // Older versions of libstdc++ don't have the ostringstream move support + // and accuratly detecting its version is non-trivial. So we always use + // the pessimized implementation with libstdc++. Luckily, GCC doesn't seem + // to be needing move due to copy/move elision. 
+ // +#ifdef __GLIBCXX__ + diag_record (diag_record&&); +#else + diag_record (diag_record&& r) + : +#ifdef __cpp_lib_uncaught_exceptions + uncaught_ (r.uncaught_), +#endif + empty_ (r.empty_), + epilogue_ (r.epilogue_), + os (std::move (r.os)) + { + if (!empty_) + { + r.empty_ = true; + r.epilogue_ = nullptr; + } + } +#endif + + diag_record& operator= (diag_record&&) = delete; + + diag_record (const diag_record&) = delete; + diag_record& operator= (const diag_record&) = delete; + + protected: +#ifdef __cpp_lib_uncaught_exceptions + const int uncaught_; +#endif + mutable bool empty_; + mutable diag_epilogue* epilogue_; + + public: + mutable std::ostringstream os; + }; + + template + struct diag_prologue: B + { + diag_prologue (const char* i = "\n ", diag_epilogue* e = nullptr) + : B (), indent_ (i), epilogue_ (e) {} + + template + diag_prologue (A&&... a) + : B (std::forward (a)...), indent_ ("\n "), epilogue_ (nullptr) {} + + template + diag_prologue (diag_epilogue* e, A&&... a) + : B (std::forward (a)...), indent_ ("\n "), epilogue_ (e) {} + + template + diag_prologue (const char* i, diag_epilogue* e, A&&... a) + : B (std::forward (a)...), indent_ (i), epilogue_ (e) {} + + template + diag_record + operator<< (const T& x) const + { + diag_record r; + r.append (indent_, epilogue_); + B::operator() (r); + r << x; + return r; + } + + friend const diag_record& + operator<< (const diag_record& r, const diag_prologue& p) + { + r.append (p.indent_, p.epilogue_); + p (r); + return r; + } + + private: + const char* indent_; + diag_epilogue* epilogue_; + }; + + template + struct diag_mark: B + { + diag_mark (): B () {} + + template + diag_mark (A&&... a): B (std::forward (a)...) 
{} + + template + diag_record + operator<< (const T& x) const + { + return B::operator() () << x; + } + + friend const diag_record& + operator<< (const diag_record& r, const diag_mark& m) + { + return r << m (); + } + }; + + template + struct diag_noreturn_end: B + { + diag_noreturn_end (): B () {} + + template + diag_noreturn_end (A&&... a): B (std::forward (a)...) {} + + [[noreturn]] friend void + operator<< (const diag_record& r, const diag_noreturn_end& e) + { + // We said that we never return which means this end mark cannot be used + // to "maybe not return". And not returning without any diagnostics is + // probably a mistake. + // + assert (r.full ()); + e.B::operator() (r); + } + }; +} + +#endif // LIBBUTL_DIAGNOSTICS_HXX diff --git a/libbutl/export.hxx b/libbutl/export.hxx new file mode 100644 index 0000000..f0529de --- /dev/null +++ b/libbutl/export.hxx @@ -0,0 +1,41 @@ +// file : libbutl/export.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_EXPORT_HXX +#define LIBBUTL_EXPORT_HXX + +// Normally we don't export class templates (but do complete specializations), +// inline functions, and classes with only inline member functions. Exporting +// classes that inherit from non-exported/imported bases (e.g., std::string) +// will end up badly. The only known workarounds are to not inherit or to not +// export. Also, MinGW GCC doesn't like seeing non-exported function being +// used before their inline definition. The workaround is to reorder code. In +// the end it's all trial and error. + +#if defined(LIBBUTL_STATIC) // Using static. +# define LIBBUTL_EXPORT +#elif defined(LIBBUTL_STATIC_BUILD) // Building static. +# define LIBBUTL_EXPORT +#elif defined(LIBBUTL_SHARED) // Using shared. +# ifdef _WIN32 +# define LIBBUTL_EXPORT __declspec(dllimport) +# else +# define LIBBUTL_EXPORT +# endif +#elif defined(LIBBUTL_SHARED_BUILD) // Building shared. 
+# ifdef _WIN32 +# define LIBBUTL_EXPORT __declspec(dllexport) +# else +# define LIBBUTL_EXPORT +# endif +#else +// If none of the above macros are defined, then we assume we are being used +// by some third-party build system that cannot/doesn't signal the library +// type. Note that this fallback works for both static and shared but in case +// of shared will be sub-optimal compared to having dllimport. +// +# define LIBBUTL_EXPORT // Using static or shared. +#endif + +#endif // LIBBUTL_EXPORT_HXX diff --git a/libbutl/fdstream.cxx b/libbutl/fdstream.cxx new file mode 100644 index 0000000..77e48e0 --- /dev/null +++ b/libbutl/fdstream.cxx @@ -0,0 +1,1085 @@ +// file : libbutl/fdstream.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#ifndef _WIN32 +# include // open(), O_*, fcntl() +# include // close(), read(), write(), lseek(), dup(), pipe(), + // ssize_t, STD*_FILENO +# include // writev(), iovec +# include // S_I* +# include // off_t +#else +# include + +# include // _close(), _read(), _write(), _setmode(), _sopen(), + // _lseek(), _dup(), _pipe(), _get_osfhandle() +# include // _SH_DENYNO +# include // _fileno(), stdin, stdout, stderr +# include // _O_* +# include // S_I* +#endif + +#include // errno, E* + +#include // ios_base::openmode, ios_base::failure +#include // bad_alloc +#include // numeric_limits +#include +#include // memcpy(), memmove() +#include // uncaught_exception() +#include // invalid_argument +#include +#include + +#include + +using namespace std; + +#ifdef _WIN32 +using namespace butl::win32; +#endif + +namespace butl +{ + // throw_ios_failure + // + template + [[noreturn]] static inline typename enable_if::type + throw_ios_failure (error_code e, const char* m) + { + // The idea here is to make an error code to be saved into failure + // exception and to make a string returned by what() to contain the error + // description plus an optional custom 
message if provided. Unfortunatelly + // there is no way to say that the custom message is absent. Passing an + // empty string results for libstdc++ (as of version 5.3.1) with a + // description like this (note the ': ' prefix): + // + // : No such file or directory + // + // Note that our custom operator<<(ostream, exception) strips this prefix. + // + throw ios_base::failure (m != nullptr ? m : "", e); + } + + template + [[noreturn]] static inline typename enable_if::type + throw_ios_failure (error_code e, const char* m) + { + throw ios_base::failure (m != nullptr ? m : e.message ().c_str ()); + } + + // Throw system_error with generic_category. + // + [[noreturn]] static inline void + throw_ios_failure (int errno_code, const char* m = nullptr) + { + error_code ec (errno_code, generic_category ()); + throw_ios_failure::value> ( + ec, m); + } + +#ifdef _WIN32 + // Throw system_error with system_category. + // + static inline void + throw_ios_system_failure (int system_code) + { + // Here we work around MinGW libstdc++ that interprets Windows system error + // codes (for example those returned by GetLastError()) as errno codes. + // + // Note that the resulting system_error description will have ': Success.' + // suffix that is stripped by our custom operator<<(ostream, exception). + // + error_code ec (0, system_category ()); + string m (win32::error_msg (system_code)); + + throw_ios_failure::value> ( + ec, m.c_str ()); + } +#endif + + // auto_fd + // + void auto_fd:: + close () + { + if (fd_ >= 0) + { + bool r (fdclose (fd_)); + + // If fdclose() failed then no reason to expect it to succeed the next + // time. 
+ // + fd_ = -1; + + if (!r) + throw_ios_failure (errno); + } + } + + // fdbuf + // + fdbuf:: + fdbuf (auto_fd&& fd) + { + if (fd.get () >= 0) + open (move (fd)); + } + + void fdbuf:: + open (auto_fd&& fd) + { + close (); + +#ifndef _WIN32 + int flags (fcntl (fd.get (), F_GETFL)); + + if (flags == -1) + throw_ios_failure (errno); + + non_blocking_ = (flags & O_NONBLOCK) == O_NONBLOCK; +#endif + + setg (buf_, buf_, buf_); + setp (buf_, buf_ + sizeof (buf_) - 1); // Keep space for overflow's char. + + fd_ = move (fd); + } + + streamsize fdbuf:: + showmanyc () + { + if (!is_open ()) + return -1; + + streamsize n (egptr () - gptr ()); + + if (n > 0) + return n; + +#ifndef _WIN32 + if (non_blocking_) + { + ssize_t n (read (fd_.get (), buf_, sizeof (buf_))); + + if (n == -1) + { + if (errno == EAGAIN || errno == EINTR) + return 0; + + throw_ios_failure (errno); + } + + if (n == 0) // EOF. + return -1; + + setg (buf_, buf_, buf_ + n); + return n; + } +#endif + + return 0; + } + + fdbuf::int_type fdbuf:: + underflow () + { + int_type r (traits_type::eof ()); + + if (is_open ()) + { + // The underflow() function interface doesn't support the non-blocking + // semantics as it must return either the next character or EOF. In the + // future we may implement the blocking behavior for a non-blocking file + // descriptor. + // + if (non_blocking_) + throw_ios_failure (ENOTSUP); + + if (gptr () < egptr () || load ()) + r = traits_type::to_int_type (*gptr ()); + } + + return r; + } + + bool fdbuf:: + load () + { + // Doesn't handle blocking mode and so should not be called. 
+ // + assert (!non_blocking_); + +#ifndef _WIN32 + ssize_t n (read (fd_.get (), buf_, sizeof (buf_))); +#else + int n (_read (fd_.get (), buf_, sizeof (buf_))); +#endif + + if (n == -1) + throw_ios_failure (errno); + + setg (buf_, buf_, buf_ + n); + return n != 0; + } + + fdbuf::int_type fdbuf:: + overflow (int_type c) + { + int_type r (traits_type::eof ()); + + if (is_open () && c != traits_type::eof ()) + { + // The overflow() function interface doesn't support the non-blocking + // semantics since being unable to serialize the character is supposed + // to be an error. In the future we may implement the blocking behavior + // for a non-blocking file descriptor. + // + if (non_blocking_) + throw_ios_failure (ENOTSUP); + + // Store last character in the space we reserved in open(). Note + // that pbump() doesn't do any checks. + // + *pptr () = traits_type::to_char_type (c); + pbump (1); + + if (save ()) + r = c; + } + + return r; + } + + int fdbuf:: + sync () + { + if (!is_open ()) + return -1; + + // The sync() function interface doesn't support the non-blocking + // semantics since it should either completely sync the data or fail. In + // the future we may implement the blocking behavior for a non-blocking + // file descriptor. + // + if (non_blocking_) + throw_ios_failure (ENOTSUP); + + return save () ? 0 : -1; + } + + bool fdbuf:: + save () + { + size_t n (pptr () - pbase ()); + + if (n != 0) + { + // Note that for MinGW GCC (5.2.0) _write() returns 0 for a file + // descriptor opened for read-only access (while -1 with errno EBADF is + // expected). This is in contrast with VC's _write() and POSIX's write(). 
+ // +#ifndef _WIN32 + ssize_t m (write (fd_.get (), buf_, n)); +#else + int m (_write (fd_.get (), buf_, n)); +#endif + + if (m == -1) + throw_ios_failure (errno); + + if (n != static_cast (m)) + return false; + + setp (buf_, buf_ + sizeof (buf_) - 1); + } + + return true; + } + + streamsize fdbuf:: + xsputn (const char_type* s, streamsize sn) + { + // The xsputn() function interface doesn't support the non-blocking + // semantics since the only excuse not to fully serialize the data is + // encountering EOF (the default behaviour is defined as a sequence of + // sputc() calls which stops when either sn characters are written or a + // call would have returned EOF). In the future we may implement the + // blocking behavior for a non-blocking file descriptor. + // + if (non_blocking_) + throw_ios_failure (ENOTSUP); + + // To avoid futher 'signed/unsigned comparison' compiler warnings. + // + size_t n (static_cast (sn)); + + // Buffer the data if there is enough space. + // + size_t an (epptr () - pptr ()); // Amount of free space in the buffer. + if (n <= an) + { + memcpy (pptr (), s, n); + pbump (n); + return n; + } + + size_t bn (pptr () - pbase ()); // Buffered data size. + +#ifndef _WIN32 + + ssize_t r; + if (bn > 0) + { + // Write both buffered and new data with a single system call. + // + iovec iov[2] = {{pbase (), bn}, {const_cast (s), n}}; + r = writev (fd_.get (), iov, 2); + } + else + r = write (fd_.get (), s, n); + + if (r == -1) + throw_ios_failure (errno); + + size_t m (static_cast (r)); + + // If the buffered data wasn't fully written then move the unwritten part + // to the beginning of the buffer. + // + if (m < bn) + { + memmove (pbase (), pbase () + m, bn - m); + pbump (-m); // Note that pbump() accepts negatives. + return 0; + } + + setp (buf_, buf_ + sizeof (buf_) - 1); + return m - bn; + +#else + + // On Windows there is no writev() available so sometimes we will make two + // system calls. 
Fill and flush the buffer, then try to fit the data tail + // into the empty buffer. If the data tail is too long then just write it + // to the file and keep the buffer empty. + // + // We will end up with two _write() calls if the total data size to be + // written exceeds double the buffer size. In this case the buffer filling + // is redundant so let's pretend there is no free space in the buffer, and + // so buffered and new data will be written separatelly. + // + if (bn + n > 2 * (bn + an)) + an = 0; + else + { + memcpy (pptr (), s, an); + pbump (an); + } + + // Flush the buffer. + // + size_t wn (bn + an); + int r (wn > 0 ? _write (fd_.get (), buf_, wn) : 0); + + if (r == -1) + throw_ios_failure (errno); + + size_t m (static_cast (r)); + + // If the buffered data wasn't fully written then move the unwritten part + // to the beginning of the buffer. + // + if (m < wn) + { + memmove (pbase (), pbase () + m, wn - m); + pbump (-m); // Note that pbump() accepts negatives. + return m < bn ? 0 : m - bn; + } + + setp (buf_, buf_ + sizeof (buf_) - 1); + + // Now 'an' holds the size of the data portion written as a part of the + // buffer flush. + // + s += an; + n -= an; + + // Buffer the data tail if it fits the buffer. + // + if (n <= static_cast (epptr () - pbase ())) + { + memcpy (pbase (), s, n); + pbump (n); + return sn; + } + + // The data tail doesn't fit the buffer so write it to the file. 
+ // + r = _write (fd_.get (), s, n); + + if (r == -1) + throw_ios_failure (errno); + + return an + r; +#endif + } + + inline static bool + flag (fdstream_mode m, fdstream_mode flag) + { + return (m & flag) == flag; + } + + inline static auto_fd + mode (auto_fd fd, fdstream_mode m) + { + if (fd.get () >= 0 && + (flag (m, fdstream_mode::text) || + flag (m, fdstream_mode::binary) || + flag (m, fdstream_mode::blocking) || + flag (m, fdstream_mode::non_blocking))) + fdmode (fd.get (), m); + + return fd; + } + + // fdstream_base + // + fdstream_base:: + fdstream_base (auto_fd&& fd, fdstream_mode m) + : fdstream_base (mode (move (fd), m)) // Delegate. + { + } + + static fdopen_mode + translate_mode (ios_base::openmode m) + { + enum + { + in = ios_base::in, + out = ios_base::out, + app = ios_base::app, + bin = ios_base::binary, + trunc = ios_base::trunc, + ate = ios_base::ate + }; + + const fdopen_mode fd_in (fdopen_mode::in); + const fdopen_mode fd_out (fdopen_mode::out); + const fdopen_mode fd_inout (fdopen_mode::in | fdopen_mode::out); + const fdopen_mode fd_app (fdopen_mode::append); + const fdopen_mode fd_trunc (fdopen_mode::truncate); + const fdopen_mode fd_create (fdopen_mode::create); + const fdopen_mode fd_bin (fdopen_mode::binary); + const fdopen_mode fd_ate (fdopen_mode::at_end); + + fdopen_mode r; + switch (m & ~(ate | bin)) + { + case in : r = fd_in ; break; + case out : + case out | trunc : r = fd_out | fd_trunc | fd_create ; break; + case app : + case out | app : r = fd_out | fd_app | fd_create ; break; + case out | in : r = fd_inout ; break; + case out | in | trunc : r = fd_inout | fd_trunc | fd_create ; break; + case out | in | app : + case in | app : r = fd_inout | fd_app | fd_create ; break; + + default: throw invalid_argument ("invalid open mode"); + } + + if (m & ate) + r |= fd_ate; + + if (m & bin) + r |= fd_bin; + + return r; + } + + // ifdstream + // + ifdstream:: + ifdstream (const char* f, openmode m, iostate e) + : ifdstream (f, translate_mode 
(m | in), e) // Delegate. + { + } + + ifdstream:: + ifdstream (const char* f, fdopen_mode m, iostate e) + : ifdstream (fdopen (f, m | fdopen_mode::in), e) // Delegate. + { + } + + ifdstream:: + ~ifdstream () + { + if (skip_ && is_open () && good ()) + { + // Clear the exception mask to prevent ignore() from throwing. + // + exceptions (goodbit); + ignore (numeric_limits::max ()); + } + + // Underlying file descriptor is closed by fdbuf dtor with errors (if any) + // being ignored. + // + } + + void ifdstream:: + open (const char* f, openmode m) + { + open (f, translate_mode (m | in)); + } + + void ifdstream:: + open (const char* f, fdopen_mode m) + { + open (fdopen (f, m | fdopen_mode::in)); + } + + void ifdstream:: + close () + { + if (skip_ && is_open () && good ()) + ignore (numeric_limits::max ()); + + buf_.close (); + } + + ifdstream& + getline (ifdstream& is, string& s, char delim) + { + ifdstream::iostate eb (is.exceptions ()); + assert (eb & ifdstream::badbit); + + // Amend the exception mask to prevent exceptions being thrown by the C++ + // IO runtime to avoid incompatibility issues due to ios_base::failure ABI + // fiasco (#66145). We will not restore the mask when ios_base::failure is + // thrown by fdbuf since there is no way to "silently" restore it if the + // corresponding bits are in the error state without the exceptions() call + // throwing ios_base::failure. Not restoring exception mask on throwing + // because of badbit should probably be ok since the stream is no longer + // usable. + // + if (eb != ifdstream::badbit) + is.exceptions (ifdstream::badbit); + + std::getline (is, s, delim); + + // Throw if any of the newly set bits are present in the exception mask. + // + if ((is.rdstate () & eb) != ifdstream::goodbit) + throw_ios_failure (EIO, "getline failure"); + + if (eb != ifdstream::badbit) + is.exceptions (eb); // Restore exception mask. 
+ + return is; + } + + // ofdstream + // + ofdstream:: + ofdstream (const char* f, openmode m, iostate e) + : ofdstream (f, translate_mode (m | out), e) // Delegate. + { + } + + ofdstream:: + ofdstream (const char* f, fdopen_mode m, iostate e) + : ofdstream (fdopen (f, m | fdopen_mode::out), e) // Delegate. + { + } + + ofdstream:: + ~ofdstream () + { + // Enforce explicit close(). Note that we may have false negatives but not + // false positives. Specifically, we will fail to enforce if someone is + // using ofdstream in a dtor being called while unwinding the stack due to + // an exception. + // + assert (!is_open () || !good () || uncaught_exception ()); + } + + void ofdstream:: + open (const char* f, openmode m) + { + open (f, translate_mode (m | out)); + } + + void ofdstream:: + open (const char* f, fdopen_mode m) + { + open (fdopen (f, m | fdopen_mode::out)); + } + + // Utility functions + // + auto_fd + fdopen (const char* f, fdopen_mode m, permissions p) + { + mode_t pf (S_IREAD | S_IWRITE | S_IEXEC); + +#ifdef S_IRWXG + pf |= S_IRWXG; +#endif + +#ifdef S_IRWXO + pf |= S_IRWXO; +#endif + + pf &= static_cast (p); + + // Return true if the open mode contains a specific flag. 
+ // + auto mode = [m](fdopen_mode flag) -> bool {return (m & flag) == flag;}; + + int of (0); + bool in (mode (fdopen_mode::in)); + bool out (mode (fdopen_mode::out)); + +#ifndef _WIN32 + + if (in && out) + of |= O_RDWR; + else if (in) + of |= O_RDONLY; + else if (out) + of |= O_WRONLY; + + if (out) + { + if (mode (fdopen_mode::append)) + of |= O_APPEND; + + if (mode (fdopen_mode::truncate)) + of |= O_TRUNC; + } + + if (mode (fdopen_mode::create)) + { + of |= O_CREAT; + + if (mode (fdopen_mode::exclusive)) + of |= O_EXCL; + } + +#ifdef O_LARGEFILE + of |= O_LARGEFILE; +#endif + + int fd (open (f, of | O_CLOEXEC, pf)); + +#else + + if (in && out) + of |= _O_RDWR; + else if (in) + of |= _O_RDONLY; + else if (out) + of |= _O_WRONLY; + + if (out) + { + if (mode (fdopen_mode::append)) + of |= _O_APPEND; + + if (mode (fdopen_mode::truncate)) + of |= _O_TRUNC; + } + + if (mode (fdopen_mode::create)) + { + of |= _O_CREAT; + + if (mode (fdopen_mode::exclusive)) + of |= _O_EXCL; + } + + of |= mode (fdopen_mode::binary) ? _O_BINARY : _O_TEXT; + + // According to Microsoft _sopen() should not change the permissions of an + // existing file. However it does if we pass them (reproduced on Windows + // XP, 7, and 8). And we must pass them if we have _O_CREATE. So we need + // to take care of preserving the permissions ourselves. Note that Wine's + // implementation of _sopen() works properly. + // + bool pass_perm (of & _O_CREAT); + + if (pass_perm && file_exists (path (f))) + { + // If the _O_CREAT flag is set then we need to clear it so that we can + // omit the permissions. But if the _O_EXCL flag is set as well we can't + // do that as fdopen() wouldn't fail as expected. + // + if (of & _O_EXCL) + throw_ios_failure (EEXIST); + + of &= ~_O_CREAT; + pass_perm = false; + } + + // Make sure the file descriptor is not inheritable by default. + // + of |= _O_NOINHERIT; + + int fd (pass_perm + ? 
_sopen (f, of, _SH_DENYNO, pf) + : _sopen (f, of, _SH_DENYNO)); + +#endif + + if (fd == -1) + throw_ios_failure (errno); + + if (mode (fdopen_mode::at_end)) + { +#ifndef _WIN32 + bool r (lseek (fd, 0, SEEK_END) != static_cast (-1)); +#else + bool r (_lseek (fd, 0, SEEK_END) != -1); +#endif + + // Note that in the case of an error we don't delete the newly created + // file as we have no indication if it is a new one. + // + if (!r) + { + int e (errno); + fdclose (fd); // Doesn't throw, but can change errno. + throw_ios_failure (e); + } + } + + return auto_fd (fd); + } + +#ifndef _WIN32 + + auto_fd + fddup (int fd) + { + // dup() doesn't copy FD_CLOEXEC flag, so we need to do it ourselves. Note + // that the new descriptor can leak into child processes before we copy the + // flag. To prevent this we will acquire the process_spawn_mutex (see + // process-details header) prior to duplicating the descriptor. Also note + // there is dup3() (available on Linux and FreeBSD but not on Max OS) that + // takes flags, but it's usage tends to be hairy (need to preopen a dummy + // file descriptor to pass as a second argument). + // + auto dup = [fd] () -> auto_fd + { + auto_fd nfd (::dup (fd)); + if (nfd.get () == -1) + throw_ios_failure (errno); + + return nfd; + }; + + int f (fcntl (fd, F_GETFD)); + if (f == -1) + throw_ios_failure (errno); + + // If the source descriptor has no FD_CLOEXEC flag set then no flag copy is + // required (as the duplicate will have no flag by default). 
+ // + if ((f & FD_CLOEXEC) == 0) + return dup (); + + slock l (process_spawn_mutex); + auto_fd nfd (dup ()); + + f = fcntl (nfd.get (), F_GETFD); + if (f == -1 || fcntl (nfd.get (), F_SETFD, f | FD_CLOEXEC) == -1) + throw_ios_failure (errno); + + return nfd; + } + + bool + fdclose (int fd) noexcept + { + return close (fd) == 0; + } + + auto_fd + fdnull () noexcept + { + int fd (open ("/dev/null", O_RDWR | O_CLOEXEC)); + + if (fd == -1) + throw_ios_failure (errno); + + return auto_fd (fd); + } + + fdstream_mode + fdmode (int fd, fdstream_mode m) + { + int flags (fcntl (fd, F_GETFL)); + + if (flags == -1) + throw_ios_failure (errno); + + if (flag (m, fdstream_mode::blocking) || + flag (m, fdstream_mode::non_blocking)) + { + m &= fdstream_mode::blocking | fdstream_mode::non_blocking; + + // Should be exactly one blocking mode flag specified. + // + if (m != fdstream_mode::blocking && m != fdstream_mode::non_blocking) + throw invalid_argument ("invalid blocking mode"); + + int new_flags ( + m == fdstream_mode::non_blocking + ? flags | O_NONBLOCK + : flags & ~O_NONBLOCK); + + if (fcntl (fd, F_SETFL, new_flags) == -1) + throw_ios_failure (errno); + } + + return fdstream_mode::binary | + ((flags & O_NONBLOCK) == O_NONBLOCK + ? fdstream_mode::non_blocking + : fdstream_mode::blocking); + } + + fdstream_mode + stdin_fdmode (fdstream_mode m) + { + return fdmode (STDIN_FILENO, m); + } + + fdstream_mode + stdout_fdmode (fdstream_mode m) + { + return fdmode (STDOUT_FILENO, m); + } + + fdstream_mode + stderr_fdmode (fdstream_mode m) + { + return fdmode (STDERR_FILENO, m); + } + + fdpipe + fdopen_pipe (fdopen_mode m) + { + assert (m == fdopen_mode::none || m == fdopen_mode::binary); + + // Note that the pipe file descriptors can leak into child processes before + // we set FD_CLOEXEC flag for them. To prevent this we will acquire the + // process_spawn_mutex (see process-details header) prior to creating the + // pipe. 
Also note there is pipe2() (available on Linux and FreeBSD but not + // on Max OS) that takes flags. + // + slock l (process_spawn_mutex); + + int pd[2]; + if (pipe (pd) == -1) + throw_ios_failure (errno); + + fdpipe r {auto_fd (pd[0]), auto_fd (pd[1])}; + + for (size_t i (0); i < 2; ++i) + { + int f (fcntl (pd[i], F_GETFD)); + if (f == -1 || fcntl (pd[i], F_SETFD, f | FD_CLOEXEC) == -1) + throw_ios_failure (errno); + } + + return r; + } + +#else + + auto_fd + fddup (int fd) + { + // _dup() doesn't copy _O_NOINHERIT flag, so we need to do it ourselves. + // Note that the new descriptor can leak into child processes before we + // copy the flag. To prevent this we will acquire the process_spawn_mutex + // (see process-details header) prior to duplicating the descriptor. + // + // We can not ammend file descriptors directly (nor obtain the flag value), + // so need to resolve them to Windows HANDLE first. Such handles are closed + // either when CloseHandle() is called for them or when _close() is called + // for the associated file descriptors. Make sure that either the original + // file descriptor or the resulting HANDLE is closed but not both of them. + // + auto handle = [] (int fd) -> HANDLE + { + HANDLE h (reinterpret_cast (_get_osfhandle (fd))); + if (h == INVALID_HANDLE_VALUE) + throw_ios_failure (errno); // EBADF (POSIX value). + + return h; + }; + + auto dup = [fd] () -> auto_fd + { + auto_fd nfd (_dup (fd)); + if (nfd.get () == -1) + throw_ios_failure (errno); + + return nfd; + }; + + DWORD f; + if (!GetHandleInformation (handle (fd), &f)) + throw_ios_system_failure (GetLastError ()); + + // If the source handle is inheritable then no flag copy is required (as + // the duplicate handle will be inheritable by default). 
+ // + if (f & HANDLE_FLAG_INHERIT) + return dup (); + + slock l (process_spawn_mutex); + + auto_fd nfd (dup ()); + if (!SetHandleInformation (handle (nfd.get ()), HANDLE_FLAG_INHERIT, 0)) + throw_ios_system_failure (GetLastError ()); + + return nfd; + } + + bool + fdclose (int fd) noexcept + { + return _close (fd) == 0; + } + + auto_fd + fdnull (bool temp) noexcept + { + // No need to translate \r\n before sending it to void. + // + if (!temp) + { + int fd (_sopen ("nul", _O_RDWR | _O_BINARY | _O_NOINHERIT, _SH_DENYNO)); + + if (fd == -1) + throw_ios_failure (errno); + + return auto_fd (fd); + } + + try + { + // We could probably implement a Windows-specific version of getting + // the temporary file that avoid any allocations and exceptions. + // + path p (path::temp_path ("null")); // Can throw. + int fd (_sopen (p.string ().c_str (), + (_O_CREAT | + _O_RDWR | + _O_BINARY | // Don't translate. + _O_TEMPORARY | // Remove on close. + _O_SHORT_LIVED | // Don't flush to disk. + _O_NOINHERIT), // Don't inherit by child process. + _SH_DENYNO, + _S_IREAD | _S_IWRITE)); + + if (fd == -1) + throw_ios_failure (errno); + + return auto_fd (fd); + } + catch (const bad_alloc&) + { + throw_ios_failure (ENOMEM); + } + catch (const system_error& e) + { + // Make sure that the error denotes errno portable code. + // + assert (e.code ().category () == generic_category ()); + + throw_ios_failure (e.code ().value ()); + } + } + + fdstream_mode + fdmode (int fd, fdstream_mode m) + { + m &= fdstream_mode::text | fdstream_mode::binary; + + // Should be exactly one translation flag specified. + // + // It would have been natural not to change translation mode if none of + // text or binary flags are passed. Unfortunatelly there is no (easy) way + // to obtain the current mode for the file descriptor without setting a + // new one. This is why not specifying one of the modes is an error. 
+ // + if (m != fdstream_mode::binary && m != fdstream_mode::text) + throw invalid_argument ("invalid translation mode"); + + // Note that _setmode() preserves the _O_NOINHERIT flag value. + // + int r (_setmode (fd, m == fdstream_mode::binary ? _O_BINARY : _O_TEXT)); + if (r == -1) + throw_ios_failure (errno); // EBADF or EINVAL (POSIX values). + + return fdstream_mode::blocking | + ((r & _O_BINARY) == _O_BINARY + ? fdstream_mode::binary + : fdstream_mode::text); + } + + fdstream_mode + stdin_fdmode (fdstream_mode m) + { + int fd (_fileno (stdin)); + if (fd == -1) + throw_ios_failure (errno); + + return fdmode (fd, m); + } + + fdstream_mode + stdout_fdmode (fdstream_mode m) + { + int fd (_fileno (stdout)); + if (fd == -1) + throw_ios_failure (errno); + + return fdmode (fd, m); + } + + fdstream_mode + stderr_fdmode (fdstream_mode m) + { + int fd (_fileno (stderr)); + if (fd == -1) + throw_ios_failure (errno); + + return fdmode (fd, m); + } + + fdpipe + fdopen_pipe (fdopen_mode m) + { + assert (m == fdopen_mode::none || m == fdopen_mode::binary); + + int pd[2]; + if (_pipe ( + pd, + 64 * 1024, // Set buffer size to 64K. + _O_NOINHERIT | (m == fdopen_mode::none ? _O_TEXT : _O_BINARY)) == -1) + throw_ios_failure (errno); + + return {auto_fd (pd[0]), auto_fd (pd[1])}; + } +#endif +} diff --git a/libbutl/fdstream.hxx b/libbutl/fdstream.hxx new file mode 100644 index 0000000..e1f5790 --- /dev/null +++ b/libbutl/fdstream.hxx @@ -0,0 +1,643 @@ +// file : libbutl/fdstream.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_FDSTREAM_HXX +#define LIBBUTL_FDSTREAM_HXX + +#include +#include +#include +#include // move() +#include // uint16_t + +#include + +#include +#include // permissions + +namespace butl +{ + // RAII type for file descriptors. Note that failure to close the descriptor + // is silently ignored by both the destructor and reset(). + // + // The descriptor can be negative. 
Such a descriptor is treated as unopened + // and is not closed. + // + struct nullfd_t {constexpr explicit nullfd_t (int) {}}; + constexpr const nullfd_t nullfd (-1); + + class LIBBUTL_EXPORT auto_fd + { + public: + auto_fd (nullfd_t = nullfd) noexcept: fd_ (-1) {} + + explicit + auto_fd (int fd) noexcept: fd_ (fd) {} + + auto_fd (auto_fd&& fd) noexcept: fd_ (fd.release ()) {} + auto_fd& operator= (auto_fd&&) noexcept; + + auto_fd (const auto_fd&) = delete; + auto_fd& operator= (const auto_fd&) = delete; + + ~auto_fd () noexcept; + + int + get () const noexcept {return fd_;} + + void + reset (int fd = -1) noexcept; + + int + release () noexcept + { + int r (fd_); + fd_ = -1; + return r; + } + + // Close an open file descriptor. Throw ios::failure on the underlying OS + // error. Reset the descriptor to -1 whether the exception is thrown or + // not. + // + void + close (); + + private: + int fd_; + }; + + // An [io]fstream that can be initialized with a file descriptor in addition + // to a file name and that also by default enables exceptions on badbit and + // failbit. 
So instead of a dance like this: + // + // ifstream ifs; + // ifs.exceptions (ifstream::badbit | ifstream::failbit); + // ifs.open (path.string ()); + // + // You can simply do: + // + // ifdstream ifs (path); + // + // Notes and limitations: + // + // - char only + // - input or output but not both + // - no support for put back + // - non-blocking file descriptor is supported only by showmanyc() function + // and only on POSIX + // - throws ios::failure in case of open()/read()/write()/close() errors + // - exception mask has at least badbit + // - after catching an exception caused by badbit the stream is no longer + // used + // - not movable, though can be easily supported + // - passing to constructor auto_fd with a negative file descriptor is valid + // and results in the creation of an unopened object + // + class LIBBUTL_EXPORT fdbuf: public std::basic_streambuf + { + public: + fdbuf () = default; + fdbuf (auto_fd&&); + + // Before we introduced auto_fd into fdstreams we kept fdbuf opened on + // faulty close attempt. Now fdbuf is always closed by close() function. + // This semantics change seems to be the right one as there is no reason to + // expect fdclose() to succeed after it has already failed once. + // + void + close () {fd_.close ();} + + auto_fd + release (); + + void + open (auto_fd&&); + + bool + is_open () const {return fd_.get () >= 0;} + + int + fd () const {return fd_.get ();} + + public: + using int_type = std::basic_streambuf::int_type; + using traits_type = std::basic_streambuf::traits_type; + + // basic_streambuf input interface. + // + public: + virtual std::streamsize + showmanyc (); + + virtual int_type + underflow (); + + private: + bool + load (); + + // basic_streambuf output interface.
+ // + public: + virtual int_type + overflow (int_type); + + virtual int + sync (); + + virtual std::streamsize + xsputn (const char_type*, std::streamsize); + + private: + bool + save (); + + private: + auto_fd fd_; + char buf_[8192]; + bool non_blocking_ = false; + }; + + // File stream mode. + // + // The text/binary flags have the same semantics as those in std::fstream. + // Specifically, this is a noop for POSIX systems where the two modes are + // the same. On Windows, when reading in the text mode the sequence of 0xD, + // 0xA characters is translated into the single 0xA character and 0x1A is + // interpreted as EOF. When writing in the text mode the 0xA character is + // translated into the 0xD, 0xA sequence. + // + // The skip flag instructs the stream to skip to the end before closing the + // file descriptor. This is primarily useful when working with pipes where + // you may want not to "offend" the other end by closing your end before + // reading all the data. + // + // The blocking/non_blocking flags determine whether the IO operation should + // block or return control if currently there is no data to read or no room + // to write. Only the istream::readsome() function supports the semantics of + // non-blocking operations. We also only support this on POSIX (Windows does + // not provide means for the non-blocking reading from a file descriptor so + // these flags are noop there). IO stream operations other than readsome() + // are illegal for non_blocking mode and result in the badbit being set.
+ // + enum class fdstream_mode: std::uint16_t + { + text = 0x01, + binary = 0x02, + skip = 0x04, + blocking = 0x08, + non_blocking = 0x10 + }; + + inline fdstream_mode operator& (fdstream_mode, fdstream_mode); + inline fdstream_mode operator| (fdstream_mode, fdstream_mode); + inline fdstream_mode operator&= (fdstream_mode&, fdstream_mode); + inline fdstream_mode operator|= (fdstream_mode&, fdstream_mode); + + // Extended (compared to ios::openmode) file open flags. + // + enum class fdopen_mode: std::uint16_t + { + in = 0x01, // Open for reading. + out = 0x02, // Open for writing. + append = 0x04, // Seek to the end of file before each write. + truncate = 0x08, // Discard the file contents on open. + create = 0x10, // Create a file if not exists. + exclusive = 0x20, // Fail if the file exists and the create flag is set. + binary = 0x40, // Set binary translation mode. + at_end = 0x80, // Seek to the end of stream immediately after open. + + none = 0 // Useful when building the mode incrementally. + }; + + inline fdopen_mode operator& (fdopen_mode, fdopen_mode); + inline fdopen_mode operator| (fdopen_mode, fdopen_mode); + inline fdopen_mode operator&= (fdopen_mode&, fdopen_mode); + inline fdopen_mode operator|= (fdopen_mode&, fdopen_mode); + + class LIBBUTL_EXPORT fdstream_base + { + protected: + fdstream_base () = default; + fdstream_base (auto_fd&& fd): buf_ (std::move (fd)) {} + fdstream_base (auto_fd&&, fdstream_mode); + + public: + int + fd () const {return buf_.fd ();} + + protected: + fdbuf buf_; + }; + + // iofdstream constructors and open() functions that take openmode as an + // argument mimic the corresponding iofstream functions in terms of the + // openmode mask interpretation. They throw std::invalid_argument for an + // invalid combination of flags (as per the standard). Note that the in and + // out flags are always added implicitly for ifdstream and ofdstream, + // respectively.
+ // + // iofdstream constructors and open() functions that take fdopen_mode as an + // argument interpret the mask literally just ignoring some flags which are + // meaningless in the absence of others (read more on that in the comment + // for fdopen()). Note that the in and out flags are always added implicitly + // for ifdstream and ofdstream, respectively. + // + // iofdstream constructors and open() functions that take file path as a + // const std::string& or const char* may throw the invalid_path exception. + // + // Passing auto_fd with a negative file descriptor is valid and results in + // the creation of an unopened object. + // + // Also note that open() and close() functions can be successfully called + // for opened and unopened objects, respectively. That is in contrast with + // iofstream that sets failbit in such cases. + // + + // Note that ifdstream destructor will close an open file descriptor but + // will ignore any errors. To detect such errors, call close() explicitly. + // + // This is a sample usage of iofdstreams with process. Note that here it is + // expected that the child process reads from STDIN first and writes to + // STDOUT afterwards. + // + // try + // { + // process pr (args, -1, -1); + // + // try + // { + // // In case of exception, skip and close input after output. + // // + // ifdstream is (move (pr.in_ofd), fdstream_mode::skip); + // ofdstream os (move (pr.out_fd)); + // + // // Write. + // + // os.close (); // Don't block the other end. + // + // // Read. + // + // is.close (); // Skip till end and close. + // + // if (pr.wait ()) + // { + // return ...; // Good. + // } + // + // // Non-zero exit, diagnostics presumably issued, fall through. + // } + // catch (const failure&) + // { + // // IO failure, child exit status doesn't matter. Just wait for the + // // process completion and fall through.
+ // // + // // Note that this is optional if the process_error handler simply + // // falls through since process destructor will wait (but will ignore + // // any errors). + // // + // pr.wait (); + // } + // + // error << .... ; + // + // // Fall through. + // } + // catch (const process_error& e) + // { + // error << ... << e; + // + // if (e.child ()) + // exit (1); + // + // // Fall through. + // } + // + // throw failed (); + // + class LIBBUTL_EXPORT ifdstream: public fdstream_base, public std::istream + { + public: + // Create an unopened object. + // + explicit + ifdstream (iostate e = badbit | failbit); + + explicit + ifdstream (auto_fd&&, iostate e = badbit | failbit); + ifdstream (auto_fd&&, fdstream_mode m, iostate e = badbit | failbit); + + explicit + ifdstream (const char*, + openmode = in, + iostate e = badbit | failbit); + + explicit + ifdstream (const std::string&, + openmode = in, + iostate e = badbit | failbit); + + explicit + ifdstream (const path&, + openmode = in, + iostate e = badbit | failbit); + + ifdstream (const char*, + fdopen_mode, + iostate e = badbit | failbit); + + ifdstream (const std::string&, + fdopen_mode, + iostate e = badbit | failbit); + + ifdstream (const path&, + fdopen_mode, + iostate e = badbit | failbit); + + ~ifdstream () override; + + void + open (const char*, openmode = in); + + void + open (const std::string&, openmode = in); + + void + open (const path&, openmode = in); + + void + open (const char*, fdopen_mode); + + void + open (const std::string&, fdopen_mode); + + void + open (const path&, fdopen_mode); + + void + open (auto_fd&& fd) {buf_.open (std::move (fd)); clear ();} + + void close (); + auto_fd release (); // Note: no skipping. + bool is_open () const {return buf_.is_open ();} + + private: + bool skip_ = false; + }; + + // Note that ofdstream requires that you explicitly call close() before + // destroying it. 
Or, more specifically, the ofdstream object should not be + // in the opened state by the time its destructor is called, unless it is in + // the "not good" state (good() == false) or the destructor is being called + // during the stack unwinding due to an exception being thrown + // (std::uncaught_exception() == true). This is enforced with assert() in + // the ofdstream destructor. + // + class LIBBUTL_EXPORT ofdstream: public fdstream_base, public std::ostream + { + public: + // Create an unopened object. + // + explicit + ofdstream (iostate e = badbit | failbit); + + explicit + ofdstream (auto_fd&&, iostate e = badbit | failbit); + ofdstream (auto_fd&&, fdstream_mode m, iostate e = badbit | failbit); + + explicit + ofdstream (const char*, + openmode = out, + iostate e = badbit | failbit); + + explicit + ofdstream (const std::string&, + openmode = out, + iostate e = badbit | failbit); + + explicit + ofdstream (const path&, + openmode = out, + iostate e = badbit | failbit); + + ofdstream (const char*, + fdopen_mode, + iostate e = badbit | failbit); + + ofdstream (const std::string&, + fdopen_mode, + iostate e = badbit | failbit); + + ofdstream (const path&, + fdopen_mode, + iostate e = badbit | failbit); + + ~ofdstream () override; + + void + open (const char*, openmode = out); + + void + open (const std::string&, openmode = out); + + void + open (const path&, openmode = out); + + void + open (const char*, fdopen_mode); + + void + open (const std::string&, fdopen_mode); + + void + open (const path&, fdopen_mode); + + void + open (auto_fd&& fd) {buf_.open (std::move (fd)); clear ();} + + void close () {if (is_open ()) flush (); buf_.close ();} + auto_fd release (); + bool is_open () const {return buf_.is_open ();} + }; + + // The std::getline() replacement that provides a workaround for libstdc++'s + // ios::failure ABI fiasco (#66145) by throwing ios::failure, as it is + // defined at libbutl build time (new ABI on recent distributions) rather + // than libstdc++ 
build time (still old ABI on most distributions). + // + // Notes: + // + // - This relies on ADL so if the stream is used via the std::istream + // interface, then std::getline() will still be used. To put it another + // way, this is "the best we can do" until GCC folks get their act + // together. + // + // - The fail and eof bits may be left cleared in the stream exception mask + // when the function throws because of badbit. + // + LIBBUTL_EXPORT ifdstream& + getline (ifdstream&, std::string&, char delim = '\n'); + + // Open a file returning an auto_fd that holds its file descriptor on + // success and throwing ios::failure otherwise. + // + // The mode argument should have at least one of the in or out flags set. + // The append and truncate flags are meaningless in the absence of the out + // flag and are ignored without it. The exclusive flag is meaningless in the + // absence of the create flag and is ignored without it. Note also that if + // the exclusive flag is specified then a dangling symbolic link is treated + // as an existing file. + // + // The permissions argument is taken into account only if the file is + // created. Note also that permissions can be adjusted while being set in a + // way specific for the OS. On POSIX systems they are modified with the + // process' umask, so effective permissions are permissions & ~umask. On + // Windows permissions other than ru and wu are unlikely to have effect. + // + // Also note that on POSIX the FD_CLOEXEC flag is set for the file descriptor + // to prevent its leakage into child processes. On Windows, for the same + // purpose, the _O_NOINHERIT flag is set. Note that the process class, that + // passes such a descriptor to the child, makes it inheritable for a while.
+ // + LIBBUTL_EXPORT auto_fd + fdopen (const char*, + fdopen_mode, + permissions = permissions::ru | permissions::wu | + permissions::rg | permissions::wg | + permissions::ro | permissions::wo); + + LIBBUTL_EXPORT auto_fd + fdopen (const std::string&, + fdopen_mode, + permissions = permissions::ru | permissions::wu | + permissions::rg | permissions::wg | + permissions::ro | permissions::wo); + + LIBBUTL_EXPORT auto_fd + fdopen (const path&, + fdopen_mode, + permissions = permissions::ru | permissions::wu | + permissions::rg | permissions::wg | + permissions::ro | permissions::wo); + + // Duplicate an open file descriptor. Throw ios::failure on the underlying + // OS error. + // + // Note that on POSIX the FD_CLOEXEC flag is set for the new descriptor if it + // is present for the source one. That's in contrast to POSIX dup() that + // doesn't copy file descriptor flags. Also note that duplicating descriptor + // and setting the flag is not an atomic operation generally, but it is in + // regards to child process spawning (to prevent file descriptor leakage into + // a child process). + // + // Note that on Windows the _O_NOINHERIT flag is set for the new descriptor + // if it is present for the source one. That's in contrast to Windows _dup() + // that doesn't copy the flag. Also note that duplicating descriptor and + // setting the flag is not an atomic operation generally, but it is in + // regards to child process spawning (to prevent file descriptor leakage into + // a child process). + // + LIBBUTL_EXPORT auto_fd + fddup (int fd); + + // Set the translation mode for the file descriptor. Throw invalid_argument + // for an invalid combination of flags. Return the previous mode on success, + // throw ios::failure otherwise. + // + // The text and binary flags are mutually exclusive on Windows. Due to + // implementation details at least one of them should be specified. On POSIX + // system the two modes are the same and so no check is performed. 
+ // + // The blocking and non-blocking flags are mutually exclusive on POSIX + // system. Non-blocking mode is not supported on Windows and so the blocking + // mode is assumed regardless of the flags. + // + LIBBUTL_EXPORT fdstream_mode + fdmode (int, fdstream_mode); + + // Convenience functions for setting the translation mode for standard + // streams. + // + LIBBUTL_EXPORT fdstream_mode stdin_fdmode (fdstream_mode); + LIBBUTL_EXPORT fdstream_mode stdout_fdmode (fdstream_mode); + LIBBUTL_EXPORT fdstream_mode stderr_fdmode (fdstream_mode); + + // Low-level, nothrow file descriptor API. + // + + // Close the file descriptor. Return true on success, set errno and return + // false otherwise. + // + LIBBUTL_EXPORT bool + fdclose (int) noexcept; + + // Open the null device (e.g., /dev/null) that discards all data written to + // it and provides no data for read operations (i.e., yields EOF on read). + // Return an auto_fd that holds its file descriptor on success and throwing + // ios::failure otherwise. + // + // On Windows the null device is NUL and writing anything substantial to it + // (like redirecting a process' output) is extremely slow, as in, an order + // of magnitude slower than writing to disk. If you are using the descriptor + // yourself this can be mitigated by setting the binary mode (already done + // by fdopen()) and using a buffer of around 64K. However, sometimes you + // have no control of how the descriptor will be used. For instance, it can + // be used to redirect a child's stdout and the way the child sets up its + // stdout is out of your control (on Windows). For such cases, there is an + // emulation via a temporary file. Mostly it functions as a proper null + // device with the file automatically removed once the descriptor is + // closed. One difference, however, would be if you were to both write to + // and read from the descriptor.
+ // + // Note that on POSIX the FD_CLOEXEC flag is set for the file descriptor to + // prevent its leakage into child processes. On Windows, for the same + // purpose, the _O_NOINHERIT flag is set. + // +#ifndef _WIN32 + LIBBUTL_EXPORT auto_fd + fdnull () noexcept; +#else + LIBBUTL_EXPORT auto_fd + fdnull (bool temp = false) noexcept; +#endif + + struct fdpipe + { + auto_fd in; + auto_fd out; + + void + close () + { + in.close (); + out.close (); + } + }; + + // Create a pipe. Throw ios::failure on the underlying OS error. By default + // both ends of the pipe are opened in the text mode. Pass the binary flag + // to instead open them in the binary mode. Passing a mode other than none + // or binary is illegal. + // + // Note that on Windows both ends of the created pipe are not inheritable. + // In particular, the process class that uses fdpipe underneath makes the + // appropriate end (the one being passed to the child) inheritable. + // + // Note that on POSIX the FD_CLOEXEC flag is set for both ends, so they get + // automatically closed by the child process to prevent undesired behaviors + // (such as child deadlock on read from a pipe due to the write-end leakage + // into the child process). Opening a pipe and setting the flag is not an + // atomic operation generally, but it is in regards to child process spawning + // (to prevent file descriptor leakage into child processes spawned from + // other threads). Also note that you don't need to reset the flag for a pipe + // end being passed to the process class ctor. 
+ // + LIBBUTL_EXPORT fdpipe + fdopen_pipe (fdopen_mode = fdopen_mode::none); +} + +#include + +#endif // LIBBUTL_FDSTREAM_HXX diff --git a/libbutl/fdstream.ixx b/libbutl/fdstream.ixx new file mode 100644 index 0000000..a877699 --- /dev/null +++ b/libbutl/fdstream.ixx @@ -0,0 +1,266 @@ +// file : libbutl/fdstream.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +namespace butl +{ + // auto_fd + // + inline void auto_fd:: + reset (int fd) noexcept + { + if (fd_ >= 0) + fdclose (fd_); // Don't check for an error as not much we can do here. + + fd_ = fd; + } + + inline auto_fd& auto_fd:: + operator= (auto_fd&& fd) noexcept + { + reset (fd.release ()); + return *this; + } + + inline auto_fd:: + ~auto_fd () noexcept + { + reset (); + } + + // fdbuf + // + inline auto_fd fdbuf:: + release () + { + return std::move (fd_); + } + + // ifdstream + // + inline ifdstream:: + ifdstream (auto_fd&& fd, iostate e) + : fdstream_base (std::move (fd)), std::istream (&buf_) + { + assert (e & badbit); + exceptions (e); + } + + inline ifdstream:: + ifdstream (iostate e) + : ifdstream (auto_fd (), e) // Delegate. + { + } + + inline ifdstream:: + ifdstream (auto_fd&& fd, fdstream_mode m, iostate e) + : fdstream_base (std::move (fd), m), + std::istream (&buf_), + skip_ ((m & fdstream_mode::skip) == fdstream_mode::skip) + { + assert (e & badbit); + exceptions (e); + } + + inline ifdstream:: + ifdstream (const std::string& f, openmode m, iostate e) + : ifdstream (f.c_str (), m, e) // Delegate. + { + } + + inline ifdstream:: + ifdstream (const path& f, openmode m, iostate e) + : ifdstream (f.string (), m, e) // Delegate. + { + } + + inline ifdstream:: + ifdstream (const std::string& f, fdopen_mode m, iostate e) + : ifdstream (f.c_str (), m, e) // Delegate. + { + } + + inline ifdstream:: + ifdstream (const path& f, fdopen_mode m, iostate e) + : ifdstream (f.string (), m, e) // Delegate. 
+ { + } + + inline void ifdstream:: + open (const std::string& f, openmode m) + { + open (f.c_str (), m); + } + + inline void ifdstream:: + open (const path& f, openmode m) + { + open (f.string (), m); + } + + inline void ifdstream:: + open (const std::string& f, fdopen_mode m) + { + open (f.c_str (), m); + } + + inline void ifdstream:: + open (const path& f, fdopen_mode m) + { + open (f.string (), m); + } + + inline auto_fd ifdstream:: + release () + { + return buf_.release (); + } + + // ofdstream + // + inline ofdstream:: + ofdstream (auto_fd&& fd, iostate e) + : fdstream_base (std::move (fd)), std::ostream (&buf_) + { + assert (e & badbit); + exceptions (e); + } + + inline ofdstream:: + ofdstream (iostate e) + : ofdstream (auto_fd (), e) // Delegate. + { + } + + inline ofdstream:: + ofdstream (auto_fd&& fd, fdstream_mode m, iostate e) + : fdstream_base (std::move (fd), m), std::ostream (&buf_) + { + assert (e & badbit); + exceptions (e); + } + + inline ofdstream:: + ofdstream (const std::string& f, openmode m, iostate e) + : ofdstream (f.c_str (), m, e) // Delegate. + { + } + + inline ofdstream:: + ofdstream (const path& f, openmode m, iostate e) + : ofdstream (f.string (), m, e) // Delegate. + { + } + + inline ofdstream:: + ofdstream (const std::string& f, fdopen_mode m, iostate e) + : ofdstream (f.c_str (), m, e) // Delegate. + { + } + + inline ofdstream:: + ofdstream (const path& f, fdopen_mode m, iostate e) + : ofdstream (f.string (), m, e) // Delegate. 
+ { + } + + inline void ofdstream:: + open (const std::string& f, openmode m) + { + open (f.c_str (), m); + } + + inline void ofdstream:: + open (const path& f, openmode m) + { + open (f.string (), m); + } + + inline void ofdstream:: + open (const std::string& f, fdopen_mode m) + { + open (f.c_str (), m); + } + + inline void ofdstream:: + open (const path& f, fdopen_mode m) + { + open (f.string (), m); + } + + inline auto_fd ofdstream:: + release () + { + if (is_open ()) + flush (); + + return buf_.release (); + } + + // fdopen() + // + inline auto_fd + fdopen (const std::string& f, fdopen_mode m, permissions p) + { + return fdopen (f.c_str (), m, p); + } + + inline auto_fd + fdopen (const path& f, fdopen_mode m, permissions p) + { + return fdopen (f.string (), m, p); + } + + // fdstream_mode + // + inline fdstream_mode + operator& (fdstream_mode x, fdstream_mode y) + { + return x &= y; + } + + inline fdstream_mode + operator| (fdstream_mode x, fdstream_mode y) + { + return x |= y; + } + + inline fdstream_mode + operator&= (fdstream_mode& x, fdstream_mode y) + { + return x = static_cast ( + static_cast (x) & + static_cast (y)); + } + + inline fdstream_mode + operator|= (fdstream_mode& x, fdstream_mode y) + { + return x = static_cast ( + static_cast (x) | + static_cast (y)); + } + + // fdopen_mode + // + inline fdopen_mode operator& (fdopen_mode x, fdopen_mode y) {return x &= y;} + inline fdopen_mode operator| (fdopen_mode x, fdopen_mode y) {return x |= y;} + + inline fdopen_mode + operator&= (fdopen_mode& x, fdopen_mode y) + { + return x = static_cast ( + static_cast (x) & + static_cast (y)); + } + + inline fdopen_mode + operator|= (fdopen_mode& x, fdopen_mode y) + { + return x = static_cast ( + static_cast (x) | + static_cast (y)); + } +} diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx new file mode 100644 index 0000000..6a64073 --- /dev/null +++ b/libbutl/filesystem.cxx @@ -0,0 +1,1396 @@ +// file : libbutl/filesystem.cxx -*- C++ -*- +// copyright : 
Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#ifndef _WIN32 +# include // rename() +# include // struct dirent, *dir() +# include // symlink(), link(), stat(), rmdir(), unlink() +# include // utimes() +# include // stat +# include // stat(), lstat(), S_I*, mkdir(), chmod() +#else +# include + +# include // _find*(), _unlink(), _chmod() +# include // _mkdir(), _rmdir() +# include // _stat +# include // _stat(), S_I* + +# ifdef _MSC_VER // Unlikely to be fixed in newer versions. +# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +# endif + +# include // lcase() +#endif + +#include // errno, E* + +#include +#include +#include // unique_ptr +#include // pair +#include // reverse_iterator +#include + +#include +#include // throw_generic_error() +#include +#include + +using namespace std; + +namespace butl +{ + bool + file_exists (const char* p, bool fl) + { + auto pe (path_entry (p, fl)); + return pe.first && (pe.second == entry_type::regular || + (!fl && pe.second == entry_type::symlink)); + } + + bool + entry_exists (const char* p, bool fl) + { + return path_entry (p, fl).first; + } + + bool + dir_exists (const char* p) + { + auto pe (path_entry (p, true)); + return pe.first && pe.second == entry_type::directory; + } + +#ifndef _WIN32 + pair + path_entry (const char* p, bool fl) + { + struct stat s; + if ((fl ? 
stat (p, &s) : lstat (p, &s)) != 0) + { + if (errno == ENOENT || errno == ENOTDIR) + return make_pair (false, entry_type::unknown); + else + throw_generic_error (errno); + } + + auto m (s.st_mode); + entry_type t (entry_type::unknown); + + if (S_ISREG (m)) + t = entry_type::regular; + else if (S_ISDIR (m)) + t = entry_type::directory; + else if (S_ISLNK (m)) + t = entry_type::symlink; + else if (S_ISBLK (m) || S_ISCHR (m) || S_ISFIFO (m) || S_ISSOCK (m)) + t = entry_type::other; + + return make_pair (true, t); + } +#else + pair + path_entry (const char* p, bool) + { + // A path like 'C:', while being a root path in our terminology, is not as + // such for Windows, that maintains current directory for each drive, and + // so C: means the current directory on the drive C. This is not what we + // mean here, so need to append the trailing directory separator in such a + // case. + // + string d; + if (path::traits::root (p, string::traits_type::length (p))) + { + d = p; + d += path::traits::directory_separator; + p = d.c_str (); + } + + DWORD attr (GetFileAttributesA (p)); + if (attr == INVALID_FILE_ATTRIBUTES) // Presumably not exists. + return make_pair (false, entry_type::unknown); + + entry_type t (entry_type::unknown); + + // S_ISLNK/S_IFDIR are not defined for Win32 but it does have symlinks. + // We will consider symlink entry to be of the unknown type. Note that + // S_ISREG() and S_ISDIR() return as they would do for a symlink target. 
+ // + if ((attr & FILE_ATTRIBUTE_REPARSE_POINT) == 0) + { + struct _stat s; + + if (_stat (p, &s) != 0) + { + if (errno == ENOENT || errno == ENOTDIR) + return make_pair (false, entry_type::unknown); + else + throw_generic_error (errno); + } + + auto m (s.st_mode); + + if (S_ISREG (m)) + t = entry_type::regular; + else if (S_ISDIR (m)) + t = entry_type::directory; + // + //else if (S_ISLNK (m)) + // t = entry_type::symlink; + } + + return make_pair (true, t); + } +#endif + + mkdir_status +#ifndef _WIN32 + try_mkdir (const dir_path& p, mode_t m) + { + if (mkdir (p.string ().c_str (), m) != 0) +#else + try_mkdir (const dir_path& p, mode_t) + { + if (_mkdir (p.string ().c_str ()) != 0) +#endif + { + int e (errno); + + // EEXIST means the path already exists but not necessarily as + // a directory. + // + if (e == EEXIST && dir_exists (p)) + return mkdir_status::already_exists; + else + throw_generic_error (e); + } + + return mkdir_status::success; + } + + mkdir_status + try_mkdir_p (const dir_path& p, mode_t m) + { + if (!p.root ()) + { + dir_path d (p.directory ()); + + if (!d.empty () && !dir_exists (d)) + try_mkdir_p (d, m); + } + + return try_mkdir (p, m); + } + + rmdir_status + try_rmdir (const dir_path& p, bool ignore_error) + { + rmdir_status r (rmdir_status::success); + +#ifndef _WIN32 + if (rmdir (p.string ().c_str ()) != 0) +#else + if (_rmdir (p.string ().c_str ()) != 0) +#endif + { + if (errno == ENOENT) + r = rmdir_status::not_exist; + else if (errno == ENOTEMPTY || errno == EEXIST) + r = rmdir_status::not_empty; + else if (!ignore_error) + throw_generic_error (errno); + } + + return r; + } + + void + rmdir_r (const dir_path& p, bool dir, bool ignore_error) + { + // An nftw()-based implementation (for platforms that support it) + // might be a faster way. + // + for (const dir_entry& de: dir_iterator (p)) + { + path ep (p / de.path ()); //@@ Would be good to reuse the buffer. 
+ + if (de.ltype () == entry_type::directory) + rmdir_r (path_cast (move (ep)), true, ignore_error); + else + try_rmfile (ep, ignore_error); + } + + if (dir) + { + rmdir_status r (try_rmdir (p, ignore_error)); + + if (r != rmdir_status::success && !ignore_error) + throw_generic_error (r == rmdir_status::not_empty + ? ENOTEMPTY + : ENOENT); + } + } + + rmfile_status + try_rmfile (const path& p, bool ignore_error) + { + rmfile_status r (rmfile_status::success); + +#ifndef _WIN32 + if (unlink (p.string ().c_str ()) != 0) +#else + if (_unlink (p.string ().c_str ()) != 0) +#endif + { + // Strangely on Linux unlink() removes a dangling symlink but returns + // ENOENT. + // + if (errno == ENOENT || errno == ENOTDIR) + r = rmfile_status::not_exist; + else if (!ignore_error) + throw_generic_error (errno); + } + + return r; + } + +#ifndef _WIN32 + void + mksymlink (const path& target, const path& link, bool) + { + if (symlink (target.string ().c_str (), link.string ().c_str ()) == -1) + throw_generic_error (errno); + } + + void + mkhardlink (const path& target, const path& link, bool) + { + if (::link (target.string ().c_str (), link.string ().c_str ()) == -1) + throw_generic_error (errno); + } + +#else + + void + mksymlink (const path&, const path&, bool) + { + throw_generic_error (ENOSYS, "symlinks not supported"); + } + + void + mkhardlink (const path& target, const path& link, bool dir) + { + if (!dir) + { + if (!CreateHardLinkA (link.string ().c_str (), + target.string ().c_str (), + nullptr)) + throw_system_error (GetLastError ()); + } + else + throw_generic_error (ENOSYS, "directory hard links not supported"); + } +#endif + + // For I/O operations cpfile() can throw ios_base::failure exception that is + // not derived from system_error for old versions of g++ (as of 4.9). On + // the other hand cpfile() must throw system_error only. Let's catch + // ios_base::failure and rethrow as system_error in such a case.
+ // + template + static inline typename enable_if::type + cpfile (const path& from, const path& to, + cpflags fl, + permissions perm, + auto_rmfile& rm) + { + ifdstream ifs (from, fdopen_mode::binary); + + fdopen_mode om (fdopen_mode::out | + fdopen_mode::truncate | + fdopen_mode::create | + fdopen_mode::binary); + + if ((fl & cpflags::overwrite_content) != cpflags::overwrite_content) + om |= fdopen_mode::exclusive; + + ofdstream ofs (fdopen (to, om, perm)); + + rm = auto_rmfile (to); + + // Throws ios::failure on fdbuf read/write failures. + // + // Note that the eof check is important: if the stream is at eof (empty + // file) then this write will fail. + // + if (ifs.peek () != ifdstream::traits_type::eof ()) + ofs << ifs.rdbuf (); + + ifs.close (); // Throws ios::failure on failure. + ofs.close (); // Throws ios::failure on flush/close failure. + } + + template + static inline typename enable_if::type + cpfile (const path& from, const path& to, + cpflags fl, + permissions perm, + auto_rmfile& rm) + { + try + { + cpfile (from, to, fl, perm, rm); + } + catch (const ios_base::failure& e) + { + // While we try to preserve the original error information, we can not + // make the description to be exactly the same, for example + // + // Is a directory + // + // becomes + // + // Is a directory: Input/output error + // + // Note that our custom operator<<(ostream, exception) doesn't strip this + // suffix. This is a temporary code after all. + // + throw_generic_error (EIO, e.what ()); + } + } + + void + cpfile (const path& from, const path& to, cpflags fl) + { + permissions perm (path_permissions (from)); + auto_rmfile rm; + + cpfile::value> ( + from, to, fl, perm, rm); + + if ((fl & cpflags::overwrite_permissions) == + cpflags::overwrite_permissions) + path_permissions (to, perm); + + rm.cancel (); + } + + // Figuring out whether we have the nanoseconds in struct stat. 
Some + // platforms (e.g., FreeBSD), may provide some "compatibility" #define's, + // so use the second argument to not end up with the same signatures. + // + template + inline constexpr auto + mnsec (const S* s, bool) -> decltype(s->st_mtim.tv_nsec) + { + return s->st_mtim.tv_nsec; // POSIX (GNU/Linux, Solaris). + } + + template + inline constexpr auto + mnsec (const S* s, int) -> decltype(s->st_mtimespec.tv_nsec) + { + return s->st_mtimespec.tv_nsec; // *BSD, MacOS. + } + + template + inline constexpr auto + mnsec (const S* s, float) -> decltype(s->st_mtime_n) + { + return s->st_mtime_n; // AIX 5.2 and later. + } + + // Things are not going to end up well with only seconds resolution so + // let's make it a compile error. + // + // template + // inline constexpr int + // mnsec (...) {return 0;} + + template + inline constexpr auto + ansec (const S* s, bool) -> decltype(s->st_atim.tv_nsec) + { + return s->st_atim.tv_nsec; // POSIX (GNU/Linux, Solaris). + } + + template + inline constexpr auto + ansec (const S* s, int) -> decltype(s->st_atimespec.tv_nsec) + { + return s->st_atimespec.tv_nsec; // *BSD, MacOS. + } + + template + inline constexpr auto + ansec (const S* s, float) -> decltype(s->st_atime_n) + { + return s->st_atime_n; // AIX 5.2 and later. + } + + // template + // inline constexpr int + // ansec (...) {return 0;} + + void + mventry (const path& from, const path& to, cpflags fl) + { + assert ((fl & cpflags::overwrite_permissions) == + cpflags::overwrite_permissions); + + bool ovr ((fl & cpflags::overwrite_content) == cpflags::overwrite_content); + + const char* f (from.string ().c_str ()); + const char* t (to.string ().c_str ()); + +#ifndef _WIN32 + + if (!ovr && path_entry (to).first) + throw_generic_error (EEXIST); + + if (::rename (f, t) == 0) // POSIX implementation. + return; + + // If source and destination paths are on different file systems we need to + // move the file ourselves. 
+ // + if (errno != EXDEV) + throw_generic_error (errno); + + // Note that cpfile() follows symlinks, so we need to remove destination if + // exists. + // + try_rmfile (to); + + // Note that permissions are copied unconditionally to a new file. + // + cpfile (from, to, cpflags::none); + + // Copy file access and modification times. + // + struct stat s; + if (stat (f, &s) != 0) + throw_generic_error (errno); + + timeval times[2]; + times[0].tv_sec = s.st_atime; + times[0].tv_usec = ansec (&s, true) / 1000; + times[1].tv_sec = s.st_mtime; + times[1].tv_usec = mnsec (&s, true) / 1000; + + if (utimes (t, times) != 0) + throw_generic_error (errno); + + // Finally, remove the source file. + // + try_rmfile (from); + +#else + + // While ::rename() is present on Windows, it is not POSIX but ISO C + // implementation, that doesn't fit our needs well. + // + auto te (path_entry (to)); + + if (!ovr && te.first) + throw_generic_error (EEXIST); + + bool td (te.first && te.second == entry_type::directory); + + auto fe (path_entry (from)); + bool fd (fe.first && fe.second == entry_type::directory); + + // If source and destination filesystem entries exist, they both must be + // either directories or not directories. + // + if (fe.first && te.first && fd != td) + throw_generic_error (ENOTDIR); + + DWORD mfl (fd ? 0 : (MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING)); + + if (MoveFileExA (f, t, mfl)) + return; + + // If the destination already exists, then MoveFileExA() succeeds only if + // it is a regular file or a symlink. Lets also support an empty directory + // special case to comply with POSIX. If the destination is an empty + // directory we will just remove it and retry the move operation. + // + // Note that under Wine we endup with ERROR_ACCESS_DENIED error code in + // that case, and with ERROR_ALREADY_EXISTS when run natively. 
+ // + DWORD ec (GetLastError ()); + if ((ec == ERROR_ALREADY_EXISTS || ec == ERROR_ACCESS_DENIED) && td && + try_rmdir (path_cast (to)) != rmdir_status::not_empty && + MoveFileExA (f, t, mfl)) + return; + + throw_system_error (ec); + +#endif + } + + timestamp + file_mtime (const char* p) + { +#ifndef _WIN32 + struct stat s; + if (stat (p, &s) != 0) + { + if (errno == ENOENT || errno == ENOTDIR) + return timestamp_nonexistent; + else + throw_generic_error (errno); + } + + if (!S_ISREG (s.st_mode)) + return timestamp_nonexistent; + + return system_clock::from_time_t (s.st_mtime) + + chrono::duration_cast ( + chrono::nanoseconds (mnsec (&s, true))); +#else + + WIN32_FILE_ATTRIBUTE_DATA s; + + if (!GetFileAttributesExA (p, GetFileExInfoStandard, &s)) + { + DWORD ec (GetLastError ()); + + if (ec == ERROR_FILE_NOT_FOUND || + ec == ERROR_PATH_NOT_FOUND || + ec == ERROR_INVALID_NAME || + ec == ERROR_INVALID_DRIVE || + ec == ERROR_BAD_PATHNAME || + ec == ERROR_BAD_NETPATH) + return timestamp_nonexistent; + + throw_system_error (ec); + } + + if ((s.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) + return timestamp_nonexistent; + + // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch" + // (1601-01-01T00:00:00Z). To convert it to "UNIX epoch" + // (1970-01-01T00:00:00Z) we need to subtract 11644473600 seconds. + // + const FILETIME& t (s.ftLastWriteTime); + + uint64_t ns ((static_cast (t.dwHighDateTime) << 32) | + t.dwLowDateTime); + + ns -= 11644473600ULL * 10000000; // Now in UNIX epoch. + ns *= 100; // Now in nanoseconds. + + return timestamp ( + chrono::duration_cast ( + chrono::nanoseconds (ns))); +#endif + } + + permissions + path_permissions (const path& p) + { +#ifndef _WIN32 + struct stat s; + if (stat (p.string ().c_str (), &s) != 0) +#else + struct _stat s; + if (_stat (p.string ().c_str (), &s) != 0) +#endif + throw_generic_error (errno); + + // VC++ has no S_IRWXU defined. MINGW GCC <= 4.9 has no S_IRWXG, S_IRWXO + // defined. 
+ // + // We could extrapolate user permissions to group/other permissions if + // S_IRWXG/S_IRWXO are undefined. That is, we could consider their absence + // as meaning that the platform does not distinguish between permissions + // for different kinds of users. Let's wait for a use-case first. + // + mode_t f (S_IREAD | S_IWRITE | S_IEXEC); + +#ifdef S_IRWXG + f |= S_IRWXG; +#endif + +#ifdef S_IRWXO + f |= S_IRWXO; +#endif + + return static_cast (s.st_mode & f); + } + + void + path_permissions (const path& p, permissions f) + { + mode_t m (S_IREAD | S_IWRITE | S_IEXEC); + +#ifdef S_IRWXG + m |= S_IRWXG; +#endif + +#ifdef S_IRWXO + m |= S_IRWXO; +#endif + + m &= static_cast (f); + +#ifndef _WIN32 + if (chmod (p.string ().c_str (), m) == -1) +#else + if (_chmod (p.string ().c_str (), m) == -1) +#endif + throw_generic_error (errno); + } + + // dir_{entry,iterator} + // +#ifndef _WIN32 + + // dir_entry + // + dir_iterator:: + ~dir_iterator () + { + if (h_ != nullptr) + closedir (h_); // Ignore any errors. + } + + dir_iterator& dir_iterator:: + operator= (dir_iterator&& x) + { + if (this != &x) + { + e_ = move (x.e_); + + if (h_ != nullptr && closedir (h_) == -1) + throw_generic_error (errno); + + h_ = x.h_; + x.h_ = nullptr; + } + return *this; + } + + entry_type dir_entry:: + type (bool link) const + { + path_type p (b_ / p_); + struct stat s; + if ((link + ? 
stat (p.string ().c_str (), &s) + : lstat (p.string ().c_str (), &s)) != 0) + { + throw_generic_error (errno); + } + + entry_type r; + + if (S_ISREG (s.st_mode)) + r = entry_type::regular; + else if (S_ISDIR (s.st_mode)) + r = entry_type::directory; + else if (S_ISLNK (s.st_mode)) + r = entry_type::symlink; + else + r = entry_type::other; + + return r; + } + + // dir_iterator + // + struct dir_deleter + { + void operator() (DIR* p) const {if (p != nullptr) closedir (p);} + }; + + dir_iterator:: + dir_iterator (const dir_path& d) + { + unique_ptr h (opendir (d.string ().c_str ())); + h_ = h.get (); + + if (h_ == nullptr) + throw_generic_error (errno); + + next (); + + if (h_ != nullptr) + e_.b_ = d; + + h.release (); + } + + template + inline /*constexpr*/ entry_type d_type (const D* d, decltype(d->d_type)*) + { + switch (d->d_type) + { +#ifdef DT_DIR + case DT_DIR: return entry_type::directory; +#endif +#ifdef DT_REG + case DT_REG: return entry_type::regular; +#endif +#ifdef DT_LNK + case DT_LNK: return entry_type::symlink; +#endif +#ifdef DT_BLK + case DT_BLK: +#endif +#ifdef DT_CHR + case DT_CHR: +#endif +#ifdef DT_FIFO + case DT_FIFO: +#endif +#ifdef DT_SOCK + case DT_SOCK: +#endif + return entry_type::other; + + default: return entry_type::unknown; + } + } + + template + inline constexpr entry_type d_type (...) {return entry_type::unknown;} + + void dir_iterator:: + next () + { + for (;;) + { + errno = 0; + if (struct dirent* de = readdir (h_)) + { + // We can accept some overhead for '.' and '..' (relying on short + // string optimization) in favor of a more compact code. + // + path p (de->d_name); + + // Skip '.' and '..'. + // + if (p.current () || p.parent ()) + continue; + + e_.p_ = move (p); + e_.t_ = d_type (de, nullptr); + e_.lt_ = entry_type::unknown; + } + else if (errno == 0) + { + // End of stream. 
+ // + closedir (h_); + h_ = nullptr; + } + else + throw_generic_error (errno); + + break; + } + } + +#else + + // dir_entry + // + dir_iterator:: + ~dir_iterator () + { + if (h_ != -1) + _findclose (h_); // Ignore any errors. + } + + dir_iterator& dir_iterator:: + operator= (dir_iterator&& x) + { + if (this != &x) + { + e_ = move (x.e_); + + if (h_ != -1 && _findclose (h_) == -1) + throw_generic_error (errno); + + h_ = x.h_; + x.h_ = -1; + } + return *this; + } + + entry_type dir_entry:: + type (bool) const + { + // Note that we currently do not support symlinks (yes, there is symlink + // support since Vista). + // + path_type p (b_ / p_); + + struct _stat s; + if (_stat (p.string ().c_str (), &s) != 0) + throw_generic_error (errno); + + entry_type r; + if (S_ISREG (s.st_mode)) + r = entry_type::regular; + else if (S_ISDIR (s.st_mode)) + r = entry_type::directory; + else + r = entry_type::other; + + return r; + } + + // dir_iterator + // + struct auto_dir + { + explicit + auto_dir (intptr_t& h): h_ (&h) {} + + auto_dir (const auto_dir&) = delete; + auto_dir& operator= (const auto_dir&) = delete; + + ~auto_dir () + { + if (h_ != nullptr && *h_ != -1) + _findclose (*h_); + } + + void release () {h_ = nullptr;} + + private: + intptr_t* h_; + }; + + dir_iterator:: + dir_iterator (const dir_path& d) + { + auto_dir h (h_); + e_.b_ = d; // Used by next() to call _findfirst(). + + next (); + h.release (); + } + + void dir_iterator:: + next () + { + for (;;) + { + bool r; + _finddata_t fi; + + if (h_ == -1) + { + // The call is made from the constructor. Any other call with h_ == -1 + // is illegal. + // + + // Check to distinguish non-existent vs empty directories. + // + if (!dir_exists (e_.b_)) + throw_generic_error (ENOENT); + + h_ = _findfirst ((e_.b_ / path ("*")).string ().c_str (), &fi); + r = h_ != -1; + } + else + r = _findnext (h_, &fi) == 0; + + if (r) + { + // We can accept some overhead for '.' and '..' 
(relying on short + // string optimization) in favor of a more compact code. + // + path p (fi.name); + + // Skip '.' and '..'. + // + if (p.current () || p.parent ()) + continue; + + e_.p_ = move (p); + + // We do not support symlinks at the moment. + // + e_.t_ = fi.attrib & _A_SUBDIR + ? entry_type::directory + : entry_type::regular; + + e_.lt_ = entry_type::unknown; + } + else if (errno == ENOENT) + { + // End of stream. + // + if (h_ != -1) + { + _findclose (h_); + h_ = -1; + } + } + else + throw_generic_error (errno); + + break; + } + } +#endif + + // Match the name [ni, ne) to the pattern [pi, pe). Ranges can be empty. + // + static bool + match (string::const_iterator pi, string::const_iterator pe, + string::const_iterator ni, string::const_iterator ne) + { + using reverse_iterator = std::reverse_iterator; + + reverse_iterator rpi (pe); + reverse_iterator rpe (pi); + + reverse_iterator rni (ne); + reverse_iterator rne (ni); + + // Match the pattern suffix (follows the last *) to the name trailing + // characters. + // + char pc; + for (; rpi != rpe && (pc = *rpi) != '*' && rni != rne; ++rpi, ++rni) + { +#ifndef _WIN32 + if (*rni != pc && pc != '?') +#else + if (lcase (*rni) != lcase (pc) && pc != '?') +#endif + return false; + } + + // If we got to the (reversed) end of the pattern (no * is encountered) + // than we are done. The success depends on if we got to the (reversed) end + // of the name as well. + // + if (rpi == rpe) + return rni == rne; + + // If we didn't reach * in the pattern then we reached the (reversed) end + // of the name. That means we have unmatched non-star characters in the + // pattern, and so match failed. + // + if (pc != '*') + { + assert (rni == rne); + return false; + } + + // Match the pattern prefix (ends with the first *) to the name leading + // characters. If they mismatch we failed. 
Otherwise if this is an only * + // in the pattern (matches whatever is left in the name) then we succeed, + // otherwise we perform backtracking (recursively). + // + pe = rpi.base (); + ne = rni.base (); + + // Compare the pattern and the name char by char until the name suffix or + // * is encountered in the pattern (whichever happens first). Fail if a + // char mismatches. + // + for (; (pc = *pi) != '*' && ni != ne; ++pi, ++ni) + { +#ifndef _WIN32 + if (*ni != pc && pc != '?') +#else + if (lcase (*ni) != lcase (pc) && pc != '?') +#endif + return false; + } + + // If we didn't get to * in the pattern then we got to the name suffix. + // That means that the pattern has unmatched non-star characters, and so + // match failed. + // + if (pc != '*') + { + assert (ni == ne); + return false; + } + + // If * that we have reached is the last one, then it matches whatever is + // left in the name (including an empty range). + // + if (++pi == pe) + return true; + + // Perform backtracking. + // + // From now on, we will call the pattern not-yet-matched part (starting + // the leftmost * and ending the rightmost one inclusively) as pattern, and + // the name not-yet-matched part as name. + // + // Here we sequentially assume that * that starts the pattern matches the + // name leading part (staring from an empty one and iterating till the full + // name). So if, at some iteration, the pattern trailing part (that follows + // the leftmost *) matches the name trailing part, then the pattern matches + // the name. + // + bool r; + for (; !(r = match (pi, pe, ni, ne)) && ni != ne; ++ni) ; + return r; + } + + bool + path_match (const string& pattern, const string& name) + { + // Implementation notes: + // + // - This has a good potential of becoming hairy quickly so need to strive + // for an elegant way to implement this. + // + // - Most patterns will contains a single * wildcard with a prefix and/or + // suffix (e.g., *.txt, foo*, f*.txt). 
Something like this is not very + // common: *foo*. + // + // So it would be nice to have a clever implementation that first + // "anchors" itself with a literal prefix and/or suffix and only then + // continue with backtracking. In other words, reduce: + // + // *.txt vs foo.txt -> * vs foo + // foo* vs foo.txt -> * vs .txt + // f*.txt vs foo.txt -> * vs oo + // + + auto pi (pattern.rbegin ()); + auto pe (pattern.rend ()); + + auto ni (name.rbegin ()); + auto ne (name.rend ()); + + // The name doesn't match the pattern if it is of a different type than the + // pattern is. + // + bool pd (pi != pe && path::traits::is_separator (*pi)); + bool nd (ni != ne && path::traits::is_separator (*ni)); + + if (pd != nd) + return false; + + // Skip trailing separators if present. + // + if (pd) + { + ++pi; + ++ni; + } + + return match (pattern.begin (), pi.base (), name.begin (), ni.base ()); + } + + // Iterate over directory sub-entries, recursively and including itself if + // requested. Note that recursive iterating goes depth-first which make + // sense for the cleanup use cases (@@ maybe this should be controllable + // since for directory creation it won't make sense). + // + // Prior to recursively opening a directory for iterating the preopen + // callback function is called. If false is returned, then the directory is + // not traversed but still returned by the next() call. + // + // Note that iterating over non-existent directory is not en error. The + // subsequent next() call returns false for such a directory. + // + using preopen = std::function; + + class recursive_dir_iterator + { + public: + recursive_dir_iterator (dir_path p, + bool recursive, + bool self, + bool fs, + preopen po) + : start_ (move (p)), + recursive_ (recursive), + self_ (self), + follow_symlinks_ (fs), + preopen_ (move (po)) + { + open (dir_path (), self_); + } + + // Non-copyable, non-movable type. 
+ // + recursive_dir_iterator (const recursive_dir_iterator&) = delete; + recursive_dir_iterator& operator= (const recursive_dir_iterator&) = delete; + + // Return false if no more entries left. Otherwise save the next entry path + // and return true. The path is relative against the directory being + // traversed and contains a trailing separator for sub-directories. Throw + // std::system_error in case of a failure (insufficient permissions, + // dangling symlink encountered, etc). + // + bool + next (path& p) + { + if (iters_.empty ()) + return false; + + auto& i (iters_.back ()); + + // If we got to the end of directory sub-entries, then go one level up + // and return this directory path. + // + if (i.first == dir_iterator ()) + { + path d (move (i.second)); + iters_.pop_back (); + + // Return the path unless it is the last one (the directory we started + // to iterate from) and the self flag is not set. + // + if (iters_.empty () && !self_) + return false; + + p = move (d); + return true; + } + + const dir_entry& de (*i.first); + + // Append separator if a directory. Note that dir_entry::type() can + // throw. + // + entry_type et (follow_symlinks_ ? de.type () : de.ltype ()); + path pe (et == entry_type::directory + ? path_cast (i.second / de.path ()) + : i.second / de.path ()); + + ++i.first; + + if (recursive_ && pe.to_directory ()) + { + open (path_cast (move (pe)), true); + return next (p); + } + + p = move (pe); + return true; + } + + private: + void + open (dir_path p, bool preopen) + { + // We should consider a racing condition here. The directory can be + // removed before we create an iterator for it. In this case we just do + // nothing, so the directory is silently skipped. + // + try + { + // If preopen_() returns false, then the directory will not be + // traversed (as we leave iterator with end semantics) but still be + // returned by the next() call as a sub-entry. 
+ // + dir_iterator i; + if (!preopen || preopen_ (p)) + { + dir_path d (start_ / p); + i = dir_iterator (!d.empty () ? d : dir_path (".")); + } + + iters_.emplace_back (move (i), move (p)); + } + catch (const system_error& e) + { + // Ignore non-existent directory (ENOENT or ENOTDIR). Rethrow for any + // other error. We consider ENOTDIR as a variety of removal, with a + // new filesystem entry being created afterwards. + // + // Make sure that the error denotes errno portable code. + // + assert (e.code ().category () == generic_category ()); + + int ec (e.code ().value ()); + if (ec != ENOENT && ec != ENOTDIR) + throw; + } + } + + private: + dir_path start_; + bool recursive_; + bool self_; + bool follow_symlinks_; + preopen preopen_; + small_vector, 1> iters_; + }; + + // Search for paths matching the pattern and call the specified function for + // each matching path. Return false if the underlying func() call returns + // false. Otherwise the function conforms to the path_search() description. + // + static const string any_dir ("*/"); + + static bool + search ( + path pattern, + dir_path pattern_dir, + const dir_path start_dir, + bool follow_symlinks, + const function& func) + { + // Fast-forward the leftmost pattern non-wildcard components. So, for + // example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/. + // + { + auto b (pattern.begin ()); + auto e (pattern.end ()); + auto i (b); + for (; i != e && (*i).find_first_of ("*?") == string::npos; ++i) ; + + // If the pattern has no wildcards then we reduce to checking for the + // filesystem entry existence. It matches if exists and is of the proper + // type. + // + if (i == e) + { + path p (pattern_dir / pattern); + auto pe (path_entry (start_dir / p, true)); + + if (pe.first && + ((pe.second == entry_type::directory) == p.to_directory ())) + return func (move (p), string (), false); + + return true; + } + else if (i != b) // There are non-wildcard components, so fast-forward. 
+ { + path p (b, i); + pattern = pattern.leaf (p); + pattern_dir /= path_cast (move (p)); + } + } + + assert (!pattern.empty ()); + + // The pattern leftmost component. Will use it to match the start directory + // sub-entries. + // + path pc (pattern.begin (), ++pattern.begin ()); + string pcr (pc.representation ()); + + // Note that if the pattern has multiple components (is not a simple path), + // then the leftmost one has a trailing separator, and so will match + // sub-directories only. + // + bool simple (pattern.simple ()); + + // Note that we rely on "small function object" optimization here. + // + recursive_dir_iterator i ( + start_dir / pattern_dir, + pcr.find ("**") != string::npos, // Recursive. + pcr.find ("***") != string::npos, // Self-inclusive. + follow_symlinks, + [&pattern_dir, &func] (const dir_path& p) -> bool // Preopen. + { + return func (pattern_dir / p, any_dir, true); + }); + + // Canonicalize the pattern component collapsing consecutive stars (used to + // express that it is recursive) into a single one. + // + size_t j (0); + size_t n (pcr.size ()); + for (size_t i (0); i < n; ++i) + { + char c (pcr[i]); + if (!(c == '*' && i > 0 && pcr[i - 1] == '*')) + pcr[j++] = c; + } + + if (j != n) + pcr.resize (j); + + // Note that the callback function can be called for the same directory + // twice: first time as intermediate match from iterator's preopen() call, + // and then, if the first call succeed, from the iterating loop (possibly + // as the final match). + // + path p; + while (i.next (p)) + { + // Skip sub-entry if its name doesn't match the pattern leftmost + // component. + // + // Matching the directory we are iterating through (as for a pattern + // component containing ***) is a bit tricky. This directory is + // represented by the iterator as an empty path, and so we need to + // compute it (the leaf would actually be enough) for matching. 
This + // leaf can be aquired from the pattern_dir / start_dir path except the + // case when both directories are empty. This is the case when we search + // in the current directory (start_dir is empty) with a pattern that + // starts with *** wildcard (for example f***/bar). All we can do here is + // to fallback to path::current_directory() call. Note that this will be + // the only call per path_search() as the next time pattern_dir will not + // be empty. + // + const path& se (!p.empty () + ? p + : path_cast (!pattern_dir.empty () + ? pattern_dir + : !start_dir.empty () + ? start_dir + : path::current_directory ())); + + if (!path_match (pcr, se.leaf ().representation ())) + continue; + + // If the callback function returns false, then we stop the entire search + // for the final match, or do not search below the path for the + // intermediate one. + // + if (!func (pattern_dir / p, pcr, !simple)) + { + if (simple) // Final match. + return false; + else + continue; + } + + // If the pattern is not a simple one, and it's leftmost component + // matches the sub-entry, then the sub-entry is a directory (see the note + // above), and we search in it using the trailing part of the pattern. + // + if (!simple && !search (pattern.leaf (pc), + pattern_dir / path_cast (move (p)), + start_dir, + follow_symlinks, + func)) + return false; + } + + return true; + } + + void + path_search ( + const path& pattern, + const function& func, + const dir_path& start, + bool follow_symlinks) + { + search (pattern, + dir_path (), + pattern.relative () ? 
start : dir_path (), + follow_symlinks, + func); + } +} diff --git a/libbutl/filesystem.hxx b/libbutl/filesystem.hxx new file mode 100644 index 0000000..bf1b3af --- /dev/null +++ b/libbutl/filesystem.hxx @@ -0,0 +1,567 @@ +// file : libbutl/filesystem.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_FILESYSTEM_HXX +#define LIBBUTL_FILESYSTEM_HXX + +#ifndef _WIN32 +# include // DIR +#else +# include // intptr_t +#endif + +// VC's sys/types.h header file doesn't define mode_t type. So let's define it +// ourselves according to the POSIX specification. +// +#ifndef _MSC_VER +# include // mode_t +#else + typedef int mode_t; +#endif + +#include // ptrdiff_t +#include // uint16_t +#include // move(), pair +#include +#include + +#include + +#include +#include + +namespace butl +{ + // Return true if the path is to an existing regular file. Note that by + // default this function follows symlinks. + // + LIBBUTL_EXPORT bool + file_exists (const char*, bool follow_symlinks = true); + + inline bool + file_exists (const path& p, bool fs = true) { + return file_exists (p.string ().c_str (), fs);} + + // Return true if the path is to an existing directory. Note that this + // function follows symlinks. + // + LIBBUTL_EXPORT bool + dir_exists (const char*); + + inline bool + dir_exists (const path& p) {return dir_exists (p.string ().c_str ());} + + // Return true if the path is to an existing file system entry. Note that by + // default this function doesn't follow symlinks. + // + LIBBUTL_EXPORT bool + entry_exists (const char*, bool follow_symlinks = false); + + inline bool + entry_exists (const path& p, bool fs = false) { + return entry_exists (p.string ().c_str (), fs);} + + // Filesystem entry type. 
+  //
+  enum class entry_type
+  {
+    unknown,   // Type has not been determined.
+    regular,
+    directory,
+    symlink,
+    other      // Anything else (device, FIFO, socket, etc).
+  };
+
+  // Return a flag indicating if the path is to an existing file system entry
+  // and its type if so. Note that by default this function doesn't follow
+  // symlinks.
+  //
+  // The first member of the returned pair is the existence flag and the
+  // second is the entry type.
+  //
+  LIBBUTL_EXPORT std::pair<bool, entry_type>
+  path_entry (const char*, bool follow_symlinks = false);
+
+  inline std::pair<bool, entry_type>
+  path_entry (const path& p, bool fs = false) {
+    return path_entry (p.string ().c_str (), fs);}
+
+  // Return true if the directory is empty. Note that the path must exist
+  // and be a directory.
+  //
+  LIBBUTL_EXPORT bool
+  dir_empty (const dir_path&);
+
+  // Try to create a directory unless it already exists. If you expect
+  // the directory to exist and performance is important, then you
+  // should first call dir_exists() above since that's what this
+  // implementation will do to make sure the path is actually a
+  // directory.
+  //
+  // You should also probably use the default mode 0777 and let the
+  // umask mechanism adjust it to the user's preferences.
+  //
+  // Errors are reported by throwing std::system_error.
+  //
+  enum class mkdir_status {success, already_exists};
+
+  LIBBUTL_EXPORT mkdir_status
+  try_mkdir (const dir_path&, mode_t = 0777);
+
+  // The '-p' version of the above (i.e., it creates the parent
+  // directories if necessary).
+  //
+  LIBBUTL_EXPORT mkdir_status
+  try_mkdir_p (const dir_path&, mode_t = 0777);
+
+  // Try to remove the directory returning not_exist if it does not exist
+  // and not_empty if it is not empty. Unless ignore_error is true, all
+  // other errors are reported by throwing std::system_error.
+  //
+  enum class rmdir_status {success, not_exist, not_empty};
+
+  LIBBUTL_EXPORT rmdir_status
+  try_rmdir (const dir_path&, bool ignore_error = false);
+
+  // The '-r' (recursive) version of the above. Note that it will
+  // never return not_empty.
+  //
+  LIBBUTL_EXPORT rmdir_status
+  try_rmdir_r (const dir_path&, bool ignore_error = false);
+
+  // As above but throws rather than returns not_exist if the directory
+  // does not exist (unless ignore_error is true), so check before calling.
+  // If the second argument is false, then the directory itself is not removed.
+  //
+  LIBBUTL_EXPORT void
+  rmdir_r (const dir_path&, bool dir = true, bool ignore_error = false);
+
+  // Try to remove the file (or symlink) returning not_exist if
+  // it does not exist. Unless ignore_error is true, all other
+  // errors are reported by throwing std::system_error.
+  //
+  enum class rmfile_status {success, not_exist};
+
+  LIBBUTL_EXPORT rmfile_status
+  try_rmfile (const path&, bool ignore_error = false);
+
+  // Automatically try to remove the path on destruction unless cancelled.
+  // Since the non-cancelled destruction will normally happen as a result
+  // of an exception, the failure to remove the path is silently ignored.
+  //
+  // The template argument is the path type (path for files, dir_path for
+  // directories; see the aliases below).
+  //
+  template <typename P>
+  struct auto_rm
+  {
+    explicit
+    auto_rm (P p = P ()): path_ (std::move (p)) {}
+
+    // Cancel the removal by resetting the path to a default-constructed
+    // value.
+    //
+    void
+    cancel () {path_ = P ();}
+
+    const P&
+    path () const {return path_;}
+
+    // Movable-only type. Move-assignment cancels the lhs object.
+    //
+    auto_rm (auto_rm&&);
+    auto_rm& operator= (auto_rm&&);
+    auto_rm (const auto_rm&) = delete;
+    auto_rm& operator= (const auto_rm&) = delete;
+
+    ~auto_rm ();
+
+  private:
+    P path_;
+  };
+
+  using auto_rmfile = auto_rm<path>;
+  using auto_rmdir = auto_rm<dir_path>; // Note: recursive (rm_r).
+
+  // Create a symbolic link to a file (default) or directory (third argument
+  // is true). Throw std::system_error on failures.
+  //
+  // Note that Windows symlinks are currently not supported.
+  //
+  LIBBUTL_EXPORT void
+  mksymlink (const path& target, const path& link, bool dir = false);
+
+  // Create a symbolic link to a directory. Throw std::system_error on
+  // failures.
+  //
+  inline void
+  mksymlink (const dir_path& target, const dir_path& link)
+  {
+    mksymlink (target, link, true);
+  }
+
+  // Create a hard link to a file (default) or directory (third argument is
+  // true). Throw std::system_error on failures.
+  //
+  // Note that on Linux, FreeBSD and some other platforms the target can not
+  // be a directory. While Windows supports directories (via junctions), this
+  // is currently not implemented.
+  //
+  LIBBUTL_EXPORT void
+  mkhardlink (const path& target, const path& link, bool dir = false);
+
+  // Create a hard link to a directory. Throw std::system_error on failures.
+  //
+  inline void
+  mkhardlink (const dir_path& target, const dir_path& link)
+  {
+    mkhardlink (target, link, true);
+  }
+
+  // File copy flags. These are bit flags that can be combined with the
+  // operators declared below.
+  //
+  enum class cpflags: std::uint16_t
+  {
+    overwrite_content     = 0x1,
+    overwrite_permissions = 0x2,
+
+    none = 0
+  };
+
+  inline cpflags operator& (cpflags, cpflags);
+  inline cpflags operator| (cpflags, cpflags);
+  inline cpflags operator&= (cpflags&, cpflags);
+  inline cpflags operator|= (cpflags&, cpflags);
+
+  // Copy a regular file, including its permissions. Throw std::system_error
+  // on failure. Fail if the destination file exists and the overwrite_content
+  // flag is not set. Leave permissions of an existing destination file intact
+  // unless the overwrite_permissions flag is set. Delete incomplete copies
+  // before throwing.
+  //
+  // Note that in case of overwriting, the existing destination file gets
+  // truncated (not deleted) prior to being overwritten. As a side-effect,
+  // hard links to the destination file will still reference the same file
+  // system node after the copy.
+  //
+  // Also note that if the overwrite_content flag is not set and the
+  // destination is a dangling symbolic link, then this function will still
+  // fail.
+  //
+  LIBBUTL_EXPORT void
+  cpfile (const path& from, const path& to, cpflags = cpflags::none);
+
+  // Copy a regular file into (inside) an existing directory.
+  //
+  // The destination is the directory path with the source leaf (last
+  // component) appended.
+  //
+  inline void
+  cpfile_into (const path& from,
+               const dir_path& into,
+               cpflags fl = cpflags::none)
+  {
+    cpfile (from, into / from.leaf (), fl);
+  }
+
+  // Rename a filesystem entry (file, symlink, or directory). Throw
+  // std::system_error on failure.
+  //
+  // If the source path refers to a directory, then the destination path must
+  // either not exist, or refer to an empty directory. If the source path
+  // refers to an entry that is not a directory, then the destination path must
+  // not exist or not refer to a directory.
+  //
+  // If the source path refers to a symlink, then the link is renamed. If the
+  // destination path refers to a symlink, then the link will be overwritten.
+  //
+  // If the source and destination paths are on different file systems (or
+  // different drives on Windows) and the underlying OS does not support move
+  // for the source entry, then fail unless the source path refers to a file
+  // or a file symlink. In this case fall back to copying the source file
+  // (content, permissions, access and modification times) and removing the
+  // source entry afterwards.
+  //
+  // Note that the operation is atomic only on POSIX, only if source and
+  // destination paths are on the same file system, and only if the
+  // overwrite_content flag is specified.
+  //
+  // Also note that the overwrite_permissions flag must always be specified
+  // (the implementation asserts this).
+  //
+  LIBBUTL_EXPORT void
+  mventry (const path& from,
+           const path& to,
+           cpflags = cpflags::overwrite_permissions);
+
+  // Move a filesystem entry into (inside) an existing directory.
+  //
+  inline void
+  mventry_into (const path& from,
+                const dir_path& into,
+                cpflags f = cpflags::overwrite_permissions)
+  {
+    mventry (from, into / from.leaf (), f);
+  }
+
+  // Rename file or file symlink.
+ // + inline void + mvfile (const path& from, + const path& to, + cpflags f = cpflags::overwrite_permissions) + { + mventry (from, to, f); + } + + inline void + mvfile_into (const path& from, + const dir_path& into, + cpflags f = cpflags::overwrite_permissions) + { + mventry_into (from, into, f); + } + + // Rename directory or directory symlink. + // + inline void + mvdir (const dir_path& from, + const dir_path& to, + cpflags f = cpflags::overwrite_permissions) + { + mventry (from, to, f); + } + + inline void + mvdir_into (const path& from, + const dir_path& into, + cpflags f = cpflags::overwrite_permissions) + { + mventry_into (from, into, f); + } + + // Return timestamp_nonexistent if the entry at the specified path + // does not exist or is not a path. All other errors are reported + // by throwing std::system_error. Note that this function resolves + // symlinks. + // + LIBBUTL_EXPORT timestamp + file_mtime (const char*); + + inline timestamp + file_mtime (const path& p) {return file_mtime (p.string ().c_str ());} + + // Path permissions. + // + enum class permissions: std::uint16_t + { + // Note: matching POSIX values. + // + xo = 0001, + wo = 0002, + ro = 0004, + + xg = 0010, + wg = 0020, + rg = 0040, + + xu = 0100, + wu = 0200, + ru = 0400, + + none = 0 + }; + + inline permissions operator& (permissions, permissions); + inline permissions operator| (permissions, permissions); + inline permissions operator&= (permissions&, permissions); + inline permissions operator|= (permissions&, permissions); + + // Get path permissions. Throw std::system_error on failure. Note that this + // function resolves symlinks. + // + LIBBUTL_EXPORT permissions + path_permissions (const path&); + + // Set path permissions. Throw std::system_error on failure. Note that this + // function resolves symlinks. + // + LIBBUTL_EXPORT void + path_permissions (const path&, permissions); + + // Directory entry iteration. 
+ // + class LIBBUTL_EXPORT dir_entry + { + public: + typedef butl::path path_type; + + // Symlink target type in case of the symlink, ltype() otherwise. + // + entry_type + type () const; + + entry_type + ltype () const; + + // Entry path (excluding the base). To get the full path, do + // base () / path (). + // + const path_type& + path () const {return p_;} + + const dir_path& + base () const {return b_;} + + dir_entry () = default; + dir_entry (entry_type t, path_type p, dir_path b) + : t_ (t), p_ (std::move (p)), b_ (std::move (b)) {} + + private: + entry_type + type (bool link) const; + + private: + friend class dir_iterator; + + mutable entry_type t_ = entry_type::unknown; // Lazy evaluation. + mutable entry_type lt_ = entry_type::unknown; // Lazy evaluation. + path_type p_; + dir_path b_; + }; + + class LIBBUTL_EXPORT dir_iterator + { + public: + typedef dir_entry value_type; + typedef const dir_entry* pointer; + typedef const dir_entry& reference; + typedef std::ptrdiff_t difference_type; + typedef std::input_iterator_tag iterator_category; + + ~dir_iterator (); + dir_iterator () = default; + + explicit + dir_iterator (const dir_path&); + + dir_iterator (const dir_iterator&) = delete; + dir_iterator& operator= (const dir_iterator&) = delete; + + dir_iterator (dir_iterator&& x); + dir_iterator& operator= (dir_iterator&&); + + dir_iterator& operator++ () {next (); return *this;} + + reference operator* () const {return e_;} + pointer operator-> () const {return &e_;} + + friend bool operator== (const dir_iterator&, const dir_iterator&); + friend bool operator!= (const dir_iterator&, const dir_iterator&); + + private: + void + next (); + + private: + dir_entry e_; + +#ifndef _WIN32 + DIR* h_ = nullptr; +#else + intptr_t h_ = -1; +#endif + }; + + // Range-based for loop support. + // + // for (const auto& de: dir_iterator (dir_path ("/tmp"))) ... + // + // Note that the "range" (which is the "begin" iterator), is no + // longer usable. 
In other words, don't do this: + // + // dir_iterator i (...); + // for (...: i) ... + // ++i; // Invalid. + // + inline dir_iterator begin (dir_iterator&); + inline dir_iterator end (const dir_iterator&); + + // Wildcard pattern match and search (aka glob). + // + + // Return true if name matches pattern. Both must be single path components, + // possibly with a trailing directory separator to indicate a directory. + // + // If the pattern ends with a directory separator, then it only matches a + // directory name (i.e., ends with a directory separator, but potentially + // different). Otherwise, it only matches a non-directory name (no trailing + // directory separator). + // + // Currently the following wildcard characters are supported: + // + // * - match any number of characters (including zero) + // ? - match any single character + // + LIBBUTL_EXPORT bool + path_match (const std::string& pattern, const std::string& name); + + // Search for paths matching the pattern calling the specified function for + // each matching path (see below for details). + // + // If the pattern is relative, then search in the start directory. If the + // start directory is empty, then search in the current working directory. + // Searching in non-existent directories is not an error. Throw + // std::system_error in case of a failure (insufficient permissions, etc). + // + // The pattern may contain multiple components that include wildcards. On + // Windows the drive letter may not be a wildcard. + // + // In addition to the wildcard characters listed in path_match(), + // path_search() also recognizes the ** and *** wildcard sequences. If a + // path component contains **, then it is matched just like * but in all the + // subdirectories, recursively. The *** wildcard behaves like ** but also + // matches the start directory itself. + // + // So, for example, foo/bar-**.txt will return all the files matching the + // bar-*.txt pattern in all the subdirectories of foo/. 
And foo/f***/ will + // return all the subdirectories matching the f*/ pattern plus foo/ itself. + // + // Note that having multiple recursive components in the pattern we can end + // up with calling func() multiple times (once per such a component) for the + // same path. For example the search with pattern f***/b**/ starting in + // directory foo, that has the foo/fox/box/ structure, will result in + // calling func(foo/fox/box/) twice: first time for being a child of fox/, + // second time for being a child of foo/. + // + // The callback function is called for both intermediate matches (interm is + // true) and final matches (interm is false). Pattern is what matched the + // last component in the path and is empty if the last component is not a + // pattern (final match only; say as in */foo.txt). + // + // If the callback function returns false for an intermediate path, then no + // further search is performed at or below this path. If false is returned + // for a final match, then the entire search is stopped. + // + // The path can be moved for the final match or for an intermediate match + // but only if false is returned. + // + // As an example, consider pattern f*/bar/b*/*.txt and path + // foo/bar/baz/x.txt. The sequence of calls in this case will be: + // + // (foo/, f*/, true) + // (foo/bar/baz/, b*/, true) + // (foo/bar/baz/x.txt, *.txt, false) + // + // If the pattern contains a recursive wildcard, then the callback function + // can be called for the same directory twice: first time as an intermediate + // match with */ pattern to decide if to recursively traverse the directory, + // and the second time if the directory matches the pattern component (either + // as an intermediate or a final match). As an example, consider pattern + // b**/c* and directory tree a/b/c/. 
The sequence of calls in this case will + // be: + // + // (a/, */, true) + // (a/b/, */ true) + // (a/b/c/, */, true) + // (a/b/, b*/, true) + // (a/b/c/, c*/, false) + // + LIBBUTL_EXPORT void + path_search (const path& pattern, + const std::function&, + const dir_path& start = dir_path (), + bool follow_symlinks = true); +} + +#include + +#endif // LIBBUTL_FILESYSTEM_HXX diff --git a/libbutl/filesystem.ixx b/libbutl/filesystem.ixx new file mode 100644 index 0000000..43fef20 --- /dev/null +++ b/libbutl/filesystem.ixx @@ -0,0 +1,144 @@ +// file : libbutl/filesystem.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +namespace butl +{ + inline bool + dir_empty (const dir_path& d) + { + return dir_iterator (d) == dir_iterator (); + } + + inline rmdir_status + try_rmdir_r (const dir_path& p, bool ignore_error) + { + bool e (dir_exists (p)); //@@ What if it exists but is not a directory? + + if (e) + rmdir_r (p, true, ignore_error); + + return e ? rmdir_status::success : rmdir_status::not_exist; + } + + // auto_rm + // + template + inline auto_rm

:: + auto_rm (auto_rm&& x) + : path_ (std::move (x.path_)) + { + x.cancel (); + } + + template + inline auto_rm

& auto_rm

:: + operator= (auto_rm&& x) + { + if (this != &x) + { + path_ = std::move (x.path_); + x.cancel (); + } + + return *this; + } + + template <> + inline auto_rm:: + ~auto_rm () {if (!path_.empty ()) try_rmfile (path_, true);} + + template <> + inline auto_rm:: + ~auto_rm () {if (!path_.empty ()) try_rmdir_r (path_, true);} + + // cpflags + // + inline cpflags operator& (cpflags x, cpflags y) {return x &= y;} + inline cpflags operator| (cpflags x, cpflags y) {return x |= y;} + inline cpflags operator&= (cpflags& x, cpflags y) + { + return x = static_cast ( + static_cast (x) & + static_cast (y)); + } + + inline cpflags operator|= (cpflags& x, cpflags y) + { + return x = static_cast ( + static_cast (x) | + static_cast (y)); + } + + // permissions + // + inline permissions operator& (permissions x, permissions y) {return x &= y;} + inline permissions operator| (permissions x, permissions y) {return x |= y;} + inline permissions operator&= (permissions& x, permissions y) + { + return x = static_cast ( + static_cast (x) & + static_cast (y)); + } + + inline permissions operator|= (permissions& x, permissions y) + { + return x = static_cast ( + static_cast (x) | + static_cast (y)); + } + + // dir_entry + // + inline entry_type dir_entry:: + ltype () const + { + return t_ != entry_type::unknown ? t_ : (t_ = type (false)); + } + + inline entry_type dir_entry:: + type () const + { + entry_type t (ltype ()); + return t != entry_type::symlink + ? t + : lt_ != entry_type::unknown ? 
lt_ : (lt_ = type (true)); + } + + // dir_iterator + // + inline dir_iterator:: + dir_iterator (dir_iterator&& x) + : e_ (std::move (x.e_)), h_ (x.h_) + { +#ifndef _WIN32 + x.h_ = nullptr; +#else + x.h_ = -1; +#endif + } + + inline bool + operator== (const dir_iterator& x, const dir_iterator& y) + { + return x.h_ == y.h_; + } + + inline bool + operator!= (const dir_iterator& x, const dir_iterator& y) + { + return !(x == y); + } + + inline dir_iterator + begin (dir_iterator& i) + { + return std::move (i); + } + + inline dir_iterator + end (const dir_iterator&) + { + return dir_iterator (); + } +} diff --git a/libbutl/ft/exception.hxx b/libbutl/ft/exception.hxx new file mode 100644 index 0000000..a5f6156 --- /dev/null +++ b/libbutl/ft/exception.hxx @@ -0,0 +1,39 @@ +// file : libbutl/ft/exception.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_FT_EXCEPTION_HXX +#define LIBBUTL_FT_EXCEPTION_HXX + +#include // _LIBCPP_VERSION +#include + +// __cpp_lib_uncaught_exceptions +// +#ifndef __cpp_lib_uncaught_exceptions + // + // VC has it since 1900. + // +# if defined(_MSC_VER) +# if _MSC_VER >= 1900 +# define __cpp_lib_uncaught_exceptions 201411 +# endif + // + // Clang's libc++ seems to have it for a while (but not before 1200) so we + // assume it's there from 1200. But not for MacOS, where it is explicitly + // marked as unavailable until MacOS 10.12. + // +# elif defined(_LIBCPP_VERSION) +# if _LIBCPP_VERSION >= 1200 && \ + (!defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) || \ + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 101200) +# define __cpp_lib_uncaught_exceptions 201411 +# endif + // + // GCC libstdc++ has it since GCC 6 and it defines the feature test macro. + // We will also use this for any other runtime. 
+ // +# endif +#endif + +#endif // LIBBUTL_FT_EXCEPTION_HXX diff --git a/libbutl/ft/lang.hxx b/libbutl/ft/lang.hxx new file mode 100644 index 0000000..5daf9c9 --- /dev/null +++ b/libbutl/ft/lang.hxx @@ -0,0 +1,29 @@ +// file : libbutl/ft/lang.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_FT_LANG_HXX +#define LIBBUTL_FT_LANG_HXX + +// __cpp_thread_local (extension) +// +// If this macro is undefined then one may choose to fallback to __thread. +// Note, however, that it only for values that do not require dynamic +// (runtime) initialization. +// +#ifndef __cpp_thread_local + // + // Apparently Apple's Clang "temporarily disabled" C++11 thread_local until + // they can implement a "fast" version, which reportedly happened in XCode + // 8. + // +# if defined(__apple_build_version__) +# if __apple_build_version__ >= 8000000 +# define __cpp_thread_local 201103 +# endif +# else +# define __cpp_thread_local 201103 +# endif +#endif + +#endif // LIBBUTL_FT_LANG_HXX diff --git a/libbutl/ft/shared_mutex.hxx b/libbutl/ft/shared_mutex.hxx new file mode 100644 index 0000000..6c6d415 --- /dev/null +++ b/libbutl/ft/shared_mutex.hxx @@ -0,0 +1,58 @@ +// file : libbutl/ft/shared_mutex.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_FT_SHARED_MUTEX_HXX +#define LIBBUTL_FT_SHARED_MUTEX_HXX + +#include // _LIBCPP_VERSION +#include + +// __cpp_lib_shared_mutex +// +#ifndef __cpp_lib_shared_mutex + // + // VC has it since 1900. + // +# if defined(_MSC_VER) +# if _MSC_VER >= 1900 +# define __cpp_lib_shared_mutex 201505 +# endif + // + // Clang's libc++ seems to have it for a while (but not before 1200) so we + // assume it's there from 1200. It's also only enabled after C++14. But not + // for MacOS, where it is explicitly marked as unavailable until MacOS + // 10.12. 
+ // +# elif defined(_LIBCPP_VERSION) && defined(_LIBCPP_STD_VER) +# if _LIBCPP_VERSION >= 1200 && \ + _LIBCPP_STD_VER > 14 && \ + (!defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) || \ + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 101200) +# define __cpp_lib_shared_mutex 201505 +# endif + // + // GCC libstdc++ has it since GCC 6 and it defines the feature test macro. + // We will also use this for any other runtime. + // +# endif +#endif + +// __cpp_lib_shared_timed_mutex +// +#ifndef __cpp_lib_shared_timed_mutex + // + // On MacOS shared_timed_mutex is marked as unavailable until MacOS + // 10.12. + // +# if defined(_LIBCPP_VERSION) +# if !defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) || \ + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 101200 +# define __cpp_lib_shared_timed_mutex 201402 +# endif +# else +# define __cpp_lib_shared_timed_mutex 201402 +# endif +#endif + +#endif // LIBBUTL_FT_SHARED_MUTEX_HXX diff --git a/libbutl/manifest-forward.hxx b/libbutl/manifest-forward.hxx new file mode 100644 index 0000000..68864a1 --- /dev/null +++ b/libbutl/manifest-forward.hxx @@ -0,0 +1,15 @@ +// file : libbutl/manifest-forward.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_MANIFEST_FORWARD_HXX +#define LIBBUTL_MANIFEST_FORWARD_HXX + +namespace butl +{ + class manifest_parser; + class manifest_serializer; + class manifest_name_value; +} + +#endif // LIBBUTL_MANIFEST_FORWARD_HXX diff --git a/libbutl/manifest-parser.cxx b/libbutl/manifest-parser.cxx new file mode 100644 index 0000000..ae92d79 --- /dev/null +++ b/libbutl/manifest-parser.cxx @@ -0,0 +1,387 @@ +// file : libbutl/manifest-parser.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include + +using namespace std; + +namespace butl +{ + using parsing = manifest_parsing; + using name_value = 
manifest_name_value; + + name_value manifest_parser:: + next () + { + if (s_ == end) + return name_value {"", "", line, column, line, column}; + + xchar c (skip_spaces ()); + + // Here is the problem: if we are in the 'body' state (that is, + // we are parsing inside the manifest) and we see the special + // empty name, then before returning the "start" pair for the + // next manifest, we have to return the "end" pair. One way + // would be to cache the "start" pair and return it on the + // next call of next(). But that would require quite a bit + // of extra logic. The alternative is to detect the beginning + // of the empty name before parsing too far. This way, the + // next call to next() will start parsing where we left off + // and return the "start" pair naturally. + // + if (s_ == body && c == ':') + { + s_ = start; + return name_value {"", "", c.line, c.column, c.line, c.column}; + } + + // Regardless of the state, what should come next is a name, + // potentially the special empty one. + // + name_value r; + parse_name (r); + + skip_spaces (); + c = get (); + + if (eos (c)) + { + // This is ok as long as the name is empty. + // + if (!r.name.empty ()) + throw parsing (name_, c.line, c.column, "':' expected after name"); + + s_ = end; + + // The "end" pair. + // + r.value_line = r.name_line; + r.value_column = r.name_column; + return r; + } + + if (c != ':') + throw parsing (name_, c.line, c.column, "':' expected after name"); + + skip_spaces (); + parse_value (r); + + c = peek (); + + // The character after the value should be either a newline or eos. + // + assert (c == '\n' || eos (c)); + + if (c == '\n') + get (); + + // Now figure out whether what we've got makes sense, depending + // on the state we are in. + // + if (s_ == start) + { + // Start of the (next) manifest. The first pair should be the + // special empty name/format version. 
+ // + if (!r.name.empty ()) + throw parsing (name_, r.name_line, r.name_column, + "format version pair expected"); + + // The version value is only mandatory for the first manifest in + // a sequence. + // + if (r.value.empty ()) + { + if (version_.empty ()) + throw parsing (name_, r.value_line, r.value_column, + "format version value expected"); + r.value = version_; + } + else + { + version_ = r.value; // Update with the latest. + + if (version_ != "1") + throw parsing (name_, r.value_line, r.value_column, + "unsupported format version " + version_); + } + + s_ = body; + } + else + { + // Parsing the body of the manifest. + // + + // Should have been handled by the special case above. + // + assert (!r.name.empty ()); + } + + return r; + } + + void manifest_parser:: + parse_name (name_value& r) + { + xchar c (peek ()); + + r.name_line = c.line; + r.name_column = c.column; + + for (; !eos (c); c = peek ()) + { + if (c == ':' || c == ' ' || c == '\t' || c == '\n') + break; + + r.name += c; + get (); + } + } + + void manifest_parser:: + parse_value (name_value& r) + { + xchar c (peek ()); + + r.value_line = c.line; + r.value_column = c.column; + + string& v (r.value); + string::size_type n (0); // Size of last non-space character (simple mode). + + // Detect the multi-line mode introductor. + // + bool ml (false); + if (c == '\\') + { + get (); + xchar p (peek ()); + + if (p == '\n') + { + get (); // Newline is not part of the value so skip it. + c = peek (); + ml = true; + } + else if (eos (p)) + ml = true; + else + unget (c); + } + + // Multi-line value starts from the line that follows the name. + // + if (ml) + { + r.value_line = c.line; + r.value_column = c.column; + } + + // The nl flag signals that the preceding character was a "special + // newline", that is, a newline that was part of the multi-line mode + // introductor or an escape sequence. + // + for (bool nl (ml); !eos (c); c = peek ()) + { + // Detect the special "\n\\\n" sequence. 
In the multi-line mode, + // this is a "terminator". In the simple mode, this is a way to + // specify a newline. + // + // The key idea here is this: if we "swallowed" any characters + // (i.e., called get() without a matching unget()), then we + // have to restart the loop in order to do all the tests for + // the next character. Also, for this to work, we can only + // add one character to v, which limits us to maximum three + // characters look-ahead: one in v, one "ungot", and one + // peeked. + // + // The first block handles the special sequence that starts with + // a special newline. In multi-line mode, this is an "immediate + // termination" where we "use" the newline from the introductor. + // Note also that in the simple mode the special sequence can + // only start with a special (i.e., escaped) newline. + // + if (nl) + { + nl = false; + + if (c == '\\') + { + get (); + xchar c1 (peek ()); + + if (c1 == '\n' || eos (c1)) + { + if (ml) + break; + else + { + if (c1 == '\n') + get (); + + v += '\n'; // Literal newline. + n = v.size (); + continue; // Restart from the next character. + } + } + else + unget (c); // Fall through. + } + } + + if (c == '\n') + { + if (ml) + { + get (); + xchar c1 (peek ()); + + if (c1 == '\\') + { + get (); + xchar c2 (peek ()); + + if (c2 == '\n' || eos (c2)) + break; + else + { + v += '\n'; + unget (c1); + continue; // Restart from c1 (slash). + } + } + else + unget (c); // Fall through. + } + else + break; // Simple value terminator. + } + + // Detect the newline escape sequence. The same look-ahead + // approach as above. + // + if (c == '\\') + { + get (); + xchar c1 (peek ()); + + if (c1 == '\n' || eos (c1)) + { + if (c1 == '\n') + { + get (); + nl = true; // This is a special newline. + } + continue; // Restart from the next character. + } + else if (c1 == '\\') + { + get (); + xchar c2 (peek ()); + + if (c2 == '\n' || eos (c2)) + { + v += '\\'; + n = v.size (); + // Restart from c2 (newline/eos). 
+ } + else + { + v += '\\'; + n = v.size (); + unget (c1); // Restart from c1 (second slash). + } + + continue; + } + else + unget (c); // Fall through. + } + + get (); + v += c; + + if (!ml && c != ' ' && c != '\t') + n = v.size (); + } + + // Cut off trailing whitespaces. + // + if (!ml) + v.resize (n); + } + + manifest_parser::xchar manifest_parser:: + skip_spaces () + { + xchar c (peek ()); + bool start (c.column == 1); + + for (; !eos (c); c = peek ()) + { + switch (c) + { + case ' ': + case '\t': + break; + case '\n': + { + // Skip empty lines. + // + if (!start) + return c; + + break; + } + case '#': + { + // We only recognize '#' as a start of a comment at the beginning + // of the line (sans leading spaces). + // + if (!start) + return c; + + get (); + + // Read until newline or eos. + // + for (c = peek (); !eos (c) && c != '\n'; c = peek ()) + get (); + + continue; + } + default: + return c; // Not a space. + } + + get (); + } + + return c; + } + + // manifest_parsing + // + + static string + format (const string& n, uint64_t l, uint64_t c, const string& d) + { + ostringstream os; + if (!n.empty ()) + os << n << ':'; + os << l << ':' << c << ": error: " << d; + return os.str (); + } + + manifest_parsing:: + manifest_parsing (const string& n, uint64_t l, uint64_t c, const string& d) + : runtime_error (format (n, l, c, d)), + name (n), line (l), column (c), description (d) + { + } +} diff --git a/libbutl/manifest-parser.hxx b/libbutl/manifest-parser.hxx new file mode 100644 index 0000000..840549f --- /dev/null +++ b/libbutl/manifest-parser.hxx @@ -0,0 +1,94 @@ +// file : libbutl/manifest-parser.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_MANIFEST_PARSER_HXX +#define LIBBUTL_MANIFEST_PARSER_HXX + +#include +#include +#include // uint64_t +#include // runtime_error + +#include + +#include + +namespace butl +{ + class LIBBUTL_EXPORT manifest_parsing: public 
std::runtime_error + { + public: + manifest_parsing (const std::string& name, + std::uint64_t line, + std::uint64_t column, + const std::string& description); + + std::string name; + std::uint64_t line; + std::uint64_t column; + std::string description; + }; + + class manifest_name_value + { + public: + std::string name; + std::string value; + + std::uint64_t name_line; + std::uint64_t name_column; + + std::uint64_t value_line; + std::uint64_t value_column; + + bool + empty () const {return name.empty () && value.empty ();} + }; + + class LIBBUTL_EXPORT manifest_parser: protected butl::char_scanner + { + public: + manifest_parser (std::istream& is, const std::string& name) + : char_scanner (is), name_ (name) {} + + const std::string& + name () const {return name_;} + + // The first returned pair is special "start-of-manifest" with + // empty name and value being the format version: {"", ""}. + // After that we have a sequence of ordinary pairs which are + // the manifest. At the end of the manifest we have the special + // "end-of-manifest" pair with empty name and value: {"", ""}. + // After that we can either get another start-of-manifest pair + // (in which case the whole sequence repeats from the beginning) + // or we get another end-of-manifest pair which signals the end + // of stream (aka EOF). To put it another way, the parse sequence + // always has the following form: + // + // ({"", ""} {"", ""}* {"", ""})* {"", ""} + // + manifest_name_value + next (); + + private: + void + parse_name (manifest_name_value&); + + void + parse_value (manifest_name_value&); + + // Skip spaces and return the first peeked non-space character. + // + xchar + skip_spaces (); + + private: + const std::string name_; + + enum {start, body, end} s_ = start; + std::string version_; // Current format version. 
+ }; +} + +#endif // LIBBUTL_MANIFEST_PARSER_HXX diff --git a/libbutl/manifest-serializer.cxx b/libbutl/manifest-serializer.cxx new file mode 100644 index 0000000..b152882 --- /dev/null +++ b/libbutl/manifest-serializer.cxx @@ -0,0 +1,238 @@ +// file : libbutl/manifest-serializer.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include + +using namespace std; + +namespace butl +{ + using serialization = manifest_serialization; + + void manifest_serializer:: + next (const string& n, const string& v) + { + switch (s_) + { + case start: + { + if (!n.empty ()) + throw serialization (name_, "format version pair expected"); + + if (v.empty ()) + { + // End of manifests. + // + os_.flush (); + s_ = end; + break; + } + + if (v != "1") + throw serialization (name_, "unsupported format version " + v); + + os_ << ':'; + + if (v != version_) + { + os_ << ' ' << v; + version_ = v; + } + + os_ << endl; + s_ = body; + break; + } + case body: + { + if (n.empty ()) + { + if (!v.empty ()) + throw serialization (name_, "non-empty value in end pair"); + + s_ = start; + break; + } + + check_name (n); + + os_ << n << ':'; + + if (!v.empty ()) + { + os_ << ' '; + + // Use the multi-line mode in any of the following cases: + // + // - name is too long (say longer than 37 (78/2 - 2) characters; + // we cannot start on the next line since that would start the + // multi-line mode) + // - value contains newlines + // - value contains leading/trailing whitespaces + // + if (n.size () > 37 || + v.find ('\n') != string::npos || + v.front () == ' ' || v.front () == '\t' || + v.back () == ' ' || v.back () == '\t') + { + os_ << "\\" << endl; // Multi-line mode introductor. + + // Chunk the value into fragments separated by newlines. + // + for (size_t i (0), p (v.find ('\n')); ; p = v.find ('\n', i)) + { + if (p == string::npos) + { + // Last chunk. 
+ // + write_value (0, v.c_str () + i, v.size () - i); + break; + } + + write_value (0, v.c_str () + i, p - i); + os_ << endl; + i = p + 1; + } + + os_ << endl << "\\"; // Multi-line mode terminator. + } + else + write_value (n.size () + 2, v.c_str (), v.size ()); + } + + os_ << endl; + break; + } + case end: + { + throw serialization (name_, "serialization after eos"); + } + } + } + + void manifest_serializer:: + comment (const string& t) + { + if (s_ == end) + throw serialization (name_, "serialization after eos"); + + os_ << '#'; + + if (!t.empty ()) + os_ << ' ' << t; + + os_ << endl; + } + + void manifest_serializer:: + check_name (const string& n) + { + if (n[0] == '#') + throw serialization (name_, "name starts with '#'"); + + for (char c: n) + { + switch (c) + { + case ' ': + case '\t': + case '\n': throw serialization (name_, "name contains whitespace"); + case ':': throw serialization (name_, "name contains ':'"); + default: break; + } + } + } + + void manifest_serializer:: + write_value (size_t cl, const char* s, size_t n) + { + char c ('\0'); + + // The idea is to break on the 77th character (i.e., write it + // on the next line) which means we have written 76 characters + // on this line plus 2 for '\' and '\n', which gives us 78. + // + for (const char* e (s + n); s != e; s++, cl++) + { + c = *s; + bool br (false); // Break the line. + + // If this is a whitespace, see if it's a good place to break the + // line. + // + if (c == ' ' || c == '\t') + { + // Find the next whitespace (or the end) and see if it is a better + // place. + // + for (const char* w (s + 1); ; w++) + { + if (w == e || *w == ' ' || *w == '\t') + { + // Is this whitespace past where we need to break? Also see + // below the "hard" break case for why we use 78 at the end. + // + if (cl + static_cast (w - s) > (w != e ? 77 : 78)) + { + // Only break if this whitespace is close enough to + // the end of the line. 
+ // + br = (cl > 57); + } + + break; + } + } + } + + // Do we have to do a "hard" break (i.e., without a whitespace)? + // If there is just one character left, then instead of writing + // '\' and then the character on the next line, we might as well + // write it on this line. + // + if (cl == (s + 1 != e ? 77 : 78)) + br = true; + + if (br) + { + os_ << '\\' << endl; + cl = 0; + } + + os_ << c; + } + + // What comes next is always a newline. If the last character that + // we have written is a backslash, escape it. + // + if (c == '\\') + os_ << '\\'; + } + + // manifest_serialization + // + + static string + format (const string& n, const string& d) + { + string r; + if (!n.empty ()) + { + r += n; + r += ": "; + } + r += "error: "; + r += d; + return r; + } + + manifest_serialization:: + manifest_serialization (const string& n, const string& d) + : runtime_error (format (n, d)), name (n), description (d) + { + } +} diff --git a/libbutl/manifest-serializer.hxx b/libbutl/manifest-serializer.hxx new file mode 100644 index 0000000..863fef6 --- /dev/null +++ b/libbutl/manifest-serializer.hxx @@ -0,0 +1,75 @@ +// file : libbutl/manifest-serializer.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_MANIFEST_SERIALIZER_HXX +#define LIBBUTL_MANIFEST_SERIALIZER_HXX + +#include +#include +#include // size_t +#include // runtime_error + +#include + +namespace butl +{ + class LIBBUTL_EXPORT manifest_serialization: public std::runtime_error + { + public: + manifest_serialization (const std::string& name, + const std::string& description); + + std::string name; + std::string description; + }; + + class LIBBUTL_EXPORT manifest_serializer + { + public: + manifest_serializer (std::ostream& os, const std::string& name) + : os_ (os), name_ (name) {} + + const std::string& + name () const {return name_;} + + // The first name-value pair should be the special "start-of-manifest" + // with 
empty name and value being the format version. After that we + // have a sequence of ordinary pairs which are the manifest. At the + // end of the manifest we have the special "end-of-manifest" pair + // with empty name and value. After that we can either have another + // start-of-manifest pair (in which case the whole sequence repeats + // from the beginning) or we get another end-of-manifest pair which + // signals the end of stream. + // + void + next (const std::string& name, const std::string& value); + + // Write a comment. The supplied text is prefixed with "# " and + // terminated with a newline. + // + void + comment (const std::string&); + + private: + void + check_name (const std::string&); + + // Write 'n' characters from 's' (assuming there are no newlines) + // split into multiple lines at or near the 78 characters + // boundary. The first line starts at the 'column' offset. + // + void + write_value (std::size_t column, const char* s, std::size_t n); + + private: + enum {start, body, end} s_ = start; + std::string version_; // Current format version. + + private: + std::ostream& os_; + const std::string name_; + }; +} + +#endif // LIBBUTL_MANIFEST_SERIALIZER_HXX diff --git a/libbutl/multi-index.hxx b/libbutl/multi-index.hxx new file mode 100644 index 0000000..448e1f7 --- /dev/null +++ b/libbutl/multi-index.hxx @@ -0,0 +1,59 @@ +// file : libbutl/multi-index.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_MULTI_INDEX_HXX +#define LIBBUTL_MULTI_INDEX_HXX + +#include // declval() +#include // hash + +namespace butl +{ + // Google the "Emulating Boost.MultiIndex with Standard Containers" blog + // post for details. 
+ // + + template + struct map_key + { + mutable const T* p; + + map_key (const T* v = 0): p (v) {} + bool operator< (const map_key& x) const {return *p < *x.p;} + bool operator== (const map_key& x) const {return *p == *x.p;} + }; + + template + struct map_iterator_adapter: I + { + typedef const typename I::value_type::second_type value_type; + typedef value_type* pointer; + typedef value_type& reference; + + map_iterator_adapter () {} + map_iterator_adapter (I i): I (i) {} + + map_iterator_adapter& + operator= (I i) {static_cast (*this) = i; return *this;} + + reference operator* () const {return I::operator* ().second;} + pointer operator-> () const {return &I::operator-> ()->second;} + }; +} + +namespace std +{ + template + struct hash>: hash + { + size_t + operator() (butl::map_key x) const + noexcept (noexcept (declval> () (*x.p))) + { + return hash::operator() (*x.p); + } + }; +} + +#endif // LIBBUTL_MULTI_INDEX_HXX diff --git a/libbutl/optional.hxx b/libbutl/optional.hxx new file mode 100644 index 0000000..c1bc582 --- /dev/null +++ b/libbutl/optional.hxx @@ -0,0 +1,96 @@ +// file : libbutl/optional.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_OPTIONAL_HXX +#define LIBBUTL_OPTIONAL_HXX + +#include // move() +#include // hash + +namespace butl +{ + // Simple optional class template while waiting for std::optional. + // + struct nullopt_t {constexpr explicit nullopt_t (int) {}}; + constexpr const nullopt_t nullopt (1); + + template + class optional + { + public: + typedef T value_type; + + constexpr optional (): value_ (), null_ (true) {} // VC14 needs value_(). 
+ constexpr optional (nullopt_t): value_ (), null_ (true) {} + constexpr optional (const T& v): value_ (v), null_ (false) {} + constexpr optional (T&& v): value_ (std::move (v)), null_ (false) {} + + optional& operator= (nullopt_t) {value_ = T (); null_ = true; return *this;} + optional& operator= (const T& v) {value_ = v; null_ = false; return *this;} + optional& operator= (T&& v) {value_ = std::move (v); null_ = false; return *this;} + + T& value () {return value_;} + const T& value () const {return value_;} + + T* operator-> () {return &value_;} + const T* operator-> () const {return &value_;} + + T& operator* () {return value_;} + const T& operator* () const {return value_;} + + bool has_value () const {return !null_;} + explicit operator bool () const {return !null_;} + + private: + T value_; + bool null_; + }; + + template + inline auto + operator== (const optional& x, const optional& y) -> decltype (*x == *y) + { + return static_cast (x) == static_cast (y) && (!x || *x == *y); + } + + template + inline auto + operator!= (const optional& x, const optional& y) -> decltype (x == y) + { + return !(x == y); + } + + template + inline auto + operator< (const optional& x, const optional& y) -> decltype (*x < *y) + { + bool px (x), py (y); + return px < py || (px && py && *x < *y); + } + + template + inline auto + operator> (const optional& x, const optional& y) -> decltype (*x > *y) + { + return y < x; + } +} + +namespace std +{ + template + struct hash>: hash + { + using argument_type = butl::optional; + + size_t + operator() (const butl::optional& o) const + noexcept (noexcept (hash {} (*o))) + { + return o ? 
hash::operator() (*o) : static_cast (-3333); + } + }; +} + +#endif // LIBBUTL_OPTIONAL_HXX diff --git a/libbutl/pager.cxx b/libbutl/pager.cxx new file mode 100644 index 0000000..3910443 --- /dev/null +++ b/libbutl/pager.cxx @@ -0,0 +1,207 @@ +// file : libbutl/pager.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#ifndef _WIN32 +# include // STDOUT_FILENO +# include // ioctl() + +# include +# include // this_thread::sleep_for() +#else +# include +#endif + +#include // strchr() +#include // move() + +#include // operator<<(ostream, exception), + // throw_generic_error() +#include // fdclose() + +using namespace std; + +namespace butl +{ + pager:: + pager (const string& name, + bool verbose, + const string* pager, + const vector* pager_options) + // Create successfully exited process. Will "wait" for it if fallback to + // non-interactive execution path. + // + : p_ (process_exit (0)) + { + // If we are using the default pager, try to get the terminal width + // so that we can center the output. + // + if (pager == nullptr) + { + size_t col (0); + +#ifndef _WIN32 +# ifdef TIOCGWINSZ + struct winsize w; + if (ioctl (STDOUT_FILENO, TIOCGWINSZ, &w) == 0) + col = static_cast (w.ws_col); +# endif +#else + // This works properly on PowerShell, while for the regular console + // (cmd.exe) it always returns 80 columns. + // + CONSOLE_SCREEN_BUFFER_INFO w; + if (GetConsoleScreenBufferInfo (GetStdHandle (STD_OUTPUT_HANDLE), &w)) + col = static_cast (w.srWindow.Right - w.srWindow.Left + 1); +#endif + if (col > 80) + indent_.assign ((col - 80) / 2, ' '); + } + + vector args; + string prompt; + + if (pager != nullptr) + { + if (pager->empty ()) + return; // No pager should be used. + + args.push_back (pager->c_str ()); + } + else + { + // By default try less (again, no pun intended). 
+ // + prompt = "-Ps" + name + " (press q to quit, h for help)"; + + args.push_back ("less"); + args.push_back ("-R"); // Handle ANSI color. + + args.push_back (prompt.c_str ()); + } + + // Add extra pager options. + // + if (pager_options != nullptr) + for (const string& o: *pager_options) + args.push_back (o.c_str ()); + + args.push_back (nullptr); + + if (verbose) + { + for (const char* const* p (args.data ()); *p != nullptr; ++p) + { + if (p != args.data ()) + cerr << ' '; + + // Quote if empty or contains spaces. + // + bool q (**p == '\0' || strchr (*p, ' ') != nullptr); + + if (q) + cerr << '"'; + + cerr << *p; + + if (q) + cerr << '"'; + } + cerr << endl; + } + + // Ignore errors and go without a pager unless the pager was specified + // by the user. + // + try + { + p_ = process (args.data (), -1); // Redirect child's STDIN to a pipe. + + // Wait a bit and see if the pager has exited before reading anything + // (e.g., because exec() couldn't find the program). If you know a + // cleaner way to handle this, let me know (and no, a select()-based + // approach doesn't work; the pipe is buffered and therefore is always + // ready for writing). + // + // MINGW GCC 4.9 doesn't implement this_thread so use Win32 Sleep(). + // +#ifndef _WIN32 + this_thread::sleep_for (chrono::milliseconds (50)); +#else + Sleep (50); +#endif + + bool r; + if (p_.try_wait (r)) + { + p_.out_fd.reset (); + + if (pager != nullptr) + throw_generic_error (ECHILD); + } + else + os_.open (move (p_.out_fd)); + } + catch (const process_error& e) + { + if (e.child) + { + cerr << args[0] << ": unable to execute: " << e << endl; + exit (1); + } + + // Ignore unless it was a user-specified pager. + // + if (pager != nullptr) + throw; // Re-throw as system_error. + } + + // Setup the indentation machinery. + // + if (!indent_.empty ()) + buf_ = stream ().rdbuf (this); + } + + bool pager:: + wait (bool ie) + { + // Teardown the indentation machinery. 
+ // + if (buf_ != nullptr) + { + stream ().rdbuf (buf_); + buf_ = nullptr; + } + + // Prevent ofdstream::close() from throwing in the ignore errors mode. + // + if (ie) + os_.exceptions (ofdstream::goodbit); + + os_.close (); + return p_.wait (ie); + } + + pager::int_type pager:: + overflow (int_type c) + { + if (prev_ == '\n' && c != '\n') // Don't indent blanks. + { + auto n (static_cast (indent_.size ())); + + if (buf_->sputn (indent_.c_str (), n) != n) + return traits_type::eof (); + } + + prev_ = c; + return buf_->sputc (c); + } + + int pager:: + sync () + { + return buf_->pubsync (); + } +} diff --git a/libbutl/pager.hxx b/libbutl/pager.hxx new file mode 100644 index 0000000..deca922 --- /dev/null +++ b/libbutl/pager.hxx @@ -0,0 +1,88 @@ +// file : libbutl/pager.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_PAGER_HXX +#define LIBBUTL_PAGER_HXX + +#include +#include +#include + +#include + +#include +#include + +namespace butl +{ + // Try to run the output through a pager program, such as more or less (no + // pun intended, less is used by default). If the default pager program is + // used, then the output is indented so that 80-character long lines will + // appear centered in the terminal. If the default pager program fails to + // start, then the output is sent directly to STDOUT. + // + // If the pager program is specified and is empty, then no pager is used + // and the output is sent directly to STDOUT. + // + // Throw std::system_error if there are problems with the pager program. + // + // Typical usage: + // + // try + // { + // pager p ("help for foo"); + // ostream& os (p.stream ()); + // + // os << "Foo is such and so ..."; + // + // if (!p.wait ()) + // ... // Pager program returned non-zero status. 
+ // } + // catch (const std::system_error& e) + // { + // cerr << "pager error: " << e << endl; + // } + // + class LIBBUTL_EXPORT pager: protected std::streambuf + { + public: + ~pager () {wait (true);} + + // If verbose is true, then print (to STDERR) the pager command line. + // + pager (const std::string& name, + bool verbose = false, + const std::string* pager = nullptr, + const std::vector* pager_options = nullptr); + + std::ostream& + stream () {return os_.is_open () ? os_ : std::cout;} + + bool + wait (bool ignore_errors = false); + + // The streambuf output interface that implements indentation. You can + // override it to implement custom output pre-processing. + // + protected: + using int_type = std::streambuf::int_type; + using traits_type = std::streambuf::traits_type; + + virtual int_type + overflow (int_type); + + virtual int + sync (); + + private: + process p_; + ofdstream os_; + + std::string indent_; + int_type prev_ = '\n'; // Previous character. + std::streambuf* buf_ = nullptr; + }; +} + +#endif // LIBBUTL_PAGER_HXX diff --git a/libbutl/path-io.hxx b/libbutl/path-io.hxx new file mode 100644 index 0000000..719456d --- /dev/null +++ b/libbutl/path-io.hxx @@ -0,0 +1,29 @@ +// file : libbutl/path-io.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_PATH_IO_HXX +#define LIBBUTL_PATH_IO_HXX + +#include + +#include + +namespace butl +{ + // This is the default path IO implementation. The reason it is + // separate is because one often wants a custom implementation. + // For example, we may want to print paths as relative to the + // working directory. Or we may want to print '~' for the home + // directory prefix. Or we may want to print dir_path with a + // trailing '/'. 
+ // + template + inline std::basic_ostream& + operator<< (std::basic_ostream& os, basic_path const& p) + { + return os << p.string (); + } +} + +#endif // LIBBUTL_PATH_IO_HXX diff --git a/libbutl/path-map.hxx b/libbutl/path-map.hxx new file mode 100644 index 0000000..7a966ab --- /dev/null +++ b/libbutl/path-map.hxx @@ -0,0 +1,120 @@ +// file : libbutl/path-map.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_PATH_MAP_HXX +#define LIBBUTL_PATH_MAP_HXX + +#include // min() + +#include +#include + +namespace butl +{ + // prefix_map for filesystem paths + // + // Important: the paths should be normalized but can use different case + // on case-insensitive platforms. + // + // Note that the path's representation of POSIX root ('/') is + // inconsistent in that we have a trailing delimiter at the end of + // the path (its "proper" representation would have been an empty + // string but that would have clashed with empty paths). To work + // around this snag, this implementation, during key comparison, + // detects '/' and treats it as empty. Note that the map will + // still store the key as you have first inserted it. So if you + // want a particular representation (i.e., empty or '/'), pre- + // populate the map with it. + // + template + struct compare_prefix> + { + typedef basic_path key_type; + + typedef C delimiter_type; + typedef typename key_type::string_type string_type; + typedef typename key_type::size_type size_type; + typedef typename key_type::traits traits_type; + + explicit + compare_prefix (delimiter_type) {} + + bool + operator() (const key_type& x, const key_type& y) const + { + const string_type& xs (x.string ()); + const string_type& ys (y.string ()); + + return compare (xs.c_str (), + root (xs) ? 0 : xs.size (), + ys.c_str (), + root (ys) ? 
0 : ys.size ()) < 0; + } + + bool + prefix (const key_type& p, const key_type& k) const + { + const string_type& ps (p.string ()); + const string_type& ks (k.string ()); + + return prefix (root (ps) ? string_type () : ps, + root (ks) ? string_type () : ks); + } + + protected: + bool + prefix (const string_type& p, const string_type& k) const + { + // The same code as in prefix_map but using our compare(). + // + size_type pn (p.size ()), kn (k.size ()); + return pn == 0 || // Empty key is always a prefix. + (pn <= kn && + compare (p.c_str (), pn, k.c_str (), pn == kn ? pn : pn + 1) == 0); + } + + int + compare (const C* x, size_type xn, + const C* y, size_type yn) const + { + size_type n (std::min (xn, yn)); + int r (traits_type::compare (x, n, y, n)); + + if (r == 0) + { + // Pretend there is a delimiter character at the end of the + // shorter string. + // + char xc (xn > n ? x[n] : (xn++, traits_type::directory_separator)); + char yc (yn > n ? y[n] : (yn++, traits_type::directory_separator)); + r = traits_type::compare (&xc, 1, &yc, 1); + + // If we are still equal, then compare the lengths. + // + if (r == 0) + r = (xn == yn ? 0 : (xn < yn ? -1 : 1)); + } + + return r; + } + + static bool + root (const string_type& p) + { + return p.size () == 1 && key_type::traits::is_separator (p[0]); + } + }; + + // Note that the delimiter character is not used (is_delimiter() from + // path_traits is used instead).
+ // + template + using path_map = prefix_map; + + template + using dir_path_map = + prefix_map; +} + +#endif // LIBBUTL_PATH_MAP_HXX diff --git a/libbutl/path.cxx b/libbutl/path.cxx new file mode 100644 index 0000000..61088bf --- /dev/null +++ b/libbutl/path.cxx @@ -0,0 +1,395 @@ +// file : libbutl/path.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#ifdef _WIN32 +# include + +# include // _find*() +# include // _MAX_PATH, _wgetenv() +# include // _[w]getcwd(), _[w]chdir() +# include // SHGetFolderPath*(), CSIDL_PROFILE +# include // SUCCEEDED() +#else +# include // struct passwd, getpwuid_r() +# include // EINVAL +# include // mbstowcs(), wcstombs(), realpath(), getenv() +# include // PATH_MAX +# include // getcwd(), chdir() +# include // strlen(), strcpy() +# include // stat(), S_IS* +# include // stat + +# include +#endif + +#include +#include +#include // strcpy() + +#include + +#include // throw_*_error() +#include + +#ifndef _WIN32 +# ifndef PATH_MAX +# define PATH_MAX 4096 +# endif +#endif + +using namespace std; + +#ifdef _WIN32 +using namespace butl::win32; +#endif + +namespace butl +{ + char const* invalid_path_base:: + what () const throw () + { + return "invalid filesystem path"; + } + + // + // char + // + + template <> + LIBBUTL_EXPORT path_traits::string_type path_traits:: + current_directory () + { +#ifdef _WIN32 + char cwd[_MAX_PATH]; + if (_getcwd (cwd, _MAX_PATH) == 0) + throw_generic_error (errno); + cwd[0] = toupper (cwd[0]); // Canonicalize. 
+#else + char cwd[PATH_MAX]; + if (getcwd (cwd, PATH_MAX) == 0) + throw_generic_error (errno); +#endif + + return cwd; + } + + template <> + LIBBUTL_EXPORT void path_traits:: + current_directory (string_type const& s) + { +#ifdef _WIN32 + // A path like 'C:', while being a root path in our terminology, is not as + // such for Windows, that maintains current directory for each drive, and + // so "change current directory to C:" means "change the process current + // directory to current directory on the C drive". Changing it to the root + // one of the drive requires the trailing directory separator to be + // present. + // + string_type const& d (!root (s) + ? s + : string_type (s + directory_separator)); + + if (_chdir (d.c_str ()) != 0) + throw_generic_error (errno); +#else + if (chdir (s.c_str ()) != 0) + throw_generic_error (errno); +#endif + } + +#ifndef _WIN32 + static const char* + temp_directory () + { + const char* dir (nullptr); + const char* env[] = {"TMPDIR", "TMP", "TEMP", "TEMPDIR", nullptr}; + + for (auto e (env); dir == nullptr && *e != nullptr; ++e) + dir = getenv (*e); + + if (dir == nullptr) + dir = "/tmp"; + + struct stat s; + if (stat (dir, &s) != 0) + throw_generic_error (errno); + + if (!S_ISDIR (s.st_mode)) + throw_generic_error (ENOTDIR); + + return dir; + } + + static string + home () + { + if (const char* h = getenv ("HOME")) + return h; + + // Struct passwd has 5 members that will use this buffer. Two are the + // home directory and shell paths. The other three are the user login + // name, password, and real name (comment). We expect them to fit into + // PATH_MAX * 2. + // + char buf[PATH_MAX * 4]; + + passwd pw; + passwd* rpw; + + int r (getpwuid_r (getuid (), &pw, buf, sizeof (buf), &rpw)); + if (r == -1) + throw_generic_error (errno); + + if (r == 0 && rpw == nullptr) + // According to POSIX errno should be left unchanged if an entry is not + // found. 
+ // + throw_generic_error (ENOENT); + + return pw.pw_dir; + } +#endif + + template <> + LIBBUTL_EXPORT path_traits::string_type path_traits:: + temp_directory () + { +#ifdef _WIN32 + char d[_MAX_PATH + 1]; + if (GetTempPathA (_MAX_PATH + 1, d) == 0) + throw_system_error (GetLastError ()); + + return d; +#else + return butl::temp_directory (); +#endif + } + + static atomic temp_name_count; + + template <> + LIBBUTL_EXPORT path_traits::string_type path_traits:: + temp_name (string_type const& prefix) + { + return prefix + + "-" + to_string (process::current_id ()) + + "-" + to_string (temp_name_count++); + } + + template <> + LIBBUTL_EXPORT path_traits::string_type path_traits:: + home_directory () + { +#ifndef _WIN32 + return home (); +#else + // Could be set by, e.g., MSYS and Cygwin shells. + // + if (const char* h = getenv ("HOME")) + return h; + + char h[_MAX_PATH]; + HRESULT r (SHGetFolderPathA (NULL, CSIDL_PROFILE, NULL, 0, h)); + + if (!SUCCEEDED (r)) + throw_system_error (r); + + return h; +#endif + } + +#ifndef _WIN32 + template <> + LIBBUTL_EXPORT void path_traits:: + realize (string_type& s) + { + char r[PATH_MAX]; + if (realpath (s.c_str (), r) == nullptr) + { + // @@ If there were a message in invalid_basic_path, we could have said + // what exactly is wrong with the path. + // + if (errno == EACCES || errno == ENOENT || errno == ENOTDIR) + throw invalid_basic_path (s); + else + throw_generic_error (errno); + } + + s = r; + } +#endif + + // + // wchar_t + // + + template <> + LIBBUTL_EXPORT path_traits::string_type path_traits:: + current_directory () + { +#ifdef _WIN32 + wchar_t wcwd[_MAX_PATH]; + if (_wgetcwd (wcwd, _MAX_PATH) == 0) + throw_generic_error (errno); + wcwd[0] = toupper (wcwd[0]); // Canonicalize. 
+#else + char cwd[PATH_MAX]; + if (getcwd (cwd, PATH_MAX) == 0) + throw_generic_error (errno); + + wchar_t wcwd[PATH_MAX]; + if (mbstowcs (wcwd, cwd, PATH_MAX) == size_type (-1)) + throw_generic_error (EINVAL); +#endif + + return wcwd; + } + + template <> + LIBBUTL_EXPORT void path_traits:: + current_directory (string_type const& s) + { +#ifdef _WIN32 + // Append the trailing directory separator for the root directory (read + // the comment in path_traits::current_directory() for + // justification). + // + string_type const& d (!root (s) + ? s + : string_type (s + directory_separator)); + + if (_wchdir (d.c_str ()) != 0) + throw_generic_error (errno); +#else + char ns[PATH_MAX + 1]; + + if (wcstombs (ns, s.c_str (), PATH_MAX) == size_type (-1)) + throw_generic_error (EINVAL); + + ns[PATH_MAX] = '\0'; + + if (chdir (ns) != 0) + throw_generic_error (errno); +#endif + } + + template <> + LIBBUTL_EXPORT path_traits::string_type path_traits:: + temp_directory () + { +#ifdef _WIN32 + wchar_t d[_MAX_PATH + 1]; + if (GetTempPathW (_MAX_PATH + 1, d) == 0) + throw_system_error (GetLastError ()); +#else + wchar_t d[PATH_MAX]; + + // The usage of mbstowcs() supposes the program's C-locale is set to the + // proper locale before the call (can be done with setlocale(LC_ALL, "...") + // call). Otherwise mbstowcs() fails with EILSEQ errno for non-ASCII + // directory paths. 
+ // + size_t r (mbstowcs (d, butl::temp_directory (), PATH_MAX)); + + if (r == size_t (-1)) + throw_generic_error (EINVAL); + + if (r == PATH_MAX) + throw_generic_error (ENOTSUP); +#endif + + return d; + } + + template <> + LIBBUTL_EXPORT path_traits::string_type path_traits:: + temp_name (string_type const& prefix) + { + return prefix + + L"-" + to_wstring (process::current_id ()) + + L"-" + to_wstring (temp_name_count++); + } + + template <> + LIBBUTL_EXPORT path_traits::string_type path_traits:: + home_directory () + { +#ifndef _WIN32 + wchar_t d[PATH_MAX]; + size_t r (mbstowcs (d, home ().c_str (), PATH_MAX)); + + if (r == size_t (-1)) + throw_generic_error (EINVAL); + + if (r == PATH_MAX) + throw_generic_error (ENOTSUP); + + return d; +#else + // Could be set by, e.g., MSYS and Cygwin shells. + // + if (const wchar_t* h = _wgetenv (L"HOME")) + return h; + + wchar_t h[_MAX_PATH]; + HRESULT r (SHGetFolderPathW (NULL, CSIDL_PROFILE, NULL, 0, h)); + + if (!SUCCEEDED (r)) + throw_system_error (r); + + return h; +#endif + } + +#ifndef _WIN32 + template <> + LIBBUTL_EXPORT void path_traits:: + realize (string_type&) + { + assert (false); // Implement if/when needed. + } +#endif + +#ifdef _WIN32 + template <> + LIBBUTL_EXPORT bool + basic_path_append_actual_name (string& r, + const string& d, + const string& n) + { + assert (d.size () + n.size () + 1 < _MAX_PATH); + + char p[_MAX_PATH]; + strcpy (p, d.c_str ()); + p[d.size ()] = '\\'; + strcpy (p + d.size () + 1, n.c_str ()); + + // It could be that using FindFirstFile() is faster. + // + _finddata_t fi; + intptr_t h (_findfirst (p, &fi)); + + if (h == -1 && errno == ENOENT) + return false; + + if (h == -1 || _findclose (h) == -1) + throw_generic_error (errno); + + r += fi.name; + return true; + } + + template <> + LIBBUTL_EXPORT bool + basic_path_append_actual_name (wstring&, + const wstring&, + const wstring&) + { + assert (false); // Implement if/when needed. 
+ return false; + } +#endif +} diff --git a/libbutl/path.hxx b/libbutl/path.hxx new file mode 100644 index 0000000..56e43cf --- /dev/null +++ b/libbutl/path.hxx @@ -0,0 +1,1136 @@ +// file : libbutl/path.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_PATH_HXX +#define LIBBUTL_PATH_HXX + +#include +#include // ptrdiff_t +#include // move(), swap() +#include +#include +#include // hash + +#include + +#include + +namespace butl +{ + // Wish list/ideas for improvements. + // + // - Ability to convert to directory/leaf/base in-place, without dynamic + // allocation. One idea is something like this: + // + // p -= "/*"; // directory + // p -= "*/"; // leaf + // p -= ".*"; // base + // + // - Faster normalize() implementation. In many cases (e.g., in build2) + // the path is either already normal or the difference is just slashes + // (i.e., there are no '.' or '..' components). So a fast path case + // might be in order. + // + // - We duplicate the interface for path and dir_path while most of it + // is common. Also, we can implicit-cast dir_path& to path& and use + // non-dir-adapted implementation (see where we call K::cast()). + // + + struct LIBBUTL_EXPORT invalid_path_base: public std::exception + { + virtual char const* + what () const throw (); + }; + + template + struct invalid_basic_path: invalid_path_base + { + using string_type = std::basic_string; + + string_type path; + + invalid_basic_path (const C* p): path (p) {} + invalid_basic_path (const string_type& p): path (p) {} + }; + + template + struct path_traits + { + using string_type = std::basic_string; + using size_type = typename string_type::size_type; + + // Canonical directory and path separators.
+ // +#ifdef _WIN32 + static const C directory_separator = '\\'; + static const C path_separator = ';'; +#else + static C const directory_separator = '/'; + static C const path_separator = ':'; +#endif + + // Canonical and alternative directory separators. Canonical should be + // first. + // +#ifdef _WIN32 + static constexpr const char* const directory_separators = "\\/"; +#else + static constexpr const char* const directory_separators = "/"; +#endif + + // Directory separator tests. On some platforms there could be multiple + // separators. For example, on Windows we check for both '/' and '\'. + // + static bool + is_separator (C c) + { +#ifdef _WIN32 + return c == '\\' || c == '/'; +#else + return c == '/'; +#endif + } + + // Return 1-based index in directory_separators string or 0 if not a + // separator. + // + static size_type + separator_index (C c) + { +#ifdef _WIN32 + return c == '\\' ? 1 : c == '/' ? 2 : 0; +#else + return c == '/' ? 1 : 0; +#endif + } + + static bool + absolute (const string_type& s) + { + return absolute (s.c_str (), s.size ()); + } + + static bool + absolute (const C* s, size_type n) + { +#ifdef _WIN32 + return n > 1 && s[1] == ':'; +#else + return n != 0 && is_separator (s[0]); +#endif + } + + static bool + current (const string_type& s) + { + return current (s.c_str (), s.size ()); + } + + static bool + current (const C* s, size_type n) + { + return n == 1 && s[0] == '.'; + } + + static bool + parent (const string_type& s) + { + return parent (s.c_str (), s.size ()); + } + + static bool + parent (const C* s, size_type n) + { + return n == 2 && s[0] == '.'
&& s[1] == '.'; + } + + static bool + root (const string_type& s) + { + return root (s.c_str (), s.size ()); + } + + static bool + root (const C* s, size_type n) + { +#ifdef _WIN32 + return n == 2 && s[1] == ':'; +#else + return n == 1 && is_separator (s[0]); +#endif + } + + static size_type + find_separator (string_type const& s, + size_type pos = 0, + size_type n = string_type::npos) + { + if (n == string_type::npos) + n = s.size (); + + const C* r (find_separator (s.c_str () + pos, n - pos)); + return r != nullptr ? r - s.c_str () : string_type::npos; + } + + static const C* + find_separator (const C* s, size_type n) + { + for (const C* e (s + n); s != e; ++s) + { + if (is_separator (*s)) + return s; + } + + return nullptr; + } + + static size_type + rfind_separator (string_type const& s, size_type pos = string_type::npos) + { + if (pos == string_type::npos) + pos = s.size (); + else + pos++; + + const C* r (rfind_separator (s.c_str (), pos)); + return r != nullptr ? r - s.c_str () : string_type::npos; + } + + static const C* + rfind_separator (const C* s, size_type n) + { + for (; n != 0; --n) + { + if (is_separator (s[n - 1])) + return s + n - 1; + } + + return nullptr; + } + + // Return the position of '.' or npos if there is no extension. + // + static size_type + find_extension (string_type const& s) + { + const C* r (find_extension (s.c_str (), s.size ())); + return r != nullptr ? r - s.c_str () : string_type::npos; + } + + static const C* + find_extension (const C* s, size_type n) + { + size_type i (n); + + for (; i > 0; --i) + { + C c (s[i - 1]); + + if (c == '.') + break; + + if (is_separator (c)) + { + i = 0; + break; + } + } + + // Weed out paths like ".txt" (and "/.txt") and "txt.". + // + if (i > 1 && !is_separator (s[i - 2]) && i != n) + return s + i - 1; + else + return nullptr; + } + + // Return the start of the leaf (last path component) in the path. 
Note + // that the leaf will include the trailing separator, if any (i.e., the + // leaf of /tmp/bar/ is bar/). + // + static size_type + find_leaf (string_type const& s) + { + const C* r (find_leaf (s.c_str (), s.size ())); + return r != nullptr ? r - s.c_str () : string_type::npos; + } + + static const C* + find_leaf (const C* s, size_type n) + { + const C* p; + return n == 0 + ? nullptr + : (p = rfind_separator (s, n - 1)) == nullptr ? s : ++p; + } + + static int + compare (string_type const& l, string_type const& r) + { + return compare (l.c_str (), l.size (), r.c_str (), r.size ()); + } + + // @@ Currently for case-insensitive filesystems (Windows) compare() + // works properly only for ASCII. + // + static int + compare (const C* l, size_type ln, const C* r, size_type rn) + { + for (size_type i (0), n (ln < rn ? ln : rn); i != n; ++i) + { +#ifdef _WIN32 + C lc (lcase (l[i])), rc (lcase (r[i])); +#else + C lc (l[i]), rc (r[i]); +#endif + if (is_separator (lc) && is_separator (rc)) + continue; + + if (lc < rc) return -1; + if (lc > rc) return 1; + } + + return ln < rn ? -1 : (ln > rn ? 1 : 0); + } + + static void + canonicalize (string_type& s) + { + //canonicalize (s.data (), s.size ()); // C++17 + + for (size_t i (0), n (s.size ()); i != n; ++i) + if (is_separator (s[i]) && s[i] != directory_separator) + s[i] = directory_separator; + } + + static void + canonicalize (C* s, size_type n) + { + for (const C* e (s + n); s != e; ++s) + if (is_separator (*s) && *s != directory_separator) + *s = directory_separator; + } + + // Get/set current working directory. Throw std::system_error to report + // the underlying OS errors. + // + static string_type + current_directory (); + + static void + current_directory (string_type const&); + + // Return the user home directory. Throw std::system_error to report the + // underlying OS errors. + // + static string_type + home_directory (); + + // Return the temporary directory. 
Throw std::system_error to report the + // underlying OS errors. + // + static string_type + temp_directory (); + + // Return a temporary name. The name is constructed by starting with the + // prefix followed by the process id followed by a unique counter value + // inside the process (MT-safe). Throw std::system_error to report the + // underlying OS errors. + // + static string_type + temp_name (string_type const& prefix); + + // Make the path real (by calling realpath(3)). Throw invalid_basic_path + // if the path is invalid (e.g., some components do not exist) and + // std::system_error to report other underlying OS errors. + // +#ifndef _WIN32 + static void + realize (string_type&); +#endif + + // Utilities. + // +#ifdef _WIN32 + static C + tolower (C); + + static C + toupper (C); +#endif + }; + + // This implementation of a filesystem path has two types: path, which can + // represent any path (file, directory, etc.) and dir_path, which is derived + // from path. The internal representation of directories maintains a + // trailing slash. However, it is ignored in path comparison, size, and + // string spelling. For example: + // + // path p1 ("foo"); // File path. + // path p2 ("bar/"); // Directory path. + // + // path p3 (p1 / p2); // Throw: p1 is not a directory. + // path p4 (p2 / p1); // Ok, file "bar/foo". + // path p5 (p2 / p2); // Ok, directory "bar/bar/". + // + // dir_path d1 ("foo"); // Directory path "foo/". + // dir_path d2 ("bar\\"); // Directory path "bar\". + // + // dir_path d3 (d2 / d1); // "bar\\foo/" + // + // (p4 == d3); // true + // d3.string (); // "bar\\foo" + // d3.representation (); // "bar\\foo/" + // + template + class basic_path; + + template struct any_path_kind; + template struct dir_path_kind; + + using path = basic_path>; + using dir_path = basic_path>; + using invalid_path = invalid_basic_path; + + // Cast from one path kind to another.
Note that no checking is performed + // (e.g., that there is a trailing slash if casting to dir_path) but the + // representation is adjusted if necessary (e.g., the trailing slash is + // added to dir_path if missing). + // + template P path_cast (const basic_path&); + template P path_cast (basic_path&&); + + // Low-level path data storage. It is also used by the implementation to pass + // around initialized/valid paths. + // + template + struct path_data + { + using string_type = std::basic_string; + using size_type = typename string_type::size_type; + using difference_type = typename string_type::difference_type; + + // The idea is as follows: path_ is always the "traditional" form; that + // is, "/" for the root directory and "/tmp" (no trailing slash) for the + // rest. This means we can return/store references to path_. + // + // Then we have tsep_ ("trailing separator") which is the size difference + // between path_ and its "pure" part, that is, without any trailing + // slashes, even for "/". So: + // + // tsep_ == -1 -- trailing slash in path_ (the "/" case) + // tsep_ == 0 -- no trailing slash + // + // Finally, to represent non-root ("/") trailing slashes we use positive + // tsep_ values. In this case tsep_ is interpreted as a 1-based index in + // the path_traits::directory_separators string. + // + // Notes: + // - If path_ is empty, then tsep_ can only be 0. + // - We could have used a much narrower integer for tsep_. + // + string_type path_; + difference_type tsep_; + + size_type + _size () const {return path_.size () + (tsep_ < 0 ? -1 : 0);} + + void + _swap (path_data& d) {path_.swap (d.path_); std::swap (tsep_, d.tsep_);} + + void + _clear () {path_.clear (); tsep_ = 0;} + + // Constructors. + // + path_data (): tsep_ (0) {} + + path_data (string_type&& p, difference_type ts) + : path_ (std::move (p)), tsep_ (path_.empty () ?
0 : ts) {} + + explicit + path_data (string_type&& p) + : path_ (std::move (p)), tsep_ (0) + { + size_type n (path_.size ()), i; + + if (n != 0 && (i = path_traits::separator_index (path_[n - 1])) != 0) + { + if (n == 1) // The "/" case. + tsep_ = -1; + else + { + tsep_ = i; + path_.pop_back (); + } + } + } + }; + + template + struct any_path_kind + { + class base_type: protected path_data // In essence protected path_data. + { + protected: + using path_data::path_data; + + base_type () = default; + base_type (path_data&& d): path_data (std::move (d)) {} + }; + + using dir_type = basic_path>; + + // Init and cast. + // + // If exact is true, return the path if the initialization was successful, + // that is, the passed string is a valid path and no modifications were + // necessary. Otherwise, return the empty object and leave the passed + // string untouched. + // + // If exact is false, throw invalid_path if the string is not a valid + // path (e.g., uses an unsupported path notation on Windows). + // + using data_type = path_data; + using string_type = std::basic_string; + + static data_type + init (string_type&&, bool exact = false); + + static void + cast (data_type&) {} + }; + + template + struct dir_path_kind + { + using base_type = basic_path>; + using dir_type = basic_path>; + + // Init and cast. + // + using data_type = path_data; + using string_type = std::basic_string; + + static data_type + init (string_type&&, bool exact = false); + + static void + cast (data_type&); + }; + + template + class basic_path: public K::base_type + { + public: + using string_type = std::basic_string; + using size_type = typename string_type::size_type; + using difference_type = typename string_type::difference_type; + using traits = path_traits; + + struct iterator; + using reverse_iterator = std::reverse_iterator; + + using base_type = typename K::base_type; + using dir_type = typename K::dir_type; + + // Create a special empty path.
Note that we have to provide our own + // implementation rather than using '=default' to make clang allow + // default-initialized const instances of this type. + // + basic_path () {} + + // Constructors that initialize a path from a string argument throw the + // invalid_path exception if the string is not a valid path (e.g., uses + // unsupported path notations on Windows). + // + explicit + basic_path (C const* s): base_type (K::init (s)) {} + + basic_path (C const* s, size_type n) + : base_type (K::init (string_type (s, n))) {} + + explicit + basic_path (string_type s): base_type (K::init (std::move (s))) {} + + basic_path (const string_type& s, size_type n) + : base_type (K::init (string_type (s, 0, n))) {} + + basic_path (const string_type& s, size_type p, size_type n) + : base_type (K::init (string_type (s, p, n))) {} + + // Create a path using the exact string representation. If the string is + // not a valid path or if it would require a modification, then empty path + // is created instead and the passed string rvalue-reference is left + // untouched. Note that no exception is thrown if the path is invalid. See + // also representation()&& below. + // + enum exact_type {exact}; + basic_path (string_type&& s, exact_type) + : base_type (K::init (std::move (s), true)) {} + + // Create a path as a sub-path identified by the [begin, end) range of + // components. + // + basic_path (const iterator& begin, const iterator& end); + + basic_path (const reverse_iterator& rbegin, const reverse_iterator& rend) + : basic_path (rend.base (), rbegin.base ()) {} + + void + swap (basic_path& p) {this->_swap (p);} + + void + clear () {this->_clear ();} + + // Get/set current working directory. Throw std::system_error to report + // the underlying OS errors. + // + static dir_type + current_directory () {return dir_type (traits::current_directory ());} + + static void + current_directory (basic_path const&); + + // Return the user home directory. 
Throw std::system_error to report the + // underlying OS errors. + // + static dir_type + home_directory () {return dir_type (traits::home_directory ());} + + // Return the temporary directory. Throw std::system_error to report the + // underlying OS errors. + // + static dir_type + temp_directory () {return dir_type (traits::temp_directory ());} + + // Return a temporary path. The path is constructed by starting with the + // temporary directory and then appending a path component consisting of + // the prefix followed by the process id followed by a unique counter + // value inside the process. Throw std::system_error to report the + // underlying OS errors. + // + static basic_path + temp_path (const string_type& prefix) + { + return temp_directory () / traits::temp_name (prefix); + } + + public: + bool + empty () const {return this->path_.empty ();} + + // Note that size does not include the trailing separator except for + // the root case. + // + size_type + size () const {return this->path_.size ();} + + // Return true if this path doesn't have any directories. Note that "/foo" + // is not a simple path (it is "foo" in root directory) while "/" is (it + // is the root directory). + // + bool + simple () const; + + bool + absolute () const; + + bool + relative () const {return !absolute ();} + + bool + root () const; + + // The following predicates return true for the "." and ".." paths, + // respectively. Note that the result doesn't depend on the presence or + // spelling of the trailing directory separator. + // + // Also note that the path must literally match the specified values rather + // than be semantically current or parent. For example for paths "foo/.." + // or "bar/../.." the predicates return false. + // + bool + current () const; + + bool + parent () const; + + // Test, based on the presence/absence of the trailing separator, if the + // path is to a directory.
+ // + bool + to_directory () const {return this->tsep_ != 0;} + + // Return true if *this is a sub-path of the specified path (i.e., + // the specified path is a prefix). Expects both paths to be + // normalized. Note that this function returns true if the paths + // are equal. Empty path is considered a prefix of any path. + // + bool + sub (const basic_path&) const; + + // Return true if *this is a super-path of the specified path (i.e., + // the specified path is a suffix). Expects both paths to be + // normalized. Note that this function returns true if the paths + // are equal. Empty path is considered a suffix of any path. + // + bool + sup (const basic_path&) const; + + public: + // Return the path without the directory part. Leaf of a directory is + // itself a directory (contains trailing slash). Leaf of a root is the + // path itself. + // + basic_path + leaf () const; + + // Return the path without the specified directory part. Throws + // invalid_path if the directory is not a prefix of *this. Expects both + // paths to be normalized. + // + basic_path + leaf (basic_path const&) const; + + // Return the directory part of the path or empty path if there is no + // directory. Directory of a root is an empty path. + // + dir_type + directory () const; + + // Return the directory part of the path without the specified leaf part. + // Throws invalid_path if the leaf is not a suffix of *this. Expects both + // paths to be normalized. + // + dir_type + directory (basic_path const&) const; + + // Return the root directory of the path or empty path if the directory is + // not absolute. + // + dir_type + root_directory () const; + + // Return the path without the extension, if any. + // + basic_path + base () const; + + // Return the extension or empty string if not present. If not empty, then the + // result starts with the character past the dot. + // + string_type + extension () const; + + // Return the in-place pointer to extension or NULL if not present.
If not + // NULL, then the result points to the character past the dot but it is + // legal to decrement it once to obtain the value with the dot. + // + const C* + extension_cstring () const; + + // Return a path relative to the specified path that is equivalent + // to *this. Throws invalid_path if a relative path cannot be derived + // (e.g., paths are on different drives on Windows). + // + basic_path + relative (basic_path) const; + + // Iteration over path components. + // + public: + struct iterator + { + using value_type = string_type ; + using pointer = string_type*; + using reference = string_type ; + using size_type = typename string_type::size_type; + using difference_type = std::ptrdiff_t ; + using iterator_category = std::bidirectional_iterator_tag ; + + using data_type = path_data; + + iterator (): p_ (nullptr) {} + iterator (const data_type* p, size_type b, size_type e) + : p_ (p), b_ (b), e_ (e) {} + + iterator& + operator++ () + { + const string_type& s (p_->path_); + + // Position past trailing separator, if any. + // + b_ = e_ != string_type::npos && ++e_ != s.size () + ? e_ + : string_type::npos; + + // Find next trailing separator. + // + e_ = b_ != string_type::npos ? traits::find_separator (s, b_) : b_; + + return *this; + } + + iterator& + operator-- () + { + const string_type& s (p_->path_); + + // Find the new end. + // + e_ = b_ == string_type::npos // Past end? + ? (traits::is_separator (s.back ()) // Have trailing slash? + ? s.size () - 1 + : string_type::npos) + : b_ - 1; + + // Find the new begin. + // + b_ = e_ == 0 // Empty component? + ? string_type::npos + : traits::rfind_separator (s, e_ != string_type::npos ? e_ - 1 : e_); + + b_ = b_ == string_type::npos // First component? + ? 
0 + : b_ + 1; + + return *this; + } + + iterator + operator++ (int) {iterator r (*this); operator++ (); return r;} + + iterator + operator-- (int) {iterator r (*this); operator-- (); return r;} + + string_type + operator* () const + { + return string_type (p_->path_, + b_, + e_ != string_type::npos ? e_ - b_ : e_); + } + + // Return the directory separator after this component or '\0' if there + // is none. This, for example, can be used to determine if the last + // component is a directory. + // + C + separator () const + { + return e_ != string_type::npos + ? p_->path_[e_] + : (p_->tsep_ > 0 + ? path_traits::directory_separators[p_->tsep_ - 1] + : 0); + } + + pointer operator-> () const = delete; + + friend bool + operator== (const iterator& x, const iterator& y) + { + return x.p_ == y.p_ && x.b_ == y.b_ && x.e_ == y.e_; + } + + friend bool + operator!= (const iterator& x, const iterator& y) {return !(x == y);} + + private: + friend class basic_path; + + // b - first character of component + // e - separator after component (or npos if none) + // b == npos && e == npos - one past last component (end) + // + const data_type* p_; + size_type b_; + size_type e_; + }; + + iterator begin () const; + iterator end () const; + + reverse_iterator rbegin () const {return reverse_iterator (end ());} + reverse_iterator rend () const {return reverse_iterator (begin ());} + + public: + // Canonicalize the path and return *this. Canonicalization involves + // converting all directory separators to the canonical form. Note that + // multiple directory separators are not collapsed. + // + basic_path& + canonicalize (); + + // Normalize the path and return *this. Normalization involves collapsing + // the '.' and '..' directories if possible, collapsing multiple + // directory separators, and converting all directory separators to the + // canonical form. 
If cur_empty is true then collapse relative paths + // representing the current directory (for example, '.', './', 'foo/..') + // to an empty path. Otherwise convert it to the canonical form (./ on + // POSIX systems). Note that a non-empty path cannot become an empty one + // in the latter case. + // + // If actual is true, then for case-insensitive filesystems obtain the + // actual spelling of the path. Only an absolute path can be actualized. + // If a path component does not exist, then its (and all subsequent) + // spelling is unchanged. This is a potentially expensive operation. + // Normally one can assume that "well-known" directories (current, home, + // etc.) are returned in their actual spelling. + // + basic_path& + normalize (bool actual = false, bool cur_empty = false); + + // Make the path absolute using the current directory unless it is already + // absolute. Return *this. + // + basic_path& + complete (); + + // Make the path real, that is, absolute, normalized, and with resolved + // symlinks. On POSIX systems this is accomplished with the call to + // realpath(3). On Windows -- complete() and normalize(). Return *this. + // + basic_path& + realize (); + + public: + basic_path& + operator/= (basic_path const&); + + // Combine a single path component (must not contain directory separators) + // as a string, without first constructing the path object. + // + basic_path& + operator/= (string_type const&); + + basic_path& + operator/= (const C*); + + // Append to the end of the path (normally an extension, etc). + // + basic_path& + operator+= (string_type const&); + + basic_path& + operator+= (const C*); + + basic_path& + operator+= (C); + + void + append (const C*, size_type); + + // Note that comparison is case-insensitive if the filesystem is not + // case-sensitive (e.g., Windows). And it ignores trailing slashes + // except for the root case.
+ // + template + int + compare (const basic_path& x) const { + return traits::compare (this->path_, x.path_);} + + public: + // Path string and representation. The string does not contain the + // trailing slash except for the root case. In other words, it is the + // "traditional" spelling of the path that can be passed to system calls, + // etc. Representation, on the other hand is the "precise" spelling that + // includes the trailing slash, if any. One cannot always round-trip a + // path using string() but can using representation(). Note also that + // representation() returns a copy while string() returns a (tracking) + // reference. + // + const string_type& + string () const& {return this->path_;} + + string_type + representation () const&; + + // Moves the underlying path string out of the path object. The path + // object becomes empty. Usage: std::move (p).string (). + // + string_type + string () && {string_type r; r.swap (this->path_); return r;} + + string_type + representation () &&; + + // Trailing directory separator or '\0' if there is none. + // + C + separator () const; + + // As above but return it as a (potentially empty) string. + // + string_type + separator_string () const; + + // If possible, return a POSIX version of the path. For example, for a + // Windows path in the form foo\bar this function will return foo/bar. If + // it is not possible to create a POSIX version for this path (e.g., + // c:\foo), this function will throw the invalid_path exception. + // + string_type + posix_string () const&; + + string_type + posix_representation () const&; + + string_type + posix_string () &&; + + string_type + posix_representation () &&; + + // Implementation details. + // + protected: + using data_type = path_data; + + // Direct initialization without init()/cast(). + // + explicit + basic_path (data_type&& d): base_type (std::move (d)) {} + + using base_type::_size; + + // Common implementation for operator/= and operator+=. 
+ // + void + combine (const C*, size_type, difference_type); + + void + combine (const C*, size_type); + + // Friends. + // + template + friend class basic_path; + + template + friend basic_path + path_cast_impl (const basic_path&, basic_path*); + + template + friend basic_path + path_cast_impl (basic_path&&, basic_path*); + }; + + template + inline basic_path + operator/ (const basic_path& x, const basic_path& y) + { + basic_path r (x); + r /= y; + return r; + } + + template + inline basic_path + operator+ (const basic_path& x, const std::basic_string& y) + { + basic_path r (x); + r += y; + return r; + } + + template + inline basic_path + operator+ (const basic_path& x, const C* y) + { + basic_path r (x); + r += y; + return r; + } + + template + inline basic_path + operator+ (const basic_path& x, C y) + { + basic_path r (x); + r += y; + return r; + } + + template + inline bool + operator== (const basic_path& x, const basic_path& y) + { + return x.compare (y) == 0; + } + + template + inline bool + operator!= (const basic_path& x, const basic_path& y) + { + return !(x == y); + } + + template + inline bool + operator< (const basic_path& x, const basic_path& y) + { + return x.compare (y) < 0; + } + + // Additional operators for certain path kind combinations. + // + template + inline basic_path> + operator/ (const basic_path>& x, + const basic_path>& y) + { + basic_path> r (x); + r /= y; + return r; + } + + + // Note that the result of (foo / "bar") is always a path, even if foo + // is dir_path. An idiom to force it dir_path is this: + // + // dir_path foo_bar (dir_path (foo) /= "bar"); + // + template + inline basic_path> + operator/ (const basic_path& x, const std::basic_string& y) + { + basic_path> r (x); + r /= y; + return r; + } + + template + inline basic_path> + operator/ (const basic_path& x, const C* y) + { + basic_path> r (x); + r /= y; + return r; + } + + // For operator<< (ostream) see the path-io header. 
+} + +namespace std +{ + template + struct hash>: hash> + { + using argument_type = butl::basic_path; + + size_t + operator() (const butl::basic_path& p) const noexcept + { + return hash>::operator() (p.string ()); + } + }; +} + +#include +#include + +#endif // LIBBUTL_PATH_HXX diff --git a/libbutl/path.ixx b/libbutl/path.ixx new file mode 100644 index 0000000..c140d1e --- /dev/null +++ b/libbutl/path.ixx @@ -0,0 +1,508 @@ +// file : libbutl/path.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifdef _WIN32 +# include // towlower(), towupper() +#endif + +namespace butl +{ +#ifdef _WIN32 + template <> + inline char path_traits:: + tolower (char c) + { + return lcase (c); + } + + template <> + inline wchar_t path_traits:: + tolower (wchar_t c) + { + return std::towlower (c); + } + + template <> + inline char path_traits:: + toupper (char c) + { + return ucase (c); + } + + template <> + inline wchar_t path_traits:: + toupper (wchar_t c) + { + return std::towupper (c); + } +#endif + + template + inline basic_path + path_cast_impl (const basic_path& p, basic_path*) + { + typename basic_path::data_type d ( + typename basic_path::string_type (p.path_), p.tsep_); + K1::cast (d); + return basic_path (std::move (d)); + } + + template + inline basic_path + path_cast_impl (basic_path&& p, basic_path*) + { + typename basic_path::data_type d (std::move (p.path_), p.tsep_); + K1::cast (d); + return basic_path (std::move (d)); + } + + template + inline P + path_cast (const basic_path& p) + { + return path_cast_impl (p, static_cast (nullptr)); + } + + template + inline P + path_cast (basic_path&& p) + { + return path_cast_impl (std::move (p), static_cast (nullptr)); + } + + template + inline bool basic_path:: + simple () const + { + return empty () || + traits::rfind_separator (this->path_, _size () - 1) == string_type::npos; + } + + template + inline bool basic_path:: + absolute () const + { + return 
traits::absolute (this->path_); + } + + template + inline bool basic_path:: + current () const + { + return traits::current (this->path_); + } + + template + inline bool basic_path:: + parent () const + { + return traits::parent (this->path_); + } + + template + inline bool basic_path:: + root () const + { + return traits::root (this->path_); + } + + template + inline bool basic_path:: + sub (const basic_path& p) const + { + // The thinking here is that we can use the full string representations + // (including the trailing slash in "/"). + // + const string_type& ps (p.path_); + size_type pn (ps.size ()); + + if (pn == 0) + return true; + + const string_type& s (this->path_); + size_type n (s.size ()); + + // The second condition guards against the /foo-bar vs /foo case. + // + return n >= pn && + traits::compare (s.c_str (), pn, ps.c_str (), pn) == 0 && + (traits::is_separator (ps.back ()) || // p ends with a separator + n == pn || // *this == p + traits::is_separator (s[pn])); // next char is a separator + } + + template + inline bool basic_path:: + sup (const basic_path& p) const + { + // The thinking here is that we can use the full string representations + // (including the trailing slash in "/"). + // + const string_type& ps (p.path_); + size_type pn (ps.size ()); + + if (pn == 0) + return true; + + const string_type& s (this->path_); + size_type n (s.size ()); + + // The second condition guards against the /foo-bar vs bar case. + // + return n >= pn && + traits::compare (s.c_str () + n - pn, pn, ps.c_str (), pn) == 0 && + (n == pn || // *this == p + traits::is_separator (s[n - pn - 1])); // previous char is a separator + } + + template + inline basic_path basic_path:: + leaf () const + { + const string_type& s (this->path_); + size_type n (_size ()); + + size_type p (n != 0 + ? traits::rfind_separator (s, n - 1) + : string_type::npos); + + return p != string_type::npos + ? 
basic_path (data_type (string_type (s, p + 1), this->tsep_)) + : *this; + } + + template + inline typename basic_path::dir_type basic_path:: + directory () const + { + const string_type& s (this->path_); + size_type n (_size ()); + + size_type p (n != 0 + ? traits::rfind_separator (s, n - 1) + : string_type::npos); + + return p != string_type::npos + ? dir_type (data_type (string_type (s, 0, p + 1))) // Include slash. + : dir_type (); + } + + template + inline auto basic_path:: + begin () const -> iterator + { + const string_type& s (this->path_); + + size_type b (s.empty () ? string_type::npos : 0); + size_type e (b == 0 ? traits::find_separator (s) : b); + + return iterator (this, b, e); + } + + template + inline auto basic_path:: + end () const -> iterator + { + return iterator (this, string_type::npos, string_type::npos); + } + + template + inline basic_path:: + basic_path (const iterator& b, const iterator& e) + : base_type ( + b == e + ? data_type () + // We need to include the trailing separator but it is implied if + // e == end(). + // + : (e.b_ != string_type::npos + ? data_type (string_type (b.p_->path_, b.b_, e.b_ - b.b_)) + : data_type (string_type (b.p_->path_, b.b_), b.p_->tsep_))) + { + //assert (b.p_ == e.p_); + } + + template + inline basic_path& basic_path:: + canonicalize () + { + traits::canonicalize (this->path_); + + if (this->tsep_ > 1) // Non-canonical trailing separator. + this->tsep_ = 1; + + return *this; + } + + template + inline basic_path& basic_path:: + complete () + { + if (relative ()) + *this = current_directory () / *this; + + return *this; + } + + template + inline basic_path& basic_path:: + realize () + { +#ifdef _WIN32 + // This is not exactly the semantics of realpath(3). In particular, we + // don't fail if the path does not exist. But we could have seeing that + // we actualize it. + // + complete (); + normalize (true); +#else + traits::realize (this->path_); // Note: we retain the trailing slash. 
+#endif + return *this; + } + + template + inline typename basic_path::dir_type basic_path:: + root_directory () const + { +#ifdef _WIN32 + // Note: on Windows we may have "c:" but still need to return "c:\". + // + const string_type& s (this->path_); + + return absolute () + ? dir_type ( + s.size () > 2 + ? data_type (string_type (s, 0, 3)) + : data_type (string_type (s), this->tsep_ != 0 ? this->tsep_ : 1)) + : dir_type (); +#else + return absolute () + ? dir_type (data_type ("/", -1)) + : dir_type (); +#endif + + } + + template + inline basic_path basic_path:: + base () const + { + const string_type& s (this->path_); + size_type p (traits::find_extension (s)); + + return p != string_type::npos + ? basic_path (data_type (string_type (s, 0, p), this->tsep_)) + : *this; + } + + template + inline typename basic_path::string_type basic_path:: + extension () const + { + const string_type& s (this->path_); + size_type p (traits::find_extension (s)); + return p != string_type::npos + ? string_type (s.c_str () + p + 1) + : string_type (); + } + + template + inline const C* basic_path:: + extension_cstring () const + { + const string_type& s (this->path_); + size_type p (traits::find_extension (s)); + return p != string_type::npos ? 
s.c_str () + p + 1 : nullptr; + } + +#ifndef _WIN32 + template + inline typename basic_path::string_type basic_path:: + posix_string () const& + { + return string (); + } + + template + inline typename basic_path::string_type basic_path:: + posix_string () && + { + return std::move (*this).string (); + } + + template + inline typename basic_path::string_type basic_path:: + posix_representation () const& + { + return representation (); + } + + template + inline typename basic_path::string_type basic_path:: + posix_representation () && + { + return std::move (*this).representation (); + } +#endif + + template + inline void basic_path:: + combine (const C* r, size_type rn, difference_type rts) + { + //assert (rn != 0); + + string_type& l (this->path_); + difference_type& ts (this->tsep_); + + // Handle the separator. LHS should be empty or already have one. + // + switch (ts) + { + case 0: if (!l.empty ()) throw invalid_basic_path (l); break; + case -1: break; // Already in the string. + default: l += path_traits::directory_separators[ts - 1]; + } + + l.append (r, rn); + ts = rts; // New trailing separator from RHS. + } + + template + inline void basic_path:: + combine (const C* r, size_type rn) + { + // If we do (dir_path / path) then we will end up with path. What should + // we end up if we do (dir_path / "foo") vs (dir_path / "foo/")? We cannot + // choose at runtime what kind of path to return. One (elaborate) option + // would be to handle the trailing slash but also call K::cast() so that + // dir_path gets the canonical trailing slash if one wasn't there. + // + // For now we won't allow the slash and will always add the canonical one + // for dir_path (via cast()). + // + if (traits::find_separator (r, rn) != nullptr) + throw invalid_basic_path (r); + + combine (r, rn, 0); + K::cast (*this); + } + + template + inline basic_path& basic_path:: + operator/= (basic_path const& r) + { + if (r.absolute () && !empty ()) // Allow ('' / '/foo'). 
+ throw invalid_basic_path (r.path_); + + if (!r.empty ()) + combine (r.path_.c_str (), r.path_.size (), r.tsep_); + + return *this; + } + + template + inline basic_path& basic_path:: + operator/= (string_type const& r) + { + if (size_type rn = r.size ()) + combine (r.c_str (), rn); + + return *this; + } + + template + inline basic_path& basic_path:: + operator/= (const C* r) + { + if (size_type rn = string_type::traits_type::length (r)) + combine (r, rn); + + return *this; + } + + template + inline void basic_path:: + append (const C* r, size_type rn) + { + //assert (this->tsep_ != -1); // Append to root? + this->path_.append (r, rn); + } + + template + inline basic_path& basic_path:: + operator+= (string_type const& s) + { + append (s.c_str (), s.size ()); + return *this; + } + + template + inline basic_path& basic_path:: + operator+= (const C* s) + { + append (s, string_type::traits_type::length (s)); + return *this; + } + + template + inline basic_path& basic_path:: + operator+= (C c) + { + append (&c, 1); + return *this; + } + + template + inline auto basic_path:: + representation () const& -> string_type + { + string_type r (this->path_); + + if (this->tsep_ > 0) + r += path_traits::directory_separators[this->tsep_ - 1]; + + return r; + } + + template + inline auto basic_path:: + representation () && -> string_type + { + string_type r; + r.swap (this->path_); + + if (this->tsep_ > 0) + r += path_traits::directory_separators[this->tsep_ - 1]; + + return r; + } + + template + inline C basic_path:: + separator () const + { + return (this->tsep_ == 0 ? 0 : + this->tsep_ == -1 ? this->path_[0] : + path_traits::directory_separators[this->tsep_ - 1]); + } + + template + inline auto basic_path:: + separator_string () const -> string_type + { + C c (separator ()); + return c == 0 ? string_type () : string_type (1, c); + } + + template + inline void dir_path_kind:: + cast (data_type& d) + { + // Add trailing slash if one isn't already there. 
+ // + if (!d.path_.empty () && d.tsep_ == 0) + d.tsep_ = 1; // Canonical separator is always first. + } +} diff --git a/libbutl/path.txx b/libbutl/path.txx new file mode 100644 index 0000000..17673f8 --- /dev/null +++ b/libbutl/path.txx @@ -0,0 +1,377 @@ +// file : libbutl/path.txx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include +#include + +#ifdef _WIN32 +# include // replace() +#endif + +namespace butl +{ + template + basic_path basic_path:: + leaf (basic_path const& d) const + { + size_type dn (d.path_.size ()); + + if (dn == 0) + return *this; + + const string_type& s (this->path_); + + if (!sub (d)) + throw invalid_basic_path (s); + + // If there is implied trailing slash, add it to count. Unless it is + // "matched" by the implied slash on the other side. + // + if (d.tsep_ > 0 && dn < s.size ()) + dn++; + + // Preserve trailing slash. + // + return basic_path (data_type (string_type (s, dn, s.size () - dn), + this->tsep_)); + } + + template + typename basic_path::dir_type basic_path:: + directory (basic_path const& l) const + { + size_type ln (l.path_.size ()); + + const string_type& s (this->path_); + + if (ln == 0) + { + if (this->tsep_ == 0) // Must be a directory. + throw invalid_basic_path (s); + + return dir_type (data_type (string_type (s), this->tsep_)); + } + + if (!sup (l)) + throw invalid_basic_path (s); + + return dir_type ( + data_type (string_type (s, 0, s.size () - ln))); // Include slash. 
+ } + +#ifdef _WIN32 + template + typename basic_path::string_type basic_path:: + posix_string () const& + { + if (absolute ()) + throw invalid_basic_path (this->path_); + + string_type r (string ()); + replace (r.begin (), r.end (), '\\', '/'); + return r; + } + + template + typename basic_path::string_type basic_path:: + posix_string () && + { + if (absolute ()) + throw invalid_basic_path (this->path_); + + string_type r (std::move (*this).string ()); + replace (r.begin (), r.end (), '\\', '/'); + return r; + } + + template + typename basic_path::string_type basic_path:: + posix_representation () const& + { + if (absolute ()) + throw invalid_basic_path (this->path_); + + string_type r (representation ()); + replace (r.begin (), r.end (), '\\', '/'); + return r; + } + + template + typename basic_path::string_type basic_path:: + posix_representation () && + { + if (absolute ()) + throw invalid_basic_path (this->path_); + + string_type r (std::move (*this).representation ()); + replace (r.begin (), r.end (), '\\', '/'); + return r; + } +#endif + + template + basic_path basic_path:: + relative (basic_path d) const + { + dir_type r; + + for (;; d = d.directory ()) + { + if (sub (d)) + break; + + r /= ".."; + + // Roots of the paths do not match. + // + if (d.root ()) + throw invalid_basic_path (this->path_); + } + + return r / leaf (d); + } + +#ifdef _WIN32 + // Find the actual spelling of a name in the specified dir. If the name is + // found, append it to the result and return true. Otherwise, return false. + // Throw system_error in case of other failures. Result and dir can be the + // same instance. + // + template + bool + basic_path_append_actual_name (std::basic_string& result, + const std::basic_string& dir, + const std::basic_string& name); +#endif + + template + basic_path& basic_path:: + normalize (bool actual, bool cur_empty) + { + if (empty ()) + return *this; + + bool abs (absolute ()); + assert (!actual || abs); // Only absolute can be actualized.
+ + string_type& s (this->path_); + difference_type& ts (this->tsep_); + + typedef std::vector paths; + paths ps; + + bool tsep (ts != 0); // Trailing directory separator. + { + size_type n (_size ()); + + for (size_type b (0), e (traits::find_separator (s, 0, n)); + ; + e = traits::find_separator (s, b, n)) + { + ps.push_back ( + string_type (s, b, (e == string_type::npos ? n : e) - b)); + + if (e == string_type::npos) + break; + + ++e; + + // Skip consecutive directory separators. + // + while (e != n && traits::is_separator (s[e])) + ++e; + + if (e == n) + break; + + b = e; + } + + // If the last component is "." or ".." then this is a directory. + // + if (!tsep) + { + const string_type& l (ps.back ()); + if (traits::current (l) || traits::parent (l)) + tsep = true; + } + } + + // Collapse "." and "..". + // + paths r; + + for (typename paths::iterator i (ps.begin ()), e (ps.end ()); i != e; ++i) + { + string_type& s (*i); + + if (traits::current (s)) + continue; + + // If '..' then pop the last directory from r unless it is '..'. + // + if (traits::parent (s) && !r.empty () && !traits::parent (r.back ())) + { + // Cannot go past the root directory. + // + if (abs && r.size () == 1) + throw invalid_basic_path (this->path_); + + r.pop_back (); + continue; + } + + r.push_back (std::move (s)); + } + + // Reassemble the path, actualizing each component if requested. + // + string_type p; + + for (typename paths::const_iterator b (r.begin ()), i (b), e (r.end ()); + i != e;) + { +#ifdef _WIN32 + if (actual) + { + if (i == b) + { + // The first component (the drive letter) we have to actualize + // ourselves. Capital seems to be canonical. This is, for example, + // what getcwd() returns. + // + p = *i; + p[0] = traits::toupper (p[0]); + } + else + { + if (!basic_path_append_actual_name (p, p, *i)) + { + p += *i; + actual = false; // Ignore for all subsequent components. 
+ } + } + } + else +#endif + p += *i; + + if (++i != e) + p += traits::directory_separator; + } + + if (tsep) + { + if (p.empty ()) + { + // Distinguish "/"-empty and "."-empty. + // + if (abs) + { + p += traits::directory_separator; + ts = -1; + } + else if (!cur_empty) // Collapse to canonical current directory. + { + p.assign (1, '.'); + ts = 1; // Canonical separator is always first. + } + else // Collapse to empty path. + ts = 0; + } + else + ts = 1; // Canonical separator is always first. + } + else + ts = 0; + + s.swap (p); + return *this; + } + + template + void basic_path:: + current_directory (basic_path const& p) + { + const string_type& s (p.string ()); + + if (s.empty ()) + throw invalid_basic_path (s); + + traits::current_directory (s); + } + + template + auto any_path_kind:: + init (string_type&& s, bool exact) -> data_type + { + using size_type = typename string_type::size_type; + using difference_type = typename string_type::difference_type; + + size_type n (s.size ()); + +#ifdef _WIN32 + // We do not support any special Windows path name notations like in C:abc, + // /, \, /abc, \abc, \\?\c:\abc, \\server\abc and \\?\UNC\server\abc (more + // about them in "Naming Files, Paths, and Namespaces" MSDN article). + // + if ((n > 2 && s[1] == ':' && s[2] != '\\' && s[2] != '/') || + (n > 0 && (s[0] == '\\' || s[0] == '/'))) + { + if (exact) + return data_type (); + else + throw invalid_basic_path (s); + } +#endif + + // Strip trailing slashes. + // + size_type m (n), di (0); + for (size_type i; + m != 0 && (i = path_traits::separator_index (s[m - 1])) != 0; + --m) di = i; + + difference_type ts (0); + if (size_t k = n - m) + { + // We can only accommodate one trailing slash in the exact mode. + // + if (exact && k > 1) + return data_type (); + + if (m == 0) // The "/" case. + { + ++m; // Keep one slash in the string. 
+ ts = -1; + } + else + ts = di; + + s.resize (m); + } + + return data_type (std::move (s), ts); + } + + template + auto dir_path_kind:: + init (string_type&& s, bool exact) -> data_type + { + // If we don't already have the separator then this can't be the exact + // initialization. + // + if (exact && !s.empty () && !path_traits::is_separator (s.back ())) + return data_type (); + + data_type r (any_path_kind::init (std::move (s), exact)); + + // Unless the result is empty, make sure we have the trailing slash. + // + if (!r.path_.empty () && r.tsep_ == 0) + r.tsep_ = 1; // Canonical separator is always first. + + return r; + } +} diff --git a/libbutl/prefix-map.hxx b/libbutl/prefix-map.hxx new file mode 100644 index 0000000..0404f72 --- /dev/null +++ b/libbutl/prefix-map.hxx @@ -0,0 +1,138 @@ +// file : libbutl/prefix-map.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_PREFIX_MAP_HXX +#define LIBBUTL_PREFIX_MAP_HXX + +#include +#include +#include // move() +#include // min() + +namespace butl +{ + // A map of hierarchical "paths", e.g., 'foo.bar' or 'foo/bar' with + // the ability to retrieve a range of entries that have a specific + // prefix. The '.' and '/' above are the delimiter characters. + // + // Note that as a special rule, the default implementation of + // compare_prefix treats empty key as everyone's prefix even if + // the paths don't start with the delimiter (useful to represent + // a "root path"). + // + // Implementation-wise, the idea is to pretend that each key ends + // with the delimiter. This way we automatically avoid matching + // 'foobar' as having a prefix 'foo'. 
+ // + template + struct compare_prefix; + + template + struct compare_prefix> + { + typedef std::basic_string K; + + typedef C delimiter_type; + typedef typename K::size_type size_type; + typedef typename K::traits_type traits_type; + + explicit + compare_prefix (delimiter_type d): d_ (d) {} + + bool + operator() (const K& x, const K& y) const + { + return compare (x.c_str (), x.size (), y.c_str (), y.size ()) < 0; + } + + bool + prefix (const K& p, const K& k) const + { + size_type pn (p.size ()), kn (k.size ()); + return pn == 0 || // Empty key is always a prefix. + (pn <= kn && + compare (p.c_str (), pn, k.c_str (), pn == kn ? pn : pn + 1) == 0); + } + + protected: + int + compare (const C* x, size_type xn, + const C* y, size_type yn) const + { + size_type n (std::min (xn, yn)); + int r (traits_type::compare (x, y, n)); + + if (r == 0) + { + // Pretend there is a delimiter character at the end of the + // shorter string. + // + char xc (xn > n ? x[n] : (xn++, d_)); + char yc (yn > n ? y[n] : (yn++, d_)); + r = traits_type::compare (&xc, &yc, 1); + + // If we are still equal, then compare the lengths. + // + if (r == 0) + r = (xn == yn ? 0 : (xn < yn ? 
-1 : 1)); + } + + return r; + } + + private: + delimiter_type d_; + }; + + template + struct prefix_map_common: M + { + typedef M map_type; + typedef typename map_type::key_type key_type; + typedef typename map_type::value_type value_type; + typedef typename map_type::key_compare compare_type; + typedef typename compare_type::delimiter_type delimiter_type; + + typedef typename map_type::iterator iterator; + typedef typename map_type::const_iterator const_iterator; + + explicit + prefix_map_common (delimiter_type d) + : map_type (compare_type (d)) {} + + prefix_map_common (std::initializer_list i, delimiter_type d) + : map_type (std::move (i), compare_type (d)) {} + + std::pair + find_prefix (const key_type&); + + std::pair + find_prefix (const key_type&) const; + }; + + template ::delimiter_type D> + struct prefix_map_impl: prefix_map_common + { + typedef typename prefix_map_common::value_type value_type; + + prefix_map_impl (): prefix_map_common (D) {} + prefix_map_impl (std::initializer_list i) + : prefix_map_common (std::move (i), D) {} + }; + + template ::delimiter_type D> + using prefix_map = prefix_map_impl>, D>; + + template ::delimiter_type D> + using prefix_multimap = + prefix_map_impl>, D>; +} + +#include + +#endif // LIBBUTL_PREFIX_MAP_HXX diff --git a/libbutl/prefix-map.txx b/libbutl/prefix-map.txx new file mode 100644 index 0000000..f3cd29f --- /dev/null +++ b/libbutl/prefix-map.txx @@ -0,0 +1,39 @@ +// file : libbutl/prefix-map.txx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +namespace butl +{ + template + auto prefix_map_common:: + find_prefix (const key_type& k) -> std::pair + { + std::pair r; + r.first = this->lower_bound (k); + + for (r.second = r.first; r.second != this->end (); ++r.second) + { + if (!this->key_comp ().prefix (k, r.second->first)) + break; + } + + return r; + } + + template + auto prefix_map_common:: + find_prefix (const key_type& k) const -> + 
std::pair + { + std::pair r; + r.first = this->lower_bound (k); + + for (r.second = r.first; r.second != this->end (); ++r.second) + { + if (!this->key_comp ().prefix (k, r.second->first)) + break; + } + + return r; + } +} diff --git a/libbutl/process-details.hxx b/libbutl/process-details.hxx new file mode 100644 index 0000000..b078cbb --- /dev/null +++ b/libbutl/process-details.hxx @@ -0,0 +1,49 @@ +// file : libbutl/process-details.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_PROCESS_DETAILS_HXX +#define LIBBUTL_PROCESS_DETAILS_HXX + +#include + +#include +#if defined(__cpp_lib_shared_mutex) || defined(__cpp_lib_shared_timed_mutex) +# include +#endif + +namespace butl +{ +#if defined(__cpp_lib_shared_mutex) + using shared_mutex = std::shared_mutex; + using ulock = std::unique_lock; + using slock = std::shared_lock; +#elif defined(__cpp_lib_shared_timed_mutex) + using shared_mutex = std::shared_timed_mutex; + using ulock = std::unique_lock; + using slock = std::shared_lock; +#else + // Because we have this fallback, we need to be careful not to create + // multiple shared locks in the same thread. + // + struct shared_mutex: std::mutex + { + using mutex::mutex; + + void lock_shared () { lock (); } + void try_lock_shared () { try_lock (); } + void unlock_shared () { unlock (); } + }; + + using ulock = std::unique_lock; + using slock = ulock; +#endif + + // Mutex that is acquired to make a sequence of operations atomic in regards + // to child process spawning. Must be acquired for exclusive access for child + // process startup, and for shared access otherwise. Defined in process.cxx. 
+ // + extern shared_mutex process_spawn_mutex; +} + +#endif // LIBBUTL_PROCESS_DETAILS_HXX diff --git a/libbutl/process-io.hxx b/libbutl/process-io.hxx new file mode 100644 index 0000000..b70080c --- /dev/null +++ b/libbutl/process-io.hxx @@ -0,0 +1,28 @@ +// file : libbutl/process-io.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_PROCESS_IO_HXX +#define LIBBUTL_PROCESS_IO_HXX + +#include + +#include + +namespace butl +{ + inline std::ostream& + operator<< (std::ostream& o, const process_path& p) + { + return o << p.recall_string (); + } + + inline std::ostream& + operator<< (std::ostream& o, const process_args& a) + { + process::print (o, a.argv, a.argc); + return o; + } +} + +#endif // LIBBUTL_PROCESS_IO_HXX diff --git a/libbutl/process-run.cxx b/libbutl/process-run.cxx new file mode 100644 index 0000000..ce5ab20 --- /dev/null +++ b/libbutl/process-run.cxx @@ -0,0 +1,32 @@ +// file : libbutl/process-run.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include // exit() +#include // cerr + +using namespace std; + +namespace butl +{ + process + process_start (const dir_path& cwd, + const process_path& pp, + const char* cmd[], + int in, + int out, + int err) + { + try + { + return process (cwd.string ().c_str (), pp, cmd, in, out, err); + } + catch (const process_child_error& e) + { + cerr << "unable to execute " << cmd[0] << ": " << e << endl; + exit (1); + } + } +} diff --git a/libbutl/process-run.txx b/libbutl/process-run.txx new file mode 100644 index 0000000..28c44cf --- /dev/null +++ b/libbutl/process-run.txx @@ -0,0 +1,226 @@ +// file : libbutl/process-run.txx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include +#include // move(), forward(), index_sequence + +namespace butl +{ + inline int 
process_stdin (int v) {assert (v >= 0); return v;} + inline int process_stdout (int v) {assert (v >= 0); return v;} + inline int process_stderr (int v) {assert (v >= 0); return v;} + + inline int + process_stdin (const auto_fd& v) {assert (v.get () >= 0); return v.get ();} + + inline int + process_stdout (const auto_fd& v) {assert (v.get () >= 0); return v.get ();} + + inline int + process_stderr (const auto_fd& v) {assert (v.get () >= 0); return v.get ();} + + LIBBUTL_EXPORT process + process_start (const dir_path& cwd, + const process_path& pp, + const char* cmd[], + int in, + int out, + int err); + + template + inline const char* + process_args_as_wrapper (V& v, const T& x, std::string& storage) + { + process_args_as (v, x, storage); + return nullptr; + } + + template + process + process_start (std::index_sequence, + const C& cmdc, + I&& in, + O&& out, + E&& err, + const dir_path& cwd, + const process_path& pp, + A&&... args) + { + // Map stdin/stdout/stderr arguments to their integer values, as expected + // by the process constructor. + // + int in_i (process_stdin (std::forward (in))); + int out_i (process_stdout (std::forward (out))); + int err_i (process_stderr (std::forward (err))); + + // Construct the command line array. + // + const std::size_t args_size (sizeof... (args)); + + small_vector cmd; + cmd.push_back (pp.recall_string ()); + + std::string storage[args_size != 0 ? args_size : 1]; + + const char* dummy[] = { + nullptr, process_args_as_wrapper (cmd, args, storage[index])... }; + + cmd.push_back (dummy[0]); // NULL (and get rid of unused warning). + + cmdc (cmd.data (), cmd.size ()); + + // @@ Do we need to make sure certain fd's are closed before calling + // wait()? Is this only the case with pipes? Needs thinking. + + return process_start (cwd, pp, cmd.data (), in_i, out_i, err_i); + } + + template + inline process + process_start (const C& cmdc, + I&& in, + O&& out, + E&& err, + const dir_path& cwd, + const process_path& pp, + A&&... 
args) + { + return process_start (std::index_sequence_for (), + cmdc, + std::forward (in), + std::forward (out), + std::forward (err), + cwd, + pp, + std::forward (args)...); + } + + template + inline process + process_start (I&& in, + O&& out, + E&& err, + const dir_path& cwd, + const P& p, + A&&... args) + { + return process_start ([] (const char* [], std::size_t) {}, + std::forward (in), + std::forward (out), + std::forward (err), + cwd, + process::path_search (p, true), + std::forward (args)...); + } + + template + inline process + process_start (const C& cmdc, + I&& in, + O&& out, + E&& err, + const dir_path& cwd, + const P& p, + A&&... args) + { + return process_start (cmdc, + std::forward (in), + std::forward (out), + std::forward (err), + cwd, + process::path_search (p, true), + std::forward (args)...); + } + + template + inline process_exit + process_run (const C& cmdc, + I&& in, + O&& out, + E&& err, + const dir_path& cwd, + const process_path& pp, + A&&... args) + { + process pr ( + process_start (cmdc, + std::forward (in), + std::forward (out), + std::forward (err), + cwd, + pp, + std::forward (args)...)); + + pr.wait (); + return *pr.exit; + } + + template + inline process_exit + process_run (I&& in, + O&& out, + E&& err, + const dir_path& cwd, + const P& p, + A&&... args) + { + return process_run ([] (const char* [], std::size_t) {}, + std::forward (in), + std::forward (out), + std::forward (err), + cwd, + process::path_search (p, true), + std::forward (args)...); + } + + template + inline process_exit + process_run (const C& cmdc, + I&& in, + O&& out, + E&& err, + const dir_path& cwd, + const P& p, + A&&... 
args) + { + return process_run (cmdc, + std::forward (in), + std::forward (out), + std::forward (err), + cwd, + process::path_search (p, true), + std::forward (args)...); + } +} diff --git a/libbutl/process.cxx b/libbutl/process.cxx new file mode 100644 index 0000000..08abc22 --- /dev/null +++ b/libbutl/process.cxx @@ -0,0 +1,1440 @@ +// file : libbutl/process.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#ifndef _WIN32 +# include // execvp, fork, dup2, pipe, chdir, *_FILENO, getpid +# include // waitpid +# include // _stat +# include // _stat(), S_IS* +#else +# include + +# include // EnumProcessModules(), etc + +# include // _get_osfhandle(), _close() +# include // _MAX_PATH +# include // stat +# include // stat(), S_IS* + +# ifdef _MSC_VER // Unlikely to be fixed in newer versions. +# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) + +# define STDIN_FILENO 0 +# define STDOUT_FILENO 1 +# define STDERR_FILENO 2 +# endif // _MSC_VER + +# include // getenv(), __argv[] + +# include +#endif + +#include + +#include // ios_base::failure +#include +#include // size_t +#include // strlen(), strchr() +#include // move() +#include + +#include // casecmp() +#include // fdnull() +#include + +#include + +using namespace std; + +#ifdef _WIN32 +using namespace butl::win32; +#endif + +namespace butl +{ + shared_mutex process_spawn_mutex; + + // process + // + static process_path + path_search (const char*, const dir_path&); + + process_path process:: + path_search (const char* f, bool init, const dir_path& fb) + { + process_path r (try_path_search (f, init, fb)); + + if (r.empty ()) + throw process_error (ENOENT); + + return r; + } + + process_path process:: + try_path_search (const char* f, bool init, const dir_path& fb) + { + process_path r (butl::path_search (f, fb)); + + if (!init && !r.empty ()) + { + path& rp (r.recall); + r.initial = (rp.empty () ? 
(rp = path (f)) : rp).string ().c_str (); + } + + return r; + } + + void process:: + print (ostream& o, const char* const args[], size_t n) + { + size_t m (0); + const char* const* p (args); + do + { + if (m != 0) + o << " |"; // Trailing space will be added inside the loop. + + for (m++; *p != nullptr; p++, m++) + { + if (p != args) + o << ' '; + + // Quote if empty or contains spaces. + // + bool q (**p == '\0' || strchr (*p, ' ') != nullptr); + + if (q) + o << '"'; + + o << *p; + + if (q) + o << '"'; + } + + if (m < n) // Can we examine the next element? + { + p++; + m++; + } + + } while (*p != nullptr); + } + + process:: + process (const char* cwd, + const process_path& pp, const char* args[], + process& in, int out, int err) + : process (cwd, pp, args, in.in_ofd.get (), out, err) + { + assert (in.in_ofd.get () != -1); // Should be a pipe. + in.in_ofd.reset (); // Close it on our side. + } + +#ifndef _WIN32 + + static process_path + path_search (const char* f, const dir_path& fb) + { + // Note that there is a similar version for Win32. + + typedef path::traits traits; + + size_t fn (strlen (f)); + + process_path r (f, path (), path ()); // Make sure it is not empty. + path& rp (r.recall); + path& ep (r.effect); + + // Check that the file exists and has at least one executable bit set. + // This way we get a bit closer to the "continue search on EACCES" + // semantics (see below). + // + auto exists = [] (const char* f) -> bool + { + struct stat si; + return (stat (f, &si) == 0 && + S_ISREG (si.st_mode) && + (si.st_mode & (S_IEXEC | S_IXGRP | S_IXOTH)) != 0); + }; + + auto search = [&ep, f, fn, &exists] (const char* d, + size_t dn, + bool norm = false) -> bool + { + string s (move (ep).string ()); // Reuse buffer. + + if (dn != 0) + { + s.assign (d, dn); + + if (!traits::is_separator (s.back ())) + s += traits::directory_separator; + } + + s.append (f, fn); + ep = path (move (s)); // Move back into result. 
+ + if (norm) + ep.normalize (); + + return exists (ep.string ().c_str ()); + }; + + // If there is a directory component in the file, then the PATH search + // does not apply. If the path is relative, then prepend CWD. In both + // cases make sure the file actually exists. + // + if (traits::find_separator (f, fn) != nullptr) + { + if (traits::absolute (f, fn)) + { + if (exists (f)) + return r; + } + else + { + const string& d (traits::current_directory ()); + + if (search (d.c_str (), d.size (), true)) + return r; + } + + return process_path (); + } + + // The search order is documented in exec(3). Some of the differences + // compared to exec*p() functions: + // + // 1. If there is no PATH, we don't default to current directory/_CS_PATH. + // 2. We do not continue searching on EACCES from execve(). + // 3. We do not execute via default shell on ENOEXEC from execve(). + // + for (const char* b (getenv ("PATH")), *e; + b != nullptr; + b = (e != nullptr ? e + 1 : e)) + { + e = strchr (b, traits::path_separator); + + // Empty path (i.e., a double colon or a colon at the beginning or end + // of PATH) means search in the current directory. Silently skip + // invalid paths. + // + try + { + if (search (b, e != nullptr ? e - b : strlen (b))) + return r; + } + catch (const invalid_path&) + { + } + } + + // If we were given a fallback, try that. + // + if (!fb.empty ()) + { + if (search (fb.string ().c_str (), fb.string ().size ())) + { + // In this case we have to set the recall path. And we know from + // search() implementation that it will be the same as effective. + // Which means we can just move effective to recall. + // + rp.swap (ep); + + return r; + } + } + + // Did not find anything. 
+ // + return process_path (); + } + + process:: + process (const char* cwd, + const process_path& pp, const char* args[], + int in, int out, int err) + { + fdpipe out_fd; + fdpipe in_ofd; + fdpipe in_efd; + + auto fail = [] (bool child) + { + if (child) + throw process_child_error (errno); + else + throw process_error (errno); + }; + + auto open_pipe = [] () -> fdpipe + { + try + { + return fdopen_pipe (); + } + catch (const ios_base::failure&) + { + // Translate to process_error. + // + // For old versions of g++ (as of 4.9) ios_base::failure is not derived + // from system_error and so we cannot recover the errno value. On the + // other hand the only possible values are EMFILE and ENFILE. Lets use + // EMFILE as the more probable. This is a temporary code after all. + // + throw process_error (EMFILE); + } + }; + + auto open_null = [] () -> auto_fd + { + try + { + return fdnull (); + } + catch (const ios_base::failure& e) + { + // Translate to process_error. + // + // For old versions of g++ (as of 4.9) ios_base::failure is not derived + // from system_error and so we cannot recover the errno value. Lets use + // EIO in this case. This is a temporary code after all. + // + const system_error* se (dynamic_cast (&e)); + + throw process_error (se != nullptr + ? se->code ().value () + : EIO); + } + }; + + // If we are asked to open null (-2) then open "half-pipe". + // + if (in == -1) + out_fd = open_pipe (); + else if (in == -2) + out_fd.in = open_null (); + + if (out == -1) + in_ofd = open_pipe (); + else if (out == -2) + in_ofd.out = open_null (); + + if (err == -1) + in_efd = open_pipe (); + else if (err == -2) + in_efd.out = open_null (); + + { + ulock l (process_spawn_mutex); // Will not be released in child. + handle = fork (); + + if (handle == -1) + fail (false); + + if (handle == 0) + { + // Child. 
+ // + // Duplicate the user-supplied (fd > -1) or the created pipe descriptor + // to the standard stream descriptor (read end for STDIN_FILENO, write + // end otherwise). Close the pipe afterwards. + // + auto duplicate = [&fail] (int sd, int fd, fdpipe& pd) + { + if (fd == -1 || fd == -2) + fd = (sd == STDIN_FILENO ? pd.in : pd.out).get (); + + assert (fd > -1); + if (dup2 (fd, sd) == -1) + fail (true); + + pd.in.reset (); // Silently close. + pd.out.reset (); // Silently close. + }; + + if (in != STDIN_FILENO) + duplicate (STDIN_FILENO, in, out_fd); + + // If stdout is redirected to stderr (out == 2) we need to duplicate it + // after duplicating stderr to pick up the proper fd. Otherwise keep the + // "natural" order of duplicate() calls, so if stderr is redirected to + // stdout it picks up the proper fd as well. + // + if (out == STDERR_FILENO) + { + if (err != STDERR_FILENO) + duplicate (STDERR_FILENO, err, in_efd); + + if (out != STDOUT_FILENO) + duplicate (STDOUT_FILENO, out, in_ofd); + } + else + { + if (out != STDOUT_FILENO) + duplicate (STDOUT_FILENO, out, in_ofd); + + if (err != STDERR_FILENO) + duplicate (STDERR_FILENO, err, in_efd); + } + + // Change current working directory if requested. + // + if (cwd != nullptr && *cwd != '\0' && chdir (cwd) != 0) + fail (true); + + if (execv (pp.effect_string (), const_cast (&args[0])) == -1) + fail (true); + } + } // Release the lock in parent. + + assert (handle != 0); // Shouldn't get here unless in the parent process. + + this->out_fd = move (out_fd.out); + this->in_ofd = move (in_ofd.in); + this->in_efd = move (in_efd.in); + } + + bool process:: + wait (bool ie) + { + if (handle != 0) + { + int es; + int r (waitpid (handle, &es, 0)); + handle = 0; // We have tried. + + if (r == -1) + { + // If ignore errors then just leave exit nullopt, so it has "no exit + // information available" semantics. 
+ // + if (!ie) + throw process_error (errno); + } + else + exit = process_exit (es, process_exit::as_status); + } + + return exit && exit->normal () && exit->code () == 0; + } + + bool process:: + try_wait () + { + if (handle != 0) + { + int es; + int r (waitpid (handle, &es, WNOHANG)); + + if (r == 0) // Not exited yet. + return false; + + handle = 0; // We have tried. + + if (r == -1) + throw process_error (errno); + + exit = process_exit (es, process_exit::as_status); + } + + return true; + } + + process::id_type process:: + current_id () + { + return getpid (); + } + + // process_exit + // + process_exit:: + process_exit (code_type c) + // + // Note that such an initialization is not portable as POSIX doesn't + // specify the bits layout for the value returned by waitpid(). However + // for the major POSIX systems (Linux, FreeBSD, MacOS) it is the + // following: + // + // [0, 7) - terminating signal + // [7, 8) - coredump flag + // [8, 16) - program exit code + // + // Also the lowest 7 bits value is used to distinguish the normal and + // abnormal process terminations. If it is zero then the program exited + // normally and the exit code is available. + // + : status (c << 8) + { + } + + // Make sure the bits layout we stick to (read above) correlates to the W*() + // macros implementations for the current platform. + // + namespace details + { + // W* macros may require an argument to be lvalue (for example for glibc). 
+ // + static const process_exit::status_type status_code (0xFF00); + + static_assert (WIFEXITED (status_code) && + WEXITSTATUS (status_code) == 0xFF && + !WIFSIGNALED (status_code), + "unexpected process exit status bits layout"); + } + + bool process_exit:: + normal () const + { + return WIFEXITED (status); + } + + process_exit::code_type process_exit:: + code () const + { + assert (normal ()); + return WEXITSTATUS (status); + } + + int process_exit:: + signal () const + { + assert (!normal ()); + + // WIFEXITED() and WIFSIGNALED() can both return false for the same + // status, so we have neither exit code nor signal. We return zero for + // such a case. + // + return WIFSIGNALED (status) ? WTERMSIG (status) : 0; + } + + bool process_exit:: + core () const + { + assert (!normal ()); + + // Not a POSIX macro (available on Linux, FreeBSD, MacOS). + // +#ifdef WCOREDUMP + return WIFSIGNALED (status) && WCOREDUMP (status); +#else + return false; +#endif + } + + string process_exit:: + description () const + { + assert (!normal ()); + + // It would be convenient to use strsignal() or sys_siglist[] to obtain a + // signal name for the number, but the function is not thread-safe and the + // array is not POSIX. So we will use the custom mapping of POSIX signals + // (IEEE Std 1003.1-2008, 2016 Edition) to their names (as they appear in + // glibc). 
+ // + switch (signal ()) + { + case SIGHUP: return "hangup (SIGHUP)"; + case SIGINT: return "interrupt (SIGINT)"; + case SIGQUIT: return "quit (SIGQUIT)"; + case SIGILL: return "illegal instruction (SIGILL)"; + case SIGABRT: return "aborted (SIGABRT)"; + case SIGFPE: return "floating point exception (SIGFPE)"; + case SIGKILL: return "killed (SIGKILL)"; + case SIGSEGV: return "segmentation fault (SIGSEGV)"; + case SIGPIPE: return "broken pipe (SIGPIPE)"; + case SIGALRM: return "alarm clock (SIGALRM)"; + case SIGTERM: return "terminated (SIGTERM)"; + case SIGUSR1: return "user defined signal 1 (SIGUSR1)"; + case SIGUSR2: return "user defined signal 2 (SIGUSR2)"; + case SIGCHLD: return "child exited (SIGCHLD)"; + case SIGCONT: return "continued (SIGCONT)"; + case SIGSTOP: return "stopped (process; SIGSTOP)"; + case SIGTSTP: return "stopped (typed at terminal; SIGTSTP)"; + case SIGTTIN: return "stopped (tty input; SIGTTIN)"; + case SIGTTOU: return "stopped (tty output; SIGTTOU)"; + case SIGBUS: return "bus error (SIGBUS)"; + + // Unavailable on MacOS 10.11. + // +#ifdef SIGPOLL + case SIGPOLL: return "I/O possible (SIGPOLL)"; +#endif + + case SIGPROF: return "profiling timer expired (SIGPROF)"; + case SIGSYS: return "bad system call (SIGSYS)"; + case SIGTRAP: return "trace/breakpoint trap (SIGTRAP)"; + case SIGURG: return "urgent I/O condition (SIGURG)"; + case SIGVTALRM: return "virtual timer expired (SIGVTALRM)"; + case SIGXCPU: return "CPU time limit exceeded (SIGXCPU)"; + case SIGXFSZ: return "file size limit exceeded (SIGXFSZ)"; + + case 0: return "status unknown"; + default: return "unknown signal " + to_string (signal ()); + } + } + +#else // _WIN32 + + static process_path + path_search (const char* f, const dir_path& fb) + { + // Note that there is a similar version for POSIX. + + typedef path::traits traits; + + size_t fn (strlen (f)); + + // Unless there is already the .exe/.bat/.cmd extension, we will need to + // add it. 
+ // + bool ext; + { + const char* e (traits::find_extension (f, fn)); + ext = (e == nullptr || + (casecmp (e, ".exe") != 0 && + casecmp (e, ".bat") != 0 && + casecmp (e, ".cmd") != 0)); + } + + process_path r (f, path (), path ()); // Make sure it is not empty. + path& rp (r.recall); + path& ep (r.effect); + + // Check that the file exists. Since the executable mode is set according + // to the file extension, we don't check for that. + // + auto exists = [] (const char* f) -> bool + { + struct _stat si; + return _stat (f, &si) == 0 && S_ISREG (si.st_mode); + }; + + // Check with extensions: .exe, .cmd, and .bat. + // + auto exists_ext = [&exists] (string& s) -> bool + { + size_t i (s.size () + 1); // First extension letter. + + s += ".exe"; + if (exists (s.c_str ())) + return true; + + s[i] = 'c'; s[i + 1] = 'm'; s[i + 2] = 'd'; + if (exists (s.c_str ())) + return true; + + s[i] = 'b'; s[i + 1] = 'a'; s[i + 2] = 't'; + return exists (s.c_str ()); + }; + + auto search = [&ep, f, fn, ext, &exists, &exists_ext] ( + const char* d, size_t dn, bool norm = false) -> bool + { + string s (move (ep).string ()); // Reuse buffer. + + if (dn != 0) + { + s.assign (d, dn); + + if (!traits::is_separator (s.back ())) + s += traits::directory_separator; + } + + s.append (f, fn); + ep = path (move (s)); // Move back into result. + + if (norm) + ep.normalize (); + + if (!ext) + return exists (ep.string ().c_str ()); + + // Try with the extensions. + // + s = move (ep).string (); + bool e (exists_ext (s)); + ep = path (move (s)); + return e; + }; + + // If there is a directory component in the file, then the PATH search + // does not apply. If the path is relative, then prepend CWD. In both + // cases we may still need to append the extension and make sure the file + // actually exists. 
+ // + if (traits::find_separator (f, fn) != nullptr) + { + if (traits::absolute (f, fn)) + { + bool e; + if (!ext) + e = exists (r.effect_string ()); + else + { + string s (f, fn); + e = exists_ext (s); + ep = path (move (s)); + } + + if (e) + return r; + } + else + { + const string& d (traits::current_directory ()); + + if (search (d.c_str (), d.size (), true)) // Appends extension. + return r; + } + + return process_path (); + } + + // The search order is documented in CreateProcess(). First we look in the + // directory of the parent executable. + // + { + char d[_MAX_PATH + 1]; + DWORD n (GetModuleFileName (NULL, d, _MAX_PATH + 1)); + + if (n == 0 || n == _MAX_PATH + 1) // Failed or truncated. + throw process_error (last_error_msg ()); + + const char* p (traits::rfind_separator (d, n)); + assert (p != nullptr); + + if (search (d, p - d + 1)) // Include trailing slash. + { + // In this case we have to set the recall path. + // + // Note that the directory we have extracted is always absolute but + // the parent's recall path (argv[0]) might be relative. It seems, + // ideally, we would want to use parent's argv[0] dir (if any) to form + // the recall path. In particular, if the parent has no directory, + // then it means it was found via the standard search (e.g., PATH) and + // then so should the child. + // + // How do we get the parent's argv[0]? Luckily, here is __argv on + // Windows. + // + const char* d (__argv[0]); + size_t n (strlen (d)); + if (const char* p = traits::rfind_separator (d, n)) + { + string s (d, p - d + 1); // Include trailing slash. + s.append (f, fn); + rp = path (move (s)); + + // If recall is the same as effective, then set effective to empty. + // + if (rp == ep) + ep.clear (); + } + + return r; + } + } + + // Next look in the current working directory. Crazy, I know. + // + // The recall path is the same as initial, though it might not be a bad + // idea to prepend .\ for clarity. 
+ // + { + const string& d (traits::current_directory ()); + + if (search (d.c_str (), d.size ())) + return r; + } + + // Now search in PATH. Recall is unchanged. + // + for (const char* b (getenv ("PATH")), *e; + b != nullptr; + b = (e != nullptr ? e + 1 : e)) + { + e = strchr (b, traits::path_separator); + + // Empty path (i.e., a double colon or a colon at the beginning or end + // of PATH) means search in the current directory. Silently skip invalid + // paths. + // + try + { + if (search (b, e != nullptr ? e - b : strlen (b))) + return r; + } + catch (const invalid_path&) + { + } + } + + // Finally, if we were given a fallback, try that. This case is similar to + // searching in the parent executable's directory. + // + if (!fb.empty ()) + { + // It would have been nice to preserve trailing slash (by using + // representation() instead of string()), but that would involve a + // copy. Oh, well, can't always win. + // + if (search (fb.string ().c_str (), fb.string ().size ())) + { + // In this case we have to set the recall path. At least here we got + // to keep the original slash. + // + rp = fb; + rp /= f; + + // If recall is the same as effective, then set effective to empty. + // + if (rp == ep) + ep.clear (); + + return r; + } + } + + // Did not find anything. 
+ // + return process_path (); + } + + class auto_handle + { + public: + explicit + auto_handle (HANDLE h = INVALID_HANDLE_VALUE) noexcept: handle_ (h) {} + + auto_handle (const auto_handle&) = delete; + auto_handle& operator= (const auto_handle&) = delete; + + ~auto_handle () noexcept {reset ();} + + HANDLE + get () const noexcept {return handle_;} + + HANDLE + release () noexcept + { + HANDLE r (handle_); + handle_ = INVALID_HANDLE_VALUE; + return r; + } + + void + reset (HANDLE h = INVALID_HANDLE_VALUE) noexcept + { + if (handle_ != INVALID_HANDLE_VALUE) + { + bool r (CloseHandle (handle_)); + + // The valid process, thread or file handle that has no IO operations + // being performed on it should close successfully, unless something + // is severely damaged. + // + assert (r); + } + + handle_ = h; + } + + private: + HANDLE handle_; + }; + + // Make handles inheritable. The process_spawn_mutex must be pre-acquired for + // exclusive access. Revert handles inheritability state in destructor. + // + // There is a period of time when the process ctor makes file handles it + // passes to the child to be inheritable, that otherwise are not inheritable + // by default. During this time these handles can also be inherited by other + // (irrelevant) child processes spawned from other threads. That can lead to + // some unwanted consequences, such as inability to delete a file + // corresponding to such a handle until all children, that the handle leaked + // into, terminate. To prevent this behavior the specific sequence of steps + // (that involves making handles inheritable, spawning process and reverting + // handles to non-inheritable state back) will be performed after acquiring + // the process_spawn_mutex (that is released afterwards). + // + class inheritability_guard + { + public: + // Require the proof that the mutex is pre-acquired for exclusive access.
+ // + inheritability_guard (const ulock&) {} + + ~inheritability_guard () + { + for (auto h: handles_) + inheritable (h, false); // Can't throw. + } + + void + inheritable (HANDLE h) + { + inheritable (h, true); // Can throw. + handles_.push_back (h); + } + + private: + void + inheritable (HANDLE h, bool state) + { + if (!SetHandleInformation ( + h, HANDLE_FLAG_INHERIT, state ? HANDLE_FLAG_INHERIT : 0)) + { + if (state) + throw process_error (last_error_msg ()); + + // We should be able to successfully reset the HANDLE_FLAG_INHERIT flag + // that we successfully set, unless something is severely damaged. + // + assert (false); + } + } + + private: + small_vector handles_; + }; + + process:: + process (const char* cwd, + const process_path& pp, const char* args[], + int in, int out, int err) + { + // Figure out if this is a batch file since running them requires starting + // cmd.exe and passing the batch file as an argument (see CreateProcess() + // for details). + // + const char* batch (nullptr); + { + const char* p (pp.effect_string ()); + const char* e (path::traits::find_extension (p, strlen (p))); + if (e != nullptr && (casecmp (e, ".bat") == 0 || + casecmp (e, ".cmd") == 0)) + { + batch = getenv ("COMSPEC"); + + if (batch == nullptr) + batch = "C:\\Windows\\System32\\cmd.exe"; + } + } + + fdpipe out_fd; + fdpipe in_ofd; + fdpipe in_efd; + + auto open_pipe = [] () -> fdpipe + { + try + { + return fdopen_pipe (); + } + catch (const ios_base::failure&) + { + // Translate to process_error. + // + // For old versions of g++ (as of 4.9) ios_base::failure is not derived + // from system_error and so we cannot recover the errno value. On the + // other hand the only possible values are EMFILE and ENFILE. Let's use + // EMFILE as the more probable. Also let's make no distinction for VC. + // This is a temporary code after all. + // + throw process_error (EMFILE); + } + }; + + auto fail = [](const char* m = nullptr) + { + throw process_error (m == nullptr ?
last_error_msg () : m); + }; + + auto open_null = [] () -> auto_fd + { + // Note that we are using a faster, temporary file-based emulation of + // NUL since we have no way of making sure the child buffers things + // properly (and by default they seem not to). + // + try + { + return fdnull (true); + } + catch (const ios_base::failure& e) + { + // Translate to process_error. + // + // For old versions of g++ (as of 4.9) ios_base::failure is not derived + // from system_error and so we cannot recover the errno value. Let's use + // EIO in this case. This is a temporary code after all. + // + const system_error* se (dynamic_cast (&e)); + + throw process_error (se != nullptr + ? se->code ().value () + : EIO); + } + }; + + // If we are asked to open null (-2) then open "half-pipe". + // + if (in == -1) + out_fd = open_pipe (); + else if (in == -2) + out_fd.in = open_null (); + + if (out == -1) + in_ofd = open_pipe (); + else if (out == -2) + in_ofd.out = open_null (); + + if (err == -1) + in_efd = open_pipe (); + else if (err == -2) + in_efd.out = open_null (); + + // Create the process. + // + + // Serialize the arguments to string. + // + string cmd_line; + { + auto append = [&cmd_line] (const string& a) + { + if (!cmd_line.empty ()) + cmd_line += ' '; + + // On Windows we need to protect values with spaces using quotes. + // Since there could be actual quotes in the value, we need to escape + // them. + // + bool quote (a.empty () || a.find (' ') != string::npos); + + if (quote) + cmd_line += '"'; + + for (size_t i (0); i < a.size (); ++i) + { + if (a[i] == '"') + cmd_line += "\\\""; + else + cmd_line += a[i]; + } + + if (quote) + cmd_line += '"'; + }; + + if (batch != nullptr) + { + append (batch); + append ("/c"); + append (pp.effect_string ()); + } + + for (const char* const* p (args + (batch != nullptr ? 1 : 0)); + *p != 0; + ++p) + append (*p); + } + + // Prepare other process information.
+ // + STARTUPINFO si; + PROCESS_INFORMATION pi; + memset (&si, 0, sizeof (STARTUPINFO)); + memset (&pi, 0, sizeof (PROCESS_INFORMATION)); + + si.cb = sizeof (STARTUPINFO); + si.dwFlags |= STARTF_USESTDHANDLES; + + { + ulock l (process_spawn_mutex); + inheritability_guard ig (l); + + // Resolve file descriptor to HANDLE and make sure it is inherited. Note + // that the handle is closed either when CloseHandle() is called for it + // or when _close() is called for the associated file descriptor. Make + // sure that either the original file descriptor or the resulting HANDLE + // is closed but not both of them. + // + auto get_osfhandle = [&fail, &ig] (int fd) -> HANDLE + { + HANDLE h (reinterpret_cast (_get_osfhandle (fd))); + if (h == INVALID_HANDLE_VALUE) + fail ("unable to obtain file handle"); + + // Make the handle inheritable by the child unless it is already + // inheritable. + // + DWORD f; + if (!GetHandleInformation (h, &f)) + fail (); + + // Note that the flag check is essential as SetHandleInformation() + // fails for standard handles and their duplicates. + // + if ((f & HANDLE_FLAG_INHERIT) == 0) + ig.inheritable (h); + + return h; + }; + + si.hStdInput = in == -1 || in == -2 + ? get_osfhandle (out_fd.in.get ()) + : (in == STDIN_FILENO + ? GetStdHandle (STD_INPUT_HANDLE) + : get_osfhandle (in)); + + si.hStdOutput = out == -1 || out == -2 + ? get_osfhandle (in_ofd.out.get ()) + : (out == STDOUT_FILENO + ? GetStdHandle (STD_OUTPUT_HANDLE) + : get_osfhandle (out)); + + si.hStdError = err == -1 || err == -2 + ? get_osfhandle (in_efd.out.get ()) + : (err == STDERR_FILENO + ? GetStdHandle (STD_ERROR_HANDLE) + : get_osfhandle (err)); + + // Perform standard stream redirection if requested. 
+ // + if (err == STDOUT_FILENO) + si.hStdError = si.hStdOutput; + else if (out == STDERR_FILENO) + si.hStdOutput = si.hStdError; + + if (err == STDIN_FILENO || + out == STDIN_FILENO || + in == STDOUT_FILENO || + in == STDERR_FILENO) + fail ("invalid file descriptor"); + + // Ready for some "Fun with Windows"(TM)? Here is what's in today's + // episode: MSYS2 (actually, Cygwin) tries to emulate POSIX fork() on + // Win32 via some pretty heavy hackery. As a result it makes a bunch of + // assumptions such as that the child process will have the same virtual + // memory position as the parent and that nobody interferes in its + // child-parent dance. + // + // This, however, doesn't always pan out: for reasons unknown Windows + // sometimes decides to start the child somewhere else (or, as Cygwin + // FAQ puts it: "sometimes Windows sets up a process environment that is + // even more hostile to fork() than usual"). Also things like Windows + // Defender (collectively called Big List Of Dodgy Apps/BLODA in Cygwin + // speak) do interfere in all kinds of ways. + // + // We also observe another issue that seems related: if we run multiple + // MSYS2-based applications in parallel (either from the same process + // or from several processes), then they sometimes terminate abnormally + // (but quietly, without printing any of the cygheap/fork diagnostics) + // with status 0xC0000142 (STATUS_DLL_INIT_FAILED). + // + // Cygwin FAQ suggests the following potential solutions: + // + // 1. Restart the process hoping things will pan out next time around. + // + // 2. Eliminate/disable programs from BLODA (disabling Defender helps + // a lot but not entirely). + // + // 3. Apparently switching from 32 to 64-bit should help (less chance + // for address collisions). + // + // 4. Rebase all the Cygwin DLLs (this is a topic for another episode). + // + // To add to this list, we also have our own remedy (which is not + // generally applicable): + // + // 5.
Make sure processes that you start don't need to fork. A good + // example would be tar that runs gz/bzip2/xz. Instead, we start and + // pipe them ourselves. + // + // So what's coming next is a hack that implements remedy #1: after + // starting the process we wait a bit (50ms) and check if it has + // terminated with STATUS_DLL_INIT_FAILED (the assumption here is that + // if this happens, it happens quickly). We then retry starting the + // process for up to a second. + // + // One way to improve this implementation would be to only do it for + // MSYS2-based programs, for example, by checking (EnumProcessModules()) + // if the process loaded the msys-2.0.dll (not clear though if it will + // be in the returned list if it has failed to initialize). With this + // improvement we could then wait longer and try harder. + // + optional msys; // Absent if we don't know. + + for (size_t ret (0); ret != 5; ++ret) + { + if (!CreateProcess ( + batch != nullptr ? batch : pp.effect_string (), + const_cast (cmd_line.c_str ()), + 0, // Process security attributes. + 0, // Primary thread security attributes. + true, // Inherit handles. + 0, // Creation flags. + 0, // Use our environment. + cwd != nullptr && *cwd != '\0' ? cwd : nullptr, + &si, + &pi)) + fail (); + + auto_handle (pi.hThread).reset (); // Close. + + // Detect if this is an MSYS2 process by checking if the process has + // loaded msys-2.0.dll. + // + size_t wait (200); + + if (!msys) + { + // Wait a bit for the process to load its DLLs. + // + if (WaitForSingleObject (pi.hProcess, 50) == WAIT_TIMEOUT) + { + wait -= 50; + + DWORD mn; + HMODULE ms[32]; // Normally it is one of the first. 
+ + if (EnumProcessModules (pi.hProcess, ms, sizeof (ms), &mn)) + { + for (DWORD i (0); !msys && i != mn / sizeof (HMODULE); ++i) + { + char p[_MAX_PATH + 1]; + if (GetModuleFileNameExA (pi.hProcess, ms[i], p, sizeof (p))) + { + size_t n (strlen (p)); + if (n >= 12 && casecmp (p + n - 12, "msys-2.0.dll") == 0) + msys = true; + } + } + + if (!msys) + msys = false; + } + // EnumProcessModules() failed (presumably because the process has + // already exited), fall through. + } + // Process exited, fall through. + } + + if (msys && !*msys) + break; + + // Wait a bit longer and check if the process has terminated. If it is + // still running then we assume all is good. Otherwise, retry if this + // is the DLL initialization error. + // + DWORD s; + if (WaitForSingleObject (pi.hProcess, wait) != WAIT_OBJECT_0 || + !GetExitCodeProcess (pi.hProcess, &s) || + s != STATUS_DLL_INIT_FAILED) + break; + } + } // Revert handles back to non-inheritable and release the lock. + + // 0 has a special meaning denoting a terminated process handle. + // + this->handle = pi.hProcess; + assert (this->handle != 0 && this->handle != INVALID_HANDLE_VALUE); + + this->out_fd = move (out_fd.out); + this->in_ofd = move (in_ofd.in); + this->in_efd = move (in_efd.in); + } + + bool process:: + wait (bool ie) + { + if (handle != 0) + { + DWORD es; + DWORD e (NO_ERROR); + if (WaitForSingleObject (handle, INFINITE) != WAIT_OBJECT_0 || + !GetExitCodeProcess (handle, &es)) + e = GetLastError (); + + auto_handle h (handle); // Auto-deleter. + handle = 0; // We have tried. + + if (e == NO_ERROR) + { + exit = process_exit (); + exit->status = es; + } + else + { + // If ignore errors then just leave exit nullopt, so it has "no exit + // information available" semantics. 
+ // + if (!ie) + throw process_error (error_msg (e)); + } + } + + return exit && exit->normal () && exit->code () == 0; + } + + bool process:: + try_wait () + { + if (handle != 0) + { + DWORD r (WaitForSingleObject (handle, 0)); + if (r == WAIT_TIMEOUT) + return false; + + DWORD es; + DWORD e (NO_ERROR); + if (r != WAIT_OBJECT_0 || !GetExitCodeProcess (handle, &es)) + e = GetLastError (); + + auto_handle h (handle); + handle = 0; // We have tried. + + if (e != NO_ERROR) + throw process_error (error_msg (e)); + + exit = process_exit (); + exit->status = es; + } + + return true; + } + + process::id_type process:: + id () const + { + id_type r (GetProcessId (handle)); + + if (r == 0) + throw process_error (last_error_msg ()); + + return r; + } + + process::id_type process:: + current_id () + { + return GetCurrentProcessId (); + } + + // process_exit + // + process_exit:: + process_exit (code_type c) + // + // The NTSTATUS value returned by GetExitCodeProcess() has the following + // layout of bits: + // + // [ 0, 16) - program exit code or exception code + // [16, 29) - facility + // [29, 30) - flag indicating if the status value is customer-defined + // [30, 32) - severity (00 - success, 01 - informational, 10 - warning, + // 11 - error) + // + : status (c) + { + } + + bool process_exit:: + normal () const + { + // We consider status values with severities other than 0 not being + // returned by the process and so denoting the abnormal termination. + // + return ((status >> 30) & 0x3) == 0; + } + + process_exit::code_type process_exit:: + code () const + { + assert (normal ()); + return status & 0xFFFF; + } + + string process_exit:: + description () const + { + assert (!normal ()); + + // Error codes (or, as MSDN calls them, exception codes) are defined in + // ntstatus.h. It is possible to obtain message descriptions for them + // using FormatMessage() with the FORMAT_MESSAGE_FROM_HMODULE flag and the + // handle returned by LoadLibrary("NTDLL.DLL") call.
However, the returned + // messages are pretty much useless being format strings. For example for + // STATUS_ACCESS_VIOLATION error code the message string is "The + // instruction at 0x%p referenced memory at 0x%p. The memory could not be + // %s.". Also under Wine (1.9.8) it is not possible to obtain such + // descriptions at all for some reason. + // + // Let's use a custom code-to-message mapping for the most common error + // codes, and extend it as needed. + // + // Note that the error code most likely will be messed up if the abnormal + // termination of a process is intercepted with the "searching for + // available solution" message box or debugger invocation. Also note that + // the same failure can result in different exit codes for a process being + // run on Windows natively and under Wine. For example under Wine 1.9.8 a + // process that fails due to the stack overflow exits normally with 0 + // status but prints the "err:seh:setup_exception stack overflow ..." + // message to stderr. + // + switch (status) + { + case STATUS_ACCESS_VIOLATION: return "access violation"; + case STATUS_DLL_INIT_FAILED: return "DLL initialization failed"; + case STATUS_INTEGER_DIVIDE_BY_ZERO: return "integer divided by zero"; + + // VC-compiled program that calls abort() terminates with this error code + // (0xC0000409). That differs from MinGW GCC-compiled one, that exits + // normally with status 3 (conforms to MSDN). Under Wine (1.9.8) such a + // program exits with status 3 for both VC and MinGW GCC. Sounds weird. + // + case STATUS_STACK_BUFFER_OVERRUN: return "stack buffer overrun"; + case STATUS_STACK_OVERFLOW: return "stack overflow"; + + default: + { + string desc ("unknown error 0x"); + + // Add error code hex representation (as it is defined in ntstatus.h). + // + // Strange enough, there is no easy way to convert a number into the + // hex string representation (not using streams).
+ // + const char digits[] = "0123456789ABCDEF"; + bool skip (true); // Skip leading zeros. + + auto add = [&desc, &digits, &skip] (unsigned char d, bool force) + { + if (d != 0 || !skip || force) + { + desc += digits[d]; + skip = false; + } + }; + + for (int i (sizeof (status) - 1); i >= 0 ; --i) + { + unsigned char c ((status >> (i * 8)) & 0xFF); + add ((c >> 4) & 0xF, false); // Convert the high 4 bits to a digit. + add (c & 0xF, i == 0); // Convert the low 4 bits to a digit. + } + + return desc; + } + } + } + +#endif // _WIN32 +} diff --git a/libbutl/process.hxx b/libbutl/process.hxx new file mode 100644 index 0000000..799313e --- /dev/null +++ b/libbutl/process.hxx @@ -0,0 +1,569 @@ +// file : libbutl/process.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_PROCESS_HXX +#define LIBBUTL_PROCESS_HXX + +#ifndef _WIN32 +# include // pid_t +#endif + +#include +#include +#include +#include // uint32_t +#include + +#include +#include +#include +#include // auto_fd, fdpipe +#include +#include + +namespace butl +{ + struct process_error: std::system_error + { + const bool child; + + process_error (int e, bool child = false) + : system_error (e, std::generic_category ()), child (child) {} + +#ifdef _WIN32 + process_error (const std::string& d, int fallback_errno_code = 0) + : system_error (fallback_errno_code, std::system_category (), d), + child (false) {} +#endif + }; + + struct process_child_error: process_error + { + explicit + process_child_error (int e): process_error (e, true) {} + }; + + // Process arguments (i.e., the command line). The first must be an + // executable name and the last element should be NULL. Can also be the + // multi-process piped command line (see process::print() for details). + // + struct process_args + { + const char* const* argv; + std::size_t argc; + }; + + // A process executable has three paths: initial, recall, and effective. 
+ // Initial is the original "command" that you specify in argv[0] and on + // POSIX that's what ends up in the child's argv[0]. But not on Windows. On + // Windows the command is first searched for in the parent executable's + // directory and if found then that's what should end up in child's argv[0]. + // So this is the recall path. It is called recall because this is what the + // caller of the parent process will be able to execute if you printed the + // command line (provided you haven't changed the CWD). Finally, effective + // is the absolute path to the executable that will include the directory + // part if found in PATH, the .exe extension if one is missing, etc. + // + // As an example, let's say we run foo\foo.exe that itself spawns bar which + // is found as foo\bar.exe. The paths will then be: + // + // initial: bar + // recall: foo\bar + // effective: c:\...\foo\bar.exe + // + // In most cases, at least on POSIX, the first two paths will be the same. + // As an optimization, if the recall path is empty, then it means it is the + // same as initial. Similarly, if the effective path is empty then, it is + // the same as recall (and if that is empty, as initial). + // + // Note that the call to path_search() below adjusts args[0] to point to the + // recall path which brings up lifetime issues. To address this, this class + // also implements an RAII-based auto-restore of args[0] to its initial + // value. + // + class process_path + { + public: + const char* initial = nullptr; + path recall; + path effect; + + // Handle empty recall/effect. + // + const char* recall_string () const; + const char* effect_string () const; + + bool empty () const + { + return initial == nullptr && recall.empty () && effect.empty (); + } + + // Moveable-only type.
+ // + process_path (process_path&&); + process_path& operator= (process_path&&); + + process_path (const process_path&) = delete; + process_path& operator= (const process_path&) = delete; + + process_path () = default; + process_path (const char* i, path&& r, path&& e); + ~process_path (); + + private: + friend class process; + const char** args0_ = nullptr; + }; + + // Process exit information. + // + struct LIBBUTL_EXPORT process_exit + { + // Status type is the raw exit value as returned by GetExitCodeProcess() + // (NTSTATUS value that represents exit or error codes; MSDN refers to the + // error code as "value of the exception that caused the termination") or + // waitpid(1). Code type is the return value if the process exited + // normally. + // +#ifndef _WIN32 + using status_type = int; + using code_type = std::uint8_t; +#else + using status_type = std::uint32_t; // Win32 DWORD + using code_type = std::uint16_t; // Win32 WORD +#endif + + status_type status; + + process_exit () = default; + + explicit + process_exit (code_type); + + enum as_status_type {as_status}; + process_exit (status_type s, as_status_type): status (s) {} + + // Return false if the process exited abnormally. + // + bool + normal () const; + + code_type + code () const; + + explicit operator bool () const {return normal () && code () == 0;} + + // Abnormal termination information. + // + + // Return the signal number that caused the termination or 0 if no such + // information is available. + // + int + signal () const; + + // Return true if the core file was generated. + // + bool + core () const; + + // Return a description of the reason that caused the process to terminate + // abnormally. On POSIX this is the signal name, on Windows -- the summary + // produced from the corresponding error identifier defined in ntstatus.h. 
+ // + std::string + description () const; + }; + + class LIBBUTL_EXPORT process + { + public: +#ifndef _WIN32 + using handle_type = pid_t; + using id_type = pid_t; +#else + using handle_type = void*; // Win32 HANDLE + using id_type = std::uint32_t; // Win32 DWORD +#endif + + // Start another process using the specified command line. The default + // values to the in, out and err arguments indicate that the child process + // should inherit the parent process stdin, stdout, and stderr, + // respectively. If -1 is passed instead, then the corresponding child + // process descriptor is connected (via a pipe) to out_fd for stdin, + // in_ofd for stdout, and in_efd for stderr (see data members below). If + // -2 is passed, then the corresponding child process descriptor is + // replaced with the null device descriptor (e.g., /dev/null). This + // results in the child process not being able to read anything from stdin + // (gets immediate EOF) and all data written to stdout/stderr being + // discarded. + // + // On Windows parent process pipe descriptors are set to text mode to be + // consistent with the default (text) mode of standard file descriptors of + // the child process. When reading in the text mode the sequence of 0xD, + // 0xA characters is translated into the single 0xA character and 0x1A is + // interpreted as EOF. When writing in the text mode the 0xA character is + // translated into the 0xD, 0xA sequence. Use the fdmode() function to + // change the mode, if required. + // + // Instead of passing -1, -2 or the default value, you can also pass your + // own descriptors. Note, however, that in this case they are not closed by + // the parent. So you should do this yourself, if required. For example, + // to redirect the child process stdout to stderr, you can do: + // + // process p (..., 0, 2); + // + // Throw process_error if anything goes wrong.
Note that some of the + // exceptions (e.g., if exec() failed) can be thrown in the child + // version of us (as process_child_error). + // + // Note that the versions without the process_path argument may + // temporarily change args[0] (see path_search() for details). + // + process (const char* args[], int in = 0, int out = 1, int err = 2); + + process (const process_path&, const char* args[], + int in = 0, int out = 1, int err = 2); + + // The "piping" constructor, for example: + // + // process lhs (..., 0, -1); // Redirect stdout to a pipe. + // process rhs (..., lhs); // Redirect stdin to lhs's pipe. + // + // rhs.wait (); // Wait for last first. + // lhs.wait (); + // + process (const char* args[], process& in, int out = 1, int err = 2); + + process (const process_path&, const char* args[], + process& in, int out = 1, int err = 2); + + // Versions of the above constructors that allow us to change the + // current working directory of the child process. NULL and empty + // cwd arguments are ignored. + // + process (const char* cwd, const char* [], int = 0, int = 1, int = 2); + + process (const char* cwd, + const process_path&, const char* [], + int = 0, int = 1, int = 2); + + process (const char* cwd, const char* [], process&, int = 1, int = 2); + + process (const char* cwd, + const process_path&, const char* [], + process&, int = 1, int = 2); + + // Wait for the process to terminate. Return true if the process + // terminated normally and with the zero exit code. Unless ignore_errors + // is true, throw process_error if anything goes wrong. This function can + // be called multiple times with subsequent calls simply returning the + // status. + // + bool + wait (bool ignore_errors = false); + + // Return true if the process has already terminated in which case + // optionally set the argument to the result of wait().
+ // + bool + try_wait (); + + bool + try_wait (bool&); + + // Note that the destructor will wait for the process but will ignore + // any errors and the exit status. + // + ~process () {if (handle != 0) wait (true);} + + // Moveable-only type. + // + process (process&&); + process& operator= (process&&); + + process (const process&) = delete; + process& operator= (const process&) = delete; + + // Create an empty or "already terminated" process. By default the + // termination status is unknown but you can change that. + // + explicit + process (optional = nullopt); + + // Resolve process' paths based on the initial path in args0. If recall + // differs from initial, adjust args0 to point to the recall path. If + // resolution fails, throw process_error. Normally, you will use this + // function like this: + // + // const char* args[] = {"foo", ..., nullptr}; + // + // process_path pp (process::path_search (args[0])) + // + // ... // E.g., print args[0]. + // + // process p (pp, args); + // + // You can also specify the fallback directory which will be tried last. + // This, for example, can be used to implement the Windows "search in the + // parent executable's directory" semantics across platforms. + // + static process_path + path_search (const char*& args0, const dir_path& fallback = dir_path ()); + + // This version is primarily useful when you want to pre-search the + // executable before creating the args[] array. In this case you will + // use the recall path for args[0]. + // + // The init argument determines whether to initialize the initial path to + // the shallow copy of file. If it is true, then initial is the same as + // file and recall is either empty or contain a different path. If it is + // false then initial contains a shallow copy of recall, and recall is + // either a different path or a deep copy of file. 
Normally you don't care + // about initial once you got recall and the main reason to pass true to + // this argument is to save a copy (since initial and recall are usually + // the same). + // + static process_path + path_search (const char* file, bool init, const dir_path& = dir_path ()); + + static process_path + path_search (const std::string&, bool, const dir_path& = dir_path ()); + + static process_path + path_search (const path&, bool, const dir_path& = dir_path ()); + + // As above but if not found return empty process_path instead of + // throwing. + // + static process_path + try_path_search (const char*, bool, const dir_path& = dir_path ()); + + static process_path + try_path_search (const std::string&, bool, const dir_path& = dir_path ()); + + static process_path + try_path_search (const path&, bool, const dir_path& = dir_path ()); + + // Print process command line. If the number of elements is specified, + // then it will print the piped multi-process command line, if present. + // In this case, the expected format is as follows: + // + // name1 arg arg ... nullptr + // name2 arg arg ... nullptr + // ... + // nameN arg arg ... nullptr nullptr + // + static void + print (std::ostream&, const char* const args[], size_t n = 0); + + public: + id_type + id () const; + + static id_type + current_id (); + + public: + handle_type handle; + + // Absence means that the exit information is not (yet) known. This can be + // because you haven't called wait() yet or because wait() failed. + // + optional exit; + + // Use the following file descriptors to communicate with the new process's + // standard streams. + // + auto_fd out_fd; // Write to it to send to stdin. + auto_fd in_ofd; // Read from it to receive from stdout. + auto_fd in_efd; // Read from it to receive from stderr. + }; + + // Higher-level process running interface that aims to make executing a + // process for the common cases as simple as calling a function.
Normally + // it is further simplified by project-specific wrapper functions that + // handle the process_error exception as well as abnormal and/or non-zero + // exit status. + // + // The I/O/E arguments determine the child's stdin/stdout/stderr. They can + // be of type int, auto_fd (and, in the future, perhaps also fd_pipe, + // string, buffer, etc). For example, the following call will make stdin + // read from /dev/null, stdout redirect to stderr, and inherit the parent's + // stderr. + // + // process_run (..., fdnull (), 2, 2, ...) + // + // The P argument is the program path. It can be anything that can be passed + // to process::path_search() (const char*, std::string, path) or the + // process_path itself. + // + // The A arguments can be anything convertible to const char* via the + // overloaded process_arg_as() (see below). Out of the box you can use const + // char*, std::string, path/dir_path, (as well as [small_]vector[_view] of + // these), and numeric types. + // + template + process_exit + process_run (I&& in, + O&& out, + E&& err, + const dir_path& cwd, + const P&, + A&&... args); + + // The version with the command callback that can be used for printing the + // command line or similar. It should be callable with the following + // signature: + // + // void (const char*[], std::size_t) + // + template + process_exit + process_run (const C&, + I&& in, + O&& out, + E&& err, + const dir_path& cwd, + const P&, + A&&... args); + + // Versions that start the process without waiting. + // + template + process + process_start (I&& in, + O&& out, + E&& err, + const dir_path& cwd, + const P&, + A&&... args); + + template + process + process_start (const C&, + I&& in, + O&& out, + E&& err, + const dir_path& cwd, + const P&, + A&&... args); + + // Conversion of types to their C string representations. Can be overloaded + // (including via ADL) for custom types. 
The default implementation calls + // to_string() which covers all the numeric values via std::to_string () and + // also any type that defines to_string() (via ADL). + // + template + inline const char* + process_arg_as (const T& x, std::string& storage) + { + using namespace std; + return (storage = to_string (x)).c_str (); + } + + inline const char* + process_arg_as (const std::string& s, std::string&) {return s.c_str ();} + + template + inline const char* + process_arg_as (const basic_path& p, std::string&) + { + return p.string ().c_str (); + } + + // char[N] + // + inline const char* + process_arg_as (const char* s, std::string&) {return s;} + + template + inline const char* + process_arg_as (char (&s)[N], std::string&) {return s;} + + template + inline const char* + process_arg_as (const char (&s)[N], std::string&) {return s;} + + template + inline void + process_args_as (V& v, const T& x, std::string& storage) + { + v.push_back (process_arg_as (x, storage)); + } + + // [small_]vector[_view]<> + // + template + inline void + process_args_as (V& v, const std::vector& vs, std::string&) + { + for (const std::string& s: vs) + v.push_back (s.c_str ()); + } + + template + inline void + process_args_as (V& v, const small_vector& vs, std::string&) + { + for (const std::string& s: vs) + v.push_back (s.c_str ()); + } + + template + inline void + process_args_as (V& v, const vector_view& vs, std::string&) + { + for (const std::string& s: vs) + v.push_back (s.c_str ()); + } + + template + inline void + process_args_as (V& v, const std::vector& vs, std::string&) + { + for (const char* s: vs) + v.push_back (s); + } + + template + inline void + process_args_as (V& v, const small_vector& vs, std::string&) + { + for (const char* s: vs) + v.push_back (s); + } + + template + inline void + process_args_as (V& v, const vector_view& vs, std::string&) + { + for (const char* s: vs) + v.push_back (s); + } +} + +#include + +#include + +#endif // LIBBUTL_PROCESS_HXX diff --git 
a/libbutl/process.ixx b/libbutl/process.ixx new file mode 100644 index 0000000..9a09487 --- /dev/null +++ b/libbutl/process.ixx @@ -0,0 +1,207 @@ +// file : libbutl/process.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include // move() + +namespace butl +{ + // process_path + // + inline process_path:: + ~process_path () + { + if (args0_ != nullptr) + *args0_ = initial; + } + + inline process_path:: + process_path (const char* i, path&& r, path&& e) + : initial (i), + recall (std::move (r)), + effect (std::move (e)), + args0_ (nullptr) {} + + inline process_path:: + process_path (process_path&& p) + : initial (p.initial), + recall (std::move (p.recall)), + effect (std::move (p.effect)), + args0_ (p.args0_) + { + p.args0_ = nullptr; + } + + inline process_path& process_path:: + operator= (process_path&& p) + { + if (this != &p) + { + if (args0_ != nullptr) + *args0_ = initial; + + initial = p.initial; + recall = std::move (p.recall); + effect = std::move (p.effect); + args0_ = p.args0_; + + p.args0_ = nullptr; + } + + return *this; + } + + inline const char* process_path:: + recall_string () const + { + return recall.empty () ? initial : recall.string ().c_str (); + } + + inline const char* process_path:: + effect_string () const + { + return effect.empty () ? 
recall_string () : effect.string ().c_str (); + } + + // process_exit + // +#ifdef _WIN32 + inline int process_exit:: + signal () const + { + return 0; + } + + inline bool process_exit:: + core () const + { + return false; + } +#endif + + // process + // +#ifndef _WIN32 + inline process::id_type process:: + id () const + { + return handle; + } +#endif + + inline process_path process:: + path_search (const char*& a0, const dir_path& fb) + { + process_path r (path_search (a0, true, fb)); + + if (!r.recall.empty ()) + { + r.args0_ = &a0; + a0 = r.recall.string ().c_str (); + } + + return r; + } + + inline process_path process:: + path_search (const std::string& f, bool i, const dir_path& fb) + { + return path_search (f.c_str (), i, fb); + } + + inline process_path process:: + path_search (const path& f, bool i, const dir_path& fb) + { + return path_search (f.string ().c_str (), i, fb); + } + + inline process_path process:: + try_path_search (const std::string& f, bool i, const dir_path& fb) + { + return try_path_search (f.c_str (), i, fb); + } + + inline process_path process:: + try_path_search (const path& f, bool i, const dir_path& fb) + { + return try_path_search (f.string ().c_str (), i, fb); + } + + inline process:: + process (optional e) + : handle (0), + exit (std::move (e)), + out_fd (-1), + in_ofd (-1), + in_efd (-1) + { + } + + inline process:: + process (const char* args[], int in, int out, int err) + : process (nullptr, path_search (args[0]), args, in, out, err) {} + + inline process:: + process (const process_path& pp, const char* args[], + int in, int out, int err) + : process (nullptr, pp, args, in, out, err) {} + + inline process:: + process (const char* args[], process& in, int out, int err) + : process (nullptr, path_search (args[0]), args, in, out, err) {} + + inline process:: + process (const process_path& pp, const char* args[], + process& in, int out, int err) + : process (nullptr, pp, args, in, out, err) {} + + inline process:: + process (const 
char* cwd, const char* args[], int in, int out, int err) + : process (cwd, path_search (args[0]), args, in, out, err) {} + + inline process:: + process (const char* cwd, const char* args[], process& in, int out, int err) + : process (cwd, path_search (args[0]), args, in, out, err) {} + + inline process:: + process (process&& p) + : handle (p.handle), + exit (std::move (p.exit)), + out_fd (std::move (p.out_fd)), + in_ofd (std::move (p.in_ofd)), + in_efd (std::move (p.in_efd)) + { + p.handle = 0; + } + + inline process& process:: + operator= (process&& p) + { + if (this != &p) + { + if (handle != 0) + wait (); + + handle = p.handle; + exit = std::move (p.exit); + out_fd = std::move (p.out_fd); + in_ofd = std::move (p.in_ofd); + in_efd = std::move (p.in_efd); + + p.handle = 0; + } + + return *this; + } + + inline bool process:: + try_wait (bool& s) + { + bool r (try_wait ()); + + if (r) + s = exit && exit->normal () && exit->code () == 0; + + return r; + } +} diff --git a/libbutl/sendmail.cxx b/libbutl/sendmail.cxx new file mode 100644 index 0000000..5b1c863 --- /dev/null +++ b/libbutl/sendmail.cxx @@ -0,0 +1,40 @@ +// file : libbutl/sendmail.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +using namespace std; + +namespace butl +{ + void sendmail:: + headers (const std::string& from, + const std::string& subj, + const recipients_type& to, + const recipients_type& cc, + const recipients_type& bcc) + { + if (!from.empty ()) + out << "From: " << from << endl; + + auto rcp =[this] (const char* h, const recipients_type& rs) + { + if (!rs.empty ()) + { + bool f (true); + out << h << ": "; + for (const string& r: rs) + out << (f ? (f = false, "") : ", ") << r; + out << endl; + } + }; + + rcp ("To", to); + rcp ("Cc", cc); + rcp ("Bcc", bcc); + + out << "Subject: " << subj << endl + << endl; // Header/body separator. 
+ } +} diff --git a/libbutl/sendmail.hxx b/libbutl/sendmail.hxx new file mode 100644 index 0000000..8ff264d --- /dev/null +++ b/libbutl/sendmail.hxx @@ -0,0 +1,91 @@ +// file : libbutl/sendmail.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_SENDMAIL_HXX +#define LIBBUTL_SENDMAIL_HXX + +#include + +#include + +#include +#include +#include + +namespace butl +{ + // Send email using the sendmail(1) program. + // + // Write the body of the email to out. Note that you must explicitly close + // it before calling wait(). Throw process_error and io_error (both derive + // from system_error) in case of errors. + // + // Typical usage: + // + // try + // { + // sendmail sm (2, // Diagnostics to stderr. + // "", // Default From: address. + // "Test subject", + // {"test@example.com"}); + // + // sm.out << "Test body" << endl; + // + // sm.out.close (); + // + // if (!sm.wait ()) + // ... // sendmail returned non-zero status. + // } + // catch (const std::system_error& e) + // { + // cerr << "sendmail error: " << e << endl; + // } + // + class LIBBUTL_EXPORT sendmail: public process + { + public: + ofdstream out; + + // Notes: + // + // - If from is empty then the process user's address is used. + // + // - The to/cc/bcc addressed should already be quoted if required. + // + using recipients_type = small_vector; + + template + sendmail (E&& err, + const std::string& from, + const std::string& subject, + const recipients_type& to, + const recipients_type& cc = recipients_type (), + const recipients_type& bcc = recipients_type (), + O&&... options); + + // Version with the command line callback (see process_run() for details). + // + template + sendmail (const C&, + E&& err, + const std::string& from, + const std::string& subject, + const recipients_type& to, + const recipients_type& cc = recipients_type (), + const recipients_type& bcc = recipients_type (), + O&&... 
options); + + private: + void + headers (const std::string& from, + const std::string& subj, + const recipients_type& to, + const recipients_type& cc, + const recipients_type& bcc); + }; +} + +#include + +#endif // LIBBUTL_SENDMAIL_HXX diff --git a/libbutl/sendmail.ixx b/libbutl/sendmail.ixx new file mode 100644 index 0000000..067153c --- /dev/null +++ b/libbutl/sendmail.ixx @@ -0,0 +1,68 @@ +// file : libbutl/sendmail.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include // move(), forward() + +namespace butl +{ + template + inline sendmail:: + sendmail (E&& err, + const std::string& from, + const std::string& subj, + const recipients_type& to, + const recipients_type& cc, + const recipients_type& bcc, + O&&... options) + : sendmail ([] (const char* [], std::size_t) {}, + std::forward (err), + from, + subj, + to, + cc, + bcc, + std::forward (options)...) + { + } + + template + inline sendmail:: + sendmail (const C& cmdc, + E&& err, + const std::string& from, + const std::string& subj, + const recipients_type& to, + const recipients_type& cc, + const recipients_type& bcc, + O&&... options) + { + fdpipe pipe (fdopen_pipe ()); // Text mode seems appropriate. + + process& p (*this); + p = process_start (cmdc, + pipe.in, + 2, // No output expected so redirect to stderr. + std::forward (err), + dir_path (), + "sendmail", + "-i", // Don't treat '.' as the end of input. + "-t", // Read recipients from headers. + std::forward (options)...); + + // Close the reading end of the pipe not to block on writing if sendmail + // terminates before that. + // + pipe.in.close (); + + out.open (std::move (pipe.out)); + + // Write headers. + // + // Note that if this throws, then the ofdstream will be closed first + // (which should signal to the process we are done). Then the process's + // destructor will wait for its termination ignoring any errors. 
+ // + headers (from, subj, to, cc, bcc); + } +} diff --git a/libbutl/sha256.cxx b/libbutl/sha256.cxx new file mode 100644 index 0000000..09f6844 --- /dev/null +++ b/libbutl/sha256.cxx @@ -0,0 +1,142 @@ +// file : libbutl/sha256.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +// C interface for sha256c. +// +#include +#include // size_t + +#include // isxdigit() +#include // invalid_argument + +#include // ucase(), lcase() + +using SHA256_CTX = butl::sha256::context; + +extern "C" +{ + static void SHA256_Init (SHA256_CTX*); + static void SHA256_Update (SHA256_CTX*, const void*, size_t); + static void SHA256_Final (uint8_t[32], SHA256_CTX*); + +#include "sha256c.c" +} + +using namespace std; + +namespace butl +{ + // sha256 + // + sha256:: + sha256 () + : done_ (false) + { + SHA256_Init (&ctx_); + } + + void sha256:: + append (const void* b, size_t n) + { + SHA256_Update (&ctx_, b, n); + } + + const sha256::digest_type& sha256:: + binary () const + { + if (!done_) + { + SHA256_Final (bin_, &ctx_); + done_ = true; + str_[0] = '\0'; // Indicate we haven't computed the string yet. 
+ } + + return bin_; + } + + static const char hex_map[16] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f'}; + + const char* sha256:: + string () const + { + if (!done_) + binary (); + + if (str_[0] == '\0') + { + for (size_t i (0); i != 32; ++i) + { + str_[i * 2] = hex_map[bin_[i] >> 4]; + str_[i * 2 + 1] = hex_map[bin_[i] & 0x0f]; + } + + str_[64] = '\0'; + } + + return str_; + } + + // Conversion functions + // + string + sha256_to_fingerprint (const string& s) + { + auto bad = []() {throw invalid_argument ("invalid SHA256 string");}; + + size_t n (s.size ()); + if (n != 64) + bad (); + + string f; + f.reserve (n + 31); + for (size_t i (0); i < n; ++i) + { + char c (s[i]); + if (!isxdigit (c)) + bad (); + + if (i > 0 && i % 2 == 0) + f += ":"; + + f += ucase (c); + } + + return f; + } + + string + fingerprint_to_sha256 (const string& f) + { + auto bad = []() {throw invalid_argument ("invalid fingerprint");}; + + size_t n (f.size ()); + if (n != 32 * 3 - 1) + bad (); + + string s; + s.reserve (64); + for (size_t i (0); i < n; ++i) + { + char c (f[i]); + if ((i + 1) % 3 == 0) + { + if (c != ':') + bad (); + } + else + { + if (!isxdigit (c)) + bad (); + + s += lcase (c); + } + } + + return s; + } +} diff --git a/libbutl/sha256.hxx b/libbutl/sha256.hxx new file mode 100644 index 0000000..eea2b23 --- /dev/null +++ b/libbutl/sha256.hxx @@ -0,0 +1,101 @@ +// file : libbutl/sha256.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_SHA256_HXX +#define LIBBUTL_SHA256_HXX + +#include +#include // strlen() +#include +#include // size_t + +#include + +namespace butl +{ + // SHA256 checksum calculator. + // + // For a single chunk of data a sum can be obtained in one line, for + // example: + // + // cerr << sha256 ("123").string () << endl; + // + class LIBBUTL_EXPORT sha256 + { + public: + sha256 (); + + // Append binary data. 
+ // + void + append (const void*, std::size_t); + + sha256 (const void* b, std::size_t n): sha256 () {append (b, n);} + + // Append string. + // + // Note that the hash includes the '\0' terminator. Failed that, a call + // with an empty string will be indistinguishable from no call at all. + // + void + append (const std::string& s) {append (s.c_str (), s.size () + 1);} + + void + append (const char* s) {append (s, std::strlen (s) + 1);} + + explicit + sha256 (const std::string& s): sha256 () {append (s);} + + explicit + sha256 (const char* s): sha256 () {append (s);} + + // Extract result. + // + // It can be obtained as either a 32-byte binary digest or as a 64- + // character hex-encoded C-string. + // + using digest_type = std::uint8_t[32]; + + const digest_type& + binary () const; + + const char* + string () const; + + public: + struct context + { + std::uint32_t state[8]; + std::uint64_t count; + std::uint8_t buf[64]; + }; + + private: + union + { + mutable context ctx_; + mutable char str_[65]; + }; + + mutable digest_type bin_; + mutable bool done_; + }; + + // Convert a SHA256 string representation (64 hex digits) to the fingerprint + // canonical representation (32 colon-separated upper case hex digit pairs, + // like 01:AB:CD:...). Throw invalid_argument if the argument is not a valid + // SHA256 string. + // + LIBBUTL_EXPORT std::string + sha256_to_fingerprint (const std::string&); + + // Convert a fingerprint (32 colon-separated hex digit pairs) to the SHA256 + // string representation (64 lower case hex digits). Throw invalid_argument + // if the argument is not a valid fingerprint. + // + LIBBUTL_EXPORT std::string + fingerprint_to_sha256 (const std::string&); +} + +#endif // LIBBUTL_SHA256_HXX diff --git a/libbutl/sha256c.c b/libbutl/sha256c.c new file mode 100644 index 0000000..156751c --- /dev/null +++ b/libbutl/sha256c.c @@ -0,0 +1,393 @@ +/*- + * Copyright 2005 Colin Percival + * Copyright (c) 2017 Code Synthesis Ltd + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include /* size_t */ + +#ifdef SHA256_TEST_DRIVER + +struct context +{ + uint32_t state[8]; + uint64_t count; + uint8_t buf[64]; +}; + +typedef struct context SHA256_CTX; + +static void SHA256_Init (SHA256_CTX*); +static void SHA256_Update (SHA256_CTX*, const void*, size_t); +static void SHA256_Final (uint8_t[32], SHA256_CTX*); + +#include +#include + +int +main () +{ + SHA256_CTX c; + uint8_t r[32]; + + /* "" e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 */ + SHA256_Init (&c); + SHA256_Final (r, &c); + assert (r[0] == 0xe3 && r[31] == 0x55); + + /* "123" a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3 */ + SHA256_Init (&c); + SHA256_Update (&c, "123", 3); + SHA256_Final (r, &c); + assert (r[0] == 0xa6 && r[31] == 0xe3); + + return 0; +} + +#endif /* SHA256_TEST_DRIVER */ + +#ifdef __FreeBSD__ +# include /* BYTE_ORDER, be32dec(), be32enc(), be64enc */ +#else +# if defined(_WIN32) +# ifndef BYTE_ORDER +# define BIG_ENDIAN 4321 +# define LITTLE_ENDIAN 1234 +# define BYTE_ORDER LITTLE_ENDIAN +# endif +# else +# include /* BYTE_ORDER/__BYTE_ORDER */ +# ifndef BYTE_ORDER +# ifdef __BYTE_ORDER +# define BYTE_ORDER __BYTE_ORDER +# define BIG_ENDIAN __BIG_ENDIAN +# define LITTLE_ENDIAN __LITTLE_ENDIAN +# else +# error no BYTE_ORDER/__BYTE_ORDER define +# endif +# endif +# endif + +static uint32_t +be32dec(const void *pp) +{ + unsigned char const *p = (unsigned char const *)pp; + + return ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]); +} + +static void +be32enc(void *pp, uint32_t u) +{ + unsigned char *p = (unsigned char *)pp; + + p[0] = (u >> 24) & 0xff; + p[1] = (u >> 16) & 0xff; + p[2] = (u >> 8) & 0xff; + p[3] = u & 0xff; +} + +static void +be64enc(void *pp, uint64_t u) +{ + unsigned char *p = (unsigned char *)pp; + + p[0] = (u >> 56) & 0xff; + p[1] = (u >> 48) & 0xff; + p[2] = (u >> 40) & 0xff; + p[3] = (u >> 32) & 0xff; + p[4] = (u >> 24) & 0xff; + p[5] = (u >> 16) & 0xff; + p[6] = (u >> 8) & 0xff; + p[7] = u 
& 0xff; +} +#endif + +/* The rest is the unmodified (except for a few explicit casts to make it + compilable in C++), latest implementation from FreeBSD. */ + +#include + +#if BYTE_ORDER == BIG_ENDIAN + +/* Copy a vector of big-endian uint32_t into a vector of bytes */ +#define be32enc_vect(dst, src, len) \ + memcpy((void *)dst, (const void *)src, (size_t)len) + +/* Copy a vector of bytes into a vector of big-endian uint32_t */ +#define be32dec_vect(dst, src, len) \ + memcpy((void *)dst, (const void *)src, (size_t)len) + +#else /* BYTE_ORDER != BIG_ENDIAN */ + +/* + * Encode a length len/4 vector of (uint32_t) into a length len vector of + * (unsigned char) in big-endian form. Assumes len is a multiple of 4. + */ +static void +be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len) +{ + size_t i; + + for (i = 0; i < len / 4; i++) + be32enc(dst + i * 4, src[i]); +} + +/* + * Decode a big-endian length len vector of (unsigned char) into a length + * len/4 vector of (uint32_t). Assumes len is a multiple of 4. 
+ */ +static void +be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len) +{ + size_t i; + + for (i = 0; i < len / 4; i++) + dst[i] = be32dec(src + i * 4); +} + +#endif /* BYTE_ORDER != BIG_ENDIAN */ + +/* Elementary functions used by SHA256 */ +#define Ch(x, y, z) ((x & (y ^ z)) ^ z) +#define Maj(x, y, z) ((x & (y | z)) | (y & z)) +#define SHR(x, n) (x >> n) +#define ROTR(x, n) ((x >> n) | (x << (32 - n))) +#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) +#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) +#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3)) +#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10)) + +/* SHA256 round function */ +#define RND(a, b, c, d, e, f, g, h, k) \ + t0 = h + S1(e) + Ch(e, f, g) + k; \ + t1 = S0(a) + Maj(a, b, c); \ + d += t0; \ + h = t0 + t1; + +/* Adjusted round function for rotating state */ +#define RNDr(S, W, i, k) \ + RND(S[(64 - i) % 8], S[(65 - i) % 8], \ + S[(66 - i) % 8], S[(67 - i) % 8], \ + S[(68 - i) % 8], S[(69 - i) % 8], \ + S[(70 - i) % 8], S[(71 - i) % 8], \ + W[i] + k) + +/* + * SHA256 block compression function. The 256-bit state is transformed via + * the 512-bit input block to produce a new state. + */ +static void +SHA256_Transform(uint32_t * state, const unsigned char block[64]) +{ + uint32_t W[64]; + uint32_t S[8]; + uint32_t t0, t1; + int i; + + /* 1. Prepare message schedule W. */ + be32dec_vect(W, block, 64); + for (i = 16; i < 64; i++) + W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16]; + + /* 2. Initialize working variables. */ + memcpy(S, state, 32); + + /* 3. Mix. 
*/ + RNDr(S, W, 0, 0x428a2f98); + RNDr(S, W, 1, 0x71374491); + RNDr(S, W, 2, 0xb5c0fbcf); + RNDr(S, W, 3, 0xe9b5dba5); + RNDr(S, W, 4, 0x3956c25b); + RNDr(S, W, 5, 0x59f111f1); + RNDr(S, W, 6, 0x923f82a4); + RNDr(S, W, 7, 0xab1c5ed5); + RNDr(S, W, 8, 0xd807aa98); + RNDr(S, W, 9, 0x12835b01); + RNDr(S, W, 10, 0x243185be); + RNDr(S, W, 11, 0x550c7dc3); + RNDr(S, W, 12, 0x72be5d74); + RNDr(S, W, 13, 0x80deb1fe); + RNDr(S, W, 14, 0x9bdc06a7); + RNDr(S, W, 15, 0xc19bf174); + RNDr(S, W, 16, 0xe49b69c1); + RNDr(S, W, 17, 0xefbe4786); + RNDr(S, W, 18, 0x0fc19dc6); + RNDr(S, W, 19, 0x240ca1cc); + RNDr(S, W, 20, 0x2de92c6f); + RNDr(S, W, 21, 0x4a7484aa); + RNDr(S, W, 22, 0x5cb0a9dc); + RNDr(S, W, 23, 0x76f988da); + RNDr(S, W, 24, 0x983e5152); + RNDr(S, W, 25, 0xa831c66d); + RNDr(S, W, 26, 0xb00327c8); + RNDr(S, W, 27, 0xbf597fc7); + RNDr(S, W, 28, 0xc6e00bf3); + RNDr(S, W, 29, 0xd5a79147); + RNDr(S, W, 30, 0x06ca6351); + RNDr(S, W, 31, 0x14292967); + RNDr(S, W, 32, 0x27b70a85); + RNDr(S, W, 33, 0x2e1b2138); + RNDr(S, W, 34, 0x4d2c6dfc); + RNDr(S, W, 35, 0x53380d13); + RNDr(S, W, 36, 0x650a7354); + RNDr(S, W, 37, 0x766a0abb); + RNDr(S, W, 38, 0x81c2c92e); + RNDr(S, W, 39, 0x92722c85); + RNDr(S, W, 40, 0xa2bfe8a1); + RNDr(S, W, 41, 0xa81a664b); + RNDr(S, W, 42, 0xc24b8b70); + RNDr(S, W, 43, 0xc76c51a3); + RNDr(S, W, 44, 0xd192e819); + RNDr(S, W, 45, 0xd6990624); + RNDr(S, W, 46, 0xf40e3585); + RNDr(S, W, 47, 0x106aa070); + RNDr(S, W, 48, 0x19a4c116); + RNDr(S, W, 49, 0x1e376c08); + RNDr(S, W, 50, 0x2748774c); + RNDr(S, W, 51, 0x34b0bcb5); + RNDr(S, W, 52, 0x391c0cb3); + RNDr(S, W, 53, 0x4ed8aa4a); + RNDr(S, W, 54, 0x5b9cca4f); + RNDr(S, W, 55, 0x682e6ff3); + RNDr(S, W, 56, 0x748f82ee); + RNDr(S, W, 57, 0x78a5636f); + RNDr(S, W, 58, 0x84c87814); + RNDr(S, W, 59, 0x8cc70208); + RNDr(S, W, 60, 0x90befffa); + RNDr(S, W, 61, 0xa4506ceb); + RNDr(S, W, 62, 0xbef9a3f7); + RNDr(S, W, 63, 0xc67178f2); + + /* 4. 
Mix local working variables into global state */ + for (i = 0; i < 8; i++) + state[i] += S[i]; +} + +static unsigned char PAD[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* Add padding and terminating bit-count. */ +static void +SHA256_Pad(SHA256_CTX * ctx) +{ + unsigned char len[8]; + uint32_t r, plen; + + /* + * Convert length to a vector of bytes -- we do this now rather + * than later because the length will change after we pad. + */ + be64enc(len, ctx->count); + + /* Add 1--64 bytes so that the resulting length is 56 mod 64 */ + r = (ctx->count >> 3) & 0x3f; + plen = (r < 56) ? (56 - r) : (120 - r); + SHA256_Update(ctx, PAD, (size_t)plen); + + /* Add the terminating bit-count */ + SHA256_Update(ctx, len, 8); +} + +/* SHA-256 initialization. Begins a SHA-256 operation. */ +void +SHA256_Init(SHA256_CTX * ctx) +{ + + /* Zero bits processed so far */ + ctx->count = 0; + + /* Magic initialization constants */ + ctx->state[0] = 0x6A09E667; + ctx->state[1] = 0xBB67AE85; + ctx->state[2] = 0x3C6EF372; + ctx->state[3] = 0xA54FF53A; + ctx->state[4] = 0x510E527F; + ctx->state[5] = 0x9B05688C; + ctx->state[6] = 0x1F83D9AB; + ctx->state[7] = 0x5BE0CD19; +} + +/* Add bytes into the hash */ +void +SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len) +{ + uint64_t bitlen; + uint32_t r; + const unsigned char *src = (const unsigned char *) (in); + + /* Number of bytes left in the buffer from previous updates */ + r = (ctx->count >> 3) & 0x3f; + + /* Convert the length into a number of bits */ + bitlen = len << 3; + + /* Update number of bits */ + ctx->count += bitlen; + + /* Handle the case where we don't need to perform any transforms */ + if (len < 64 - r) { + memcpy(&ctx->buf[r], src, len); + return; + } + + /* Finish the current block */ + memcpy(&ctx->buf[r], src, 64 - r); + 
SHA256_Transform(ctx->state, ctx->buf); + src += 64 - r; + len -= 64 - r; + + /* Perform complete blocks */ + while (len >= 64) { + SHA256_Transform(ctx->state, src); + src += 64; + len -= 64; + } + + /* Copy left over data into buffer */ + memcpy(ctx->buf, src, len); +} + +/* + * SHA-256 finalization. Pads the input data, exports the hash value, + * and clears the context state. + */ +void +SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx) +{ + + /* Add padding */ + SHA256_Pad(ctx); + + /* Write the hash */ + be32enc_vect(digest, ctx->state, 32); + + /* Clear the context state */ + memset((void *)ctx, 0, sizeof(*ctx)); +} diff --git a/libbutl/sha256c.c.orig b/libbutl/sha256c.c.orig new file mode 100644 index 0000000..da9b02c --- /dev/null +++ b/libbutl/sha256c.c.orig @@ -0,0 +1,316 @@ +/*- + * Copyright 2005 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#ifdef _KERNEL +#include +#else +#include +#endif + +#include "sha256.h" + +#if BYTE_ORDER == BIG_ENDIAN + +/* Copy a vector of big-endian uint32_t into a vector of bytes */ +#define be32enc_vect(dst, src, len) \ + memcpy((void *)dst, (const void *)src, (size_t)len) + +/* Copy a vector of bytes into a vector of big-endian uint32_t */ +#define be32dec_vect(dst, src, len) \ + memcpy((void *)dst, (const void *)src, (size_t)len) + +#else /* BYTE_ORDER != BIG_ENDIAN */ + +/* + * Encode a length len/4 vector of (uint32_t) into a length len vector of + * (unsigned char) in big-endian form. Assumes len is a multiple of 4. + */ +static void +be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len) +{ + size_t i; + + for (i = 0; i < len / 4; i++) + be32enc(dst + i * 4, src[i]); +} + +/* + * Decode a big-endian length len vector of (unsigned char) into a length + * len/4 vector of (uint32_t). Assumes len is a multiple of 4. 
+ */ +static void +be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len) +{ + size_t i; + + for (i = 0; i < len / 4; i++) + dst[i] = be32dec(src + i * 4); +} + +#endif /* BYTE_ORDER != BIG_ENDIAN */ + +/* Elementary functions used by SHA256 */ +#define Ch(x, y, z) ((x & (y ^ z)) ^ z) +#define Maj(x, y, z) ((x & (y | z)) | (y & z)) +#define SHR(x, n) (x >> n) +#define ROTR(x, n) ((x >> n) | (x << (32 - n))) +#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) +#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) +#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3)) +#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10)) + +/* SHA256 round function */ +#define RND(a, b, c, d, e, f, g, h, k) \ + t0 = h + S1(e) + Ch(e, f, g) + k; \ + t1 = S0(a) + Maj(a, b, c); \ + d += t0; \ + h = t0 + t1; + +/* Adjusted round function for rotating state */ +#define RNDr(S, W, i, k) \ + RND(S[(64 - i) % 8], S[(65 - i) % 8], \ + S[(66 - i) % 8], S[(67 - i) % 8], \ + S[(68 - i) % 8], S[(69 - i) % 8], \ + S[(70 - i) % 8], S[(71 - i) % 8], \ + W[i] + k) + +/* + * SHA256 block compression function. The 256-bit state is transformed via + * the 512-bit input block to produce a new state. + */ +static void +SHA256_Transform(uint32_t * state, const unsigned char block[64]) +{ + uint32_t W[64]; + uint32_t S[8]; + uint32_t t0, t1; + int i; + + /* 1. Prepare message schedule W. */ + be32dec_vect(W, block, 64); + for (i = 16; i < 64; i++) + W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16]; + + /* 2. Initialize working variables. */ + memcpy(S, state, 32); + + /* 3. Mix. 
*/ + RNDr(S, W, 0, 0x428a2f98); + RNDr(S, W, 1, 0x71374491); + RNDr(S, W, 2, 0xb5c0fbcf); + RNDr(S, W, 3, 0xe9b5dba5); + RNDr(S, W, 4, 0x3956c25b); + RNDr(S, W, 5, 0x59f111f1); + RNDr(S, W, 6, 0x923f82a4); + RNDr(S, W, 7, 0xab1c5ed5); + RNDr(S, W, 8, 0xd807aa98); + RNDr(S, W, 9, 0x12835b01); + RNDr(S, W, 10, 0x243185be); + RNDr(S, W, 11, 0x550c7dc3); + RNDr(S, W, 12, 0x72be5d74); + RNDr(S, W, 13, 0x80deb1fe); + RNDr(S, W, 14, 0x9bdc06a7); + RNDr(S, W, 15, 0xc19bf174); + RNDr(S, W, 16, 0xe49b69c1); + RNDr(S, W, 17, 0xefbe4786); + RNDr(S, W, 18, 0x0fc19dc6); + RNDr(S, W, 19, 0x240ca1cc); + RNDr(S, W, 20, 0x2de92c6f); + RNDr(S, W, 21, 0x4a7484aa); + RNDr(S, W, 22, 0x5cb0a9dc); + RNDr(S, W, 23, 0x76f988da); + RNDr(S, W, 24, 0x983e5152); + RNDr(S, W, 25, 0xa831c66d); + RNDr(S, W, 26, 0xb00327c8); + RNDr(S, W, 27, 0xbf597fc7); + RNDr(S, W, 28, 0xc6e00bf3); + RNDr(S, W, 29, 0xd5a79147); + RNDr(S, W, 30, 0x06ca6351); + RNDr(S, W, 31, 0x14292967); + RNDr(S, W, 32, 0x27b70a85); + RNDr(S, W, 33, 0x2e1b2138); + RNDr(S, W, 34, 0x4d2c6dfc); + RNDr(S, W, 35, 0x53380d13); + RNDr(S, W, 36, 0x650a7354); + RNDr(S, W, 37, 0x766a0abb); + RNDr(S, W, 38, 0x81c2c92e); + RNDr(S, W, 39, 0x92722c85); + RNDr(S, W, 40, 0xa2bfe8a1); + RNDr(S, W, 41, 0xa81a664b); + RNDr(S, W, 42, 0xc24b8b70); + RNDr(S, W, 43, 0xc76c51a3); + RNDr(S, W, 44, 0xd192e819); + RNDr(S, W, 45, 0xd6990624); + RNDr(S, W, 46, 0xf40e3585); + RNDr(S, W, 47, 0x106aa070); + RNDr(S, W, 48, 0x19a4c116); + RNDr(S, W, 49, 0x1e376c08); + RNDr(S, W, 50, 0x2748774c); + RNDr(S, W, 51, 0x34b0bcb5); + RNDr(S, W, 52, 0x391c0cb3); + RNDr(S, W, 53, 0x4ed8aa4a); + RNDr(S, W, 54, 0x5b9cca4f); + RNDr(S, W, 55, 0x682e6ff3); + RNDr(S, W, 56, 0x748f82ee); + RNDr(S, W, 57, 0x78a5636f); + RNDr(S, W, 58, 0x84c87814); + RNDr(S, W, 59, 0x8cc70208); + RNDr(S, W, 60, 0x90befffa); + RNDr(S, W, 61, 0xa4506ceb); + RNDr(S, W, 62, 0xbef9a3f7); + RNDr(S, W, 63, 0xc67178f2); + + /* 4. 
Mix local working variables into global state */ + for (i = 0; i < 8; i++) + state[i] += S[i]; +} + +static unsigned char PAD[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* Add padding and terminating bit-count. */ +static void +SHA256_Pad(SHA256_CTX * ctx) +{ + unsigned char len[8]; + uint32_t r, plen; + + /* + * Convert length to a vector of bytes -- we do this now rather + * than later because the length will change after we pad. + */ + be64enc(len, ctx->count); + + /* Add 1--64 bytes so that the resulting length is 56 mod 64 */ + r = (ctx->count >> 3) & 0x3f; + plen = (r < 56) ? (56 - r) : (120 - r); + SHA256_Update(ctx, PAD, (size_t)plen); + + /* Add the terminating bit-count */ + SHA256_Update(ctx, len, 8); +} + +/* SHA-256 initialization. Begins a SHA-256 operation. */ +void +SHA256_Init(SHA256_CTX * ctx) +{ + + /* Zero bits processed so far */ + ctx->count = 0; + + /* Magic initialization constants */ + ctx->state[0] = 0x6A09E667; + ctx->state[1] = 0xBB67AE85; + ctx->state[2] = 0x3C6EF372; + ctx->state[3] = 0xA54FF53A; + ctx->state[4] = 0x510E527F; + ctx->state[5] = 0x9B05688C; + ctx->state[6] = 0x1F83D9AB; + ctx->state[7] = 0x5BE0CD19; +} + +/* Add bytes into the hash */ +void +SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len) +{ + uint64_t bitlen; + uint32_t r; + const unsigned char *src = in; + + /* Number of bytes left in the buffer from previous updates */ + r = (ctx->count >> 3) & 0x3f; + + /* Convert the length into a number of bits */ + bitlen = len << 3; + + /* Update number of bits */ + ctx->count += bitlen; + + /* Handle the case where we don't need to perform any transforms */ + if (len < 64 - r) { + memcpy(&ctx->buf[r], src, len); + return; + } + + /* Finish the current block */ + memcpy(&ctx->buf[r], src, 64 - r); + SHA256_Transform(ctx->state, ctx->buf); + src += 
64 - r; + len -= 64 - r; + + /* Perform complete blocks */ + while (len >= 64) { + SHA256_Transform(ctx->state, src); + src += 64; + len -= 64; + } + + /* Copy left over data into buffer */ + memcpy(ctx->buf, src, len); +} + +/* + * SHA-256 finalization. Pads the input data, exports the hash value, + * and clears the context state. + */ +void +SHA256_Final(unsigned char digest[32], SHA256_CTX * ctx) +{ + + /* Add padding */ + SHA256_Pad(ctx); + + /* Write the hash */ + be32enc_vect(digest, ctx->state, 32); + + /* Clear the context state */ + memset((void *)ctx, 0, sizeof(*ctx)); +} + +#ifdef WEAK_REFS +/* When building libmd, provide weak references. Note: this is not + activated in the context of compiling these sources for internal + use in libcrypt. + */ +#undef SHA256_Init +__weak_reference(_libmd_SHA256_Init, SHA256_Init); +#undef SHA256_Update +__weak_reference(_libmd_SHA256_Update, SHA256_Update); +#undef SHA256_Final +__weak_reference(_libmd_SHA256_Final, SHA256_Final); +#undef SHA256_Transform +__weak_reference(_libmd_SHA256_Transform, SHA256_Transform); +#endif diff --git a/libbutl/small-vector.hxx b/libbutl/small-vector.hxx new file mode 100644 index 0000000..f910ad9 --- /dev/null +++ b/libbutl/small-vector.hxx @@ -0,0 +1,283 @@ +// file : libbutl/small-vector.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_SMALL_VECTOR_HXX +#define LIBBUTL_SMALL_VECTOR_HXX + +#include +#include +#include // size_t +#include // more(), forward() +#include // true_type + +namespace butl +{ + template + struct small_vector_buffer + { + // Size keeps track of the number of elements that are constructed in + // the buffer. Size equal N + 1 means the buffer is not allocated. + // + // Note that the names are decorated in order no to conflict with + // std::vector interface. 
+ // + alignas (alignof (T)) char data_[sizeof (T) * N]; + bool free_ = true; + + // Note that the buffer should be constructed before std::vector and + // destroyed after (since std::vector's destructor will be destroying + // elements potentially residing in the buffer). This means that the + // buffer should be inherited from and before std::vector. + // + small_vector_buffer () = default; + + small_vector_buffer (small_vector_buffer&&) = delete; + small_vector_buffer (const small_vector_buffer&) = delete; + + small_vector_buffer& operator= (small_vector_buffer&&) = delete; + small_vector_buffer& operator= (const small_vector_buffer&) = delete; + }; + + template + class small_vector_allocator + { + public: + using buffer_type = small_vector_buffer; + + explicit + small_vector_allocator (buffer_type* b) noexcept: buf_ (b) {} + + // Allocator interface. + // + public: + using value_type = T; + + T* + allocate(std::size_t n) + { + assert (n >= N); // We should never be asked for less than N. + + if (n <= N) + { + buf_->free_ = false; + return reinterpret_cast (buf_->data_); + } + else + return static_cast (::operator new (sizeof (T) * n)); + } + + void + deallocate (void* p, std::size_t) noexcept + { + if (p == buf_->data_) + buf_->free_ = true; + else + ::operator delete (p); + } + + friend bool + operator== (small_vector_allocator x, small_vector_allocator y) noexcept + { + // We can use y to deallocate x's allocations if they use the same small + // buffer or neither uses its small buffer (which means all allocations, + // if any, have been from the shared heap). Of course this assumes no + // copy will be called to deallocate what has been allocated after said + // copy was made: + // + // A x; + // A y (x); + // p = x.allocate (); + // y.deallocate (p); // Ouch. 
+ // + return (x.buf_ == y.buf_) || (x.buf_->free_ && y.buf_->free_); + } + + friend bool + operator!= (small_vector_allocator x, small_vector_allocator y) noexcept + { + return !(x == y); + } + + // It might get instantiated but should not be called. + // + small_vector_allocator + select_on_container_copy_construction () const noexcept + { + return small_vector_allocator (nullptr); + } + + // propagate_on_container_copy_assignment = false + // propagate_on_container_move_assignment = false + + // Swap is not supported (see explanation in small_vector::swap()). + // + using propagate_on_container_swap = std::true_type; + + void + swap (small_vector_allocator&) = delete; + + // Shouldn't be needed except to satisfy some static_assert's. + // + template + struct rebind {using other = small_vector_allocator;}; + + private: + buffer_type* buf_; + }; + + // Issues and limitations. + // + // - vector::reserve() may allocate more per the spec. But the three main + // C++ runtimes (libstdc++, libc++, and msvc) all seem to do the right + // thing. + // + // - What if in most cases the vector is empty? How can we avoid initial + // reserve? Provide no_reserve flag or some such? Is it really worth it? + // + // - swap() is deleted (see notes below). + // + template + class small_vector: private small_vector_buffer, + public std::vector> + { + public: + using allocator_type = small_vector_allocator; + using buffer_type = small_vector_buffer; + using base_type = std::vector>; + + small_vector () + : base_type (allocator_type (this)) + { + reserve (); + } + + small_vector (std::initializer_list v) + : base_type (allocator_type (this)) + { + if (v.size () <= N) + reserve (); + + static_cast (*this) = v; + } + + template + small_vector (I b, I e) + : base_type (allocator_type (this)) + { + // While we could optimize this for random access iterators, N will + // usually be pretty small. Let's hope the compiler sees this and does + // some magic for us. 
+ // + std::size_t n (0); + for (I i (b); i != e && n <= N; ++i) ++n; + + if (n <= N) + reserve (); + + this->assign (b, e); + } + + explicit + small_vector (std::size_t n) + : base_type (allocator_type (this)) + { + if (n <= N) + reserve (); + + this->resize (n); + } + + small_vector (std::size_t n, const T& x) + : base_type (allocator_type (this)) + { + if (n <= N) + reserve (); + + this->assign (n, x); + } + + small_vector (const small_vector& v) + : buffer_type (), base_type (allocator_type (this)) + { + if (v.size () <= N) + reserve (); + + static_cast (*this) = v; + } + + small_vector& + operator= (const small_vector& v) + { + // Note: propagate_on_container_copy_assignment = false + // + static_cast (*this) = v; + return *this; + } + + small_vector (small_vector&& v) + : base_type (allocator_type (this)) + { + if (v.size () <= N) + reserve (); + + *this = std::move (v); // Delegate to operator=(&&). + } + + small_vector& + operator= (small_vector&& v) + { + // VC's implementation of operator=(&&) (both 14 and 15) frees the + // memory and then reallocated with capacity equal to v.size(). This is + // clearly sub-optimal (the existing buffer could be reused) so we hope + // this will be fixed eventually (VSO#367146; reportedly fixed for + // VC15U1). + // +#if defined(_MSC_VER) && _MSC_VER <= 1910 + if (v.size () < N) + { + clear (); + for (T& x: v) + push_back (std::move (x)); + v.clear (); + } + else +#endif + + // Note: propagate_on_container_move_assignment = false + // + static_cast (*this) = std::move (v); + + return *this; + } + + small_vector& + operator= (std::initializer_list v) + { + static_cast (*this) = v; + return *this; + } + + // Implementing swap() under small buffer optimization is not trivial, to + // say the least (think of swapping two such buffers of different sizes). + // One easy option would be to force both in to the heap. 
+ // + void + swap (small_vector&) = delete; + + void + reserve (std::size_t n = N) + { + base_type::reserve (n < N ? N : n); + } + + void + shrink_to_fit () + { + if (this->capacity () > N) + base_type::shrink_to_fit (); + } + }; +} + +#endif // LIBBUTL_SMALL_VECTOR_HXX diff --git a/libbutl/standard-version.cxx b/libbutl/standard-version.cxx new file mode 100644 index 0000000..124f3de --- /dev/null +++ b/libbutl/standard-version.cxx @@ -0,0 +1,632 @@ +// file : libbutl/standard-version.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include // strtoull() +#include // size_t +#include // move() +#include // invalid_argument + +#include // alnum() + +using namespace std; + +namespace butl +{ + // Utility functions + // + static uint64_t + parse_num (const string& s, size_t& p, + const char* m, + uint64_t min = 0, uint64_t max = 999) + { + if (s[p] == '-' || s[p] == '+') // strtoull() allows these. + throw invalid_argument (m); + + const char* b (s.c_str () + p); + char* e (nullptr); + uint64_t r (strtoull (b, &e, 10)); + + if (b == e || r < min || r > max) + throw invalid_argument (m); + + p = e - s.c_str (); + return static_cast (r); + } + + static void + check_version (uint64_t vr, bool sn, standard_version::flags fl) + { + bool r; + if (vr == uint64_t (~0) && (fl & standard_version::allow_stub) != 0) + { + // Stub. + // + // Check that the snapshot flag is false. + // + r = !sn; + } + else + { + // Check that the version isn't too large, unless represents stub. + // + // AAABBBCCCDDDE + r = vr < 10000000000000ULL; + + // Check that E version component is consistent with the snapshot flag. + // Note that if the allow_earliest flag is set, then E can be 1 for the + // snapshot flag being false, denoting the earliest pre-release of the + // version. + // + if (r) + { + uint64_t e (vr % 10); + if ((fl & standard_version::allow_earliest) == 0) + r = e == (sn ? 
1 : 0); + else + r = e == 1 || (e == 0 && !sn); + } + + // Check that pre-release number is consistent with the snapshot flag. + // + if (r) + { + uint64_t ab (vr / 10 % 1000); + + // Note that if ab is 0, it can either mean non-pre-release version in + // the absence of snapshot number, or 'a.0' pre-release otherwise. If + // ab is 500, it can only mean 'b.0', that must be followed by a + // snapshot number. + // + if (ab != 0) + r = ab != 500 || sn; + } + + // Check that the major, the minor and the patch versions are not + // simultaneously zeros. + // + if (r) + r = (vr / 10000) != 0; + } + + if (!r) + throw invalid_argument ("invalid project version"); + } + + // standard_version + // + standard_version:: + standard_version (const std::string& s, flags f) + { + auto bail = [] (const char* m) {throw invalid_argument (m);}; + + // Pre-parse the first component to see if the version starts with epoch, + // to keep the subsequent parsing straightforward. + // + bool ep (false); + { + char* e (nullptr); + strtoull (s.c_str (), &e, 10); + ep = *e == '~'; + } + + // Note that here and below p is less or equal n, and so s[p] is always + // valid. + // + size_t p (0), n (s.size ()); + + if (ep) + { + epoch = parse_num (s, p, "invalid epoch", 1, uint16_t (~0)); + ++p; // Skip '~'. + } + + uint16_t ma, mi, bf, ab (0); + bool earliest (false); + + ma = parse_num (s, p, "invalid major version"); + + // The only valid version that has no epoch, contains only the major + // version being equal to zero, that is optionally followed by the plus + // character, is the stub version, unless forbidden. + // + bool stub ((f & allow_stub) != 0 && !ep && ma == 0 && + (p == n || s[p] == '+')); + + if (stub) + version = uint64_t (~0); + else + { + if (s[p] != '.') + bail ("'.' expected after major version"); + + mi = parse_num (s, ++p, "invalid minor version"); + + if (s[p] != '.') + bail ("'.' 
expected after minor version"); + + bf = parse_num (s, ++p, "invalid patch version"); + + // AAABBBCCCDDDE + version = ma * 10000000000ULL + + mi * 10000000ULL + + bf * 10000ULL; + + if (version == 0) + bail ("0.0.0 version"); + + // Parse the pre-release component if present. + // + if (s[p] == '-') + { + char k (s[++p]); + + // If the last character in the string is dash, then this is the + // earliest version pre-release, unless forbidden. + // + if (k == '\0' && (f & allow_earliest) != 0) + earliest = true; + else + { + if (k != 'a' && k != 'b') + bail ("'a' or 'b' expected in pre-release"); + + if (s[++p] != '.') + bail ("'.' expected after pre-release letter"); + + ab = parse_num (s, ++p, "invalid pre-release", 0, 499); + + if (k == 'b') + ab += 500; + + // Parse the snapshot components if present. Note that pre-release + // number can't be zero for the final pre-release. + // + if (s[p] == '.') + parse_snapshot (s, ++p); + else if (ab == 0 || ab == 500) + bail ("invalid final pre-release"); + } + } + } + + if (s[p] == '+') + { + assert (!earliest); // Would bail out earlier (a or b expected after -). 
+ + revision = parse_num (s, ++p, "invalid revision", 1, uint16_t (~0)); + } + + if (p != n) + bail ("junk after version"); + + if (ab != 0 || snapshot_sn != 0 || earliest) + version -= 10000 - ab * 10; + + if (snapshot_sn != 0 || earliest) + version += 1; + } + + standard_version:: + standard_version (uint64_t v, flags f) + : version (v) + { + check_version (v, false, f); + } + + standard_version:: + standard_version (uint64_t v, const std::string& s, flags f) + : version (v) + { + bool snapshot (!s.empty ()); + check_version (version, snapshot, f); + + if (snapshot) + { + size_t p (0); + parse_snapshot (s, p); + + if (p != s.size ()) + throw invalid_argument ("junk after snapshot"); + } + } + + standard_version:: + standard_version (uint16_t e, + uint64_t v, + const std::string& s, + uint16_t r, + flags f) + : standard_version (v, s, f) + { + if (stub () && e != 0) + throw invalid_argument ("epoch for stub"); + + // Can't initialize above due to ctor delegating. + // + epoch = e; + revision = r; + } + + standard_version:: + standard_version (uint16_t ep, + uint64_t vr, + uint64_t sn, + std::string si, + uint16_t rv, + flags fl) + : epoch (ep), + version (vr), + snapshot_sn (sn), + snapshot_id (move (si)), + revision (rv) + { + check_version (vr, true, fl); + + if (stub ()) + { + if (ep != 0) + throw invalid_argument ("epoch for stub"); + + if (sn != 0) + throw invalid_argument ("snapshot for stub"); + } + + if (!snapshot_id.empty () && (snapshot_id.size () > 16 || + snapshot_sn == 0 || + snapshot_sn == latest_sn)) + throw invalid_argument ("invalid snapshot"); + } + + void standard_version:: + parse_snapshot (const std::string& s, size_t& p) + { + // Note that snapshot id must be empty for 'z' snapshot number. 
+ // + if (s[p] == 'z') + { + snapshot_sn = latest_sn; + ++p; + return; + } + + uint64_t sn (parse_num (s, + p, + "invalid snapshot number", + 1, latest_sn - 1)); + std::string id; + if (s[p] == '.') + { + char c; + for (++p; alnum (c = s[p]); ++p) + id += c; + + if (id.empty () || id.size () > 16) + throw invalid_argument ("invalid snapshot id"); + } + + snapshot_sn = sn; + snapshot_id = move (id); + } + + string standard_version:: + string_pre_release () const + { + std::string r; + + if ((alpha () && !earliest ()) || beta ()) + { + uint64_t ab (version / 10 % 1000); + + if (ab < 500) + { + r += "a."; + r += to_string (ab); + } + else + { + r += "b."; + r += to_string (ab - 500); + } + } + + return r; + } + + string standard_version:: + string_version () const + { + if (empty ()) + return ""; + + if (stub ()) + return "0"; + + std::string r (to_string (major ()) + '.' + to_string (minor ()) + '.' + + to_string (patch ())); + + if (alpha () || beta ()) + { + r += '-'; + r += string_pre_release (); + + if (snapshot ()) + r += '.'; + } + + return r; + } + + string standard_version:: + string_snapshot () const + { + std::string r; + + if (snapshot ()) + { + r = snapshot_sn == latest_sn ? "z" : to_string (snapshot_sn); + + if (!snapshot_id.empty ()) + { + r += '.'; + r += snapshot_id; + } + } + + return r; + } + + string standard_version:: + string_project () const + { + std::string r (string_version ()); + + if (snapshot ()) + r += string_snapshot (); // string_version() includes trailing dot. + + return r; + } + + string standard_version:: + string_project_id () const + { + std::string r (string_version ()); + + if (snapshot ()) // Trailing dot already in id. + { + r += (snapshot_sn == latest_sn ? "z" : + snapshot_id.empty () ? 
to_string (snapshot_sn) : + snapshot_id); + } + + return r; + } + + string standard_version:: + string () const + { + std::string r; + + if (epoch != 0) + { + r = to_string (epoch); + r += '~'; + } + + r += string_project (); + + if (revision != 0) + { + r += '+'; + r += to_string (revision); + } + + return r; + } + + // standard_version_constraint + // + standard_version_constraint:: + standard_version_constraint (const std::string& s) + { + using std::string; // Not to confuse with string(). + + auto bail = [] (const string& m) {throw invalid_argument (m);}; + const char* spaces (" \t"); + + size_t p (0); + char c (s[p]); + + if (c == '(' || c == '[') // Can be '\0'. + { + bool min_open = c == '('; + + p = s.find_first_not_of (spaces, ++p); + if (p == string::npos) + bail ("no min version"); + + size_t e (s.find_first_of (spaces, p)); + + standard_version min_version; + + try + { + min_version = standard_version (s.substr (p, e - p), + standard_version::allow_earliest); + } + catch (const invalid_argument& e) + { + bail (string ("invalid min version: ") + e.what ()); + } + + p = s.find_first_not_of (spaces, e); + if (p == string::npos) + bail ("no max version"); + + e = s.find_first_of (" \t])", p); + + standard_version max_version; + + try + { + max_version = standard_version (s.substr (p, e - p), + standard_version::allow_earliest); + } + catch (const invalid_argument& e) + { + bail (string ("invalid max version: ") + e.what ()); + } + + p = s.find_first_of ("])", e); + if (p == string::npos) + bail ("no closing bracket"); + + bool max_open = s[p] == ')'; + + if (++p != s.size ()) + bail ("junk after constraint"); + + // Verify and copy the constraint. + // + *this = standard_version_constraint (min_version, min_open, + max_version, max_open); + } + else + { + enum comparison {eq, lt, gt, le, ge}; + comparison operation (eq); // Uninitialized warning. 
+ + if (s.compare (0, p = 2, "==") == 0) + operation = eq; + else if (s.compare (0, p = 2, ">=") == 0) + operation = ge; + else if (s.compare (0, p = 2, "<=") == 0) + operation = le; + else if (s.compare (0, p = 1, ">") == 0) + operation = gt; + else if (s.compare (0, p = 1, "<") == 0) + operation = lt; + else + bail ("invalid constraint"); + + p = s.find_first_not_of (spaces, p); + + if (p == string::npos) + bail ("no version"); + + standard_version v; + + try + { + v = standard_version (s.substr (p), + operation != comparison::eq + ? standard_version::allow_earliest + : standard_version::none); + } + catch (const invalid_argument& e) + { + bail (string ("invalid version: ") + e.what ()); + } + + // Verify and copy the constraint. + // + switch (operation) + { + case comparison::eq: + *this = standard_version_constraint (v); + break; + case comparison::lt: + *this = standard_version_constraint (nullopt, true, move (v), true); + break; + case comparison::le: + *this = standard_version_constraint (nullopt, true, move (v), false); + break; + case comparison::gt: + *this = standard_version_constraint (move (v), true, nullopt, true); + break; + case comparison::ge: + *this = standard_version_constraint (move (v), false, nullopt, true); + break; + } + } + } + + standard_version_constraint:: + standard_version_constraint (optional mnv, bool mno, + optional mxv, bool mxo) + : min_version (move (mnv)), + max_version (move (mxv)), + min_open (mno), + max_open (mxo) + { + assert ( + // Min and max versions can't both be absent. + // + (min_version || max_version) && + + // Version should be non-empty and not a stub. + // + (!min_version || (!min_version->empty () && !min_version->stub ())) && + (!max_version || (!max_version->empty () && !max_version->stub ())) && + + // Absent version endpoint (infinity) should be open. 
+ // + (min_version || min_open) && (max_version || max_open)); + + if (min_version && max_version) + { + if (*min_version > *max_version) + throw invalid_argument ("min version is greater than max version"); + + if (*min_version == *max_version) + { + if (min_open || max_open) + throw invalid_argument ("equal version endpoints not closed"); + + if (min_version->earliest ()) + throw invalid_argument ("equal version endpoints are earliest"); + } + } + } + + string standard_version_constraint:: + string () const + { + assert (!empty ()); + + if (!min_version) + return (max_open ? "< " : "<= ") + max_version->string (); + + if (!max_version) + return (min_open ? "> " : ">= ") + min_version->string (); + + if (*min_version == *max_version) + return "== " + min_version->string (); + + return (min_open ? '(' : '[') + min_version->string () + ' ' + + max_version->string () + (max_open ? ')' : ']'); + } + + bool standard_version_constraint:: + satisfies (const standard_version& v) const noexcept + { + bool s (true); + + if (min_version) + { + int i (v.compare (*min_version)); + s = min_open ? i > 0 : i >= 0; + } + + if (s && max_version) + { + int i (v.compare (*max_version)); + s = max_open ? i < 0 : i <= 0; + } + + return s; + } +} diff --git a/libbutl/standard-version.hxx b/libbutl/standard-version.hxx new file mode 100644 index 0000000..22078f5 --- /dev/null +++ b/libbutl/standard-version.hxx @@ -0,0 +1,257 @@ +// file : libbutl/standard-version.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_STANDARD_VERSION_HXX +#define LIBBUTL_STANDARD_VERSION_HXX + +#include +#include // uint*_t +#include // size_t +#include + +#include + +#include + +namespace butl +{ + // The build2 "standard version" (specific, earliest and stub): + // + // [~]..[-(a|b).[.[.]]][+] + // [~]..- + // 0[+] + // + struct LIBBUTL_EXPORT standard_version + { + // Invariants: + // + // 1. allow_earliest + // ? 
(E == 1) || (snapshot_sn == 0) + // : (E == 0) == (snapshot_sn == 0) + // + // 2. version != 0 || allow_stub && epoch == 0 && snapshot_sn == 0 + // + // 3. snapshot_sn != latest_sn && snapshot_sn != 0 || snapshot_id.empty () + // + static const std::uint64_t latest_sn = std::uint64_t (~0); + + std::uint16_t epoch = 0; // 0 if not specified. + std::uint64_t version = 0; // AAABBBCCCDDDE + std::uint64_t snapshot_sn = 0; // 0 if not specifed, latest_sn if 'z'. + std::string snapshot_id; // Empty if not specified. + std::uint16_t revision = 0; // 0 if not specified. + + std::uint16_t major () const noexcept; + std::uint16_t minor () const noexcept; + std::uint16_t patch () const noexcept; + + // Note: 0 is ambiguous (-a.0.z). + // + std::uint16_t pre_release () const noexcept; + + // Note: return empty if the corresponding component is unspecified. + // + std::string string () const; // Package version. + std::string string_project () const; // Project version (no epoch/rev). + std::string string_project_id () const; // Project version id (no snapsn). + std::string string_version () const; // Version only (no snapshot). + std::string string_pre_release () const; // Pre-release part only (a.1). + std::string string_snapshot () const; // Snapshot part only (1234.1f23). + + bool empty () const noexcept {return version == 0;} + + bool alpha () const noexcept; + bool beta () const noexcept; + bool snapshot () const noexcept {return snapshot_sn != 0;} + + // Represented by DDDE in version being 0001 and snapshot_sn being 0. + // + // Note that the earliest version is a final alpha pre-release. + // + bool + earliest () const noexcept; + + bool + stub () const noexcept {return version == std::uint64_t (~0);} + + // Comparison of empty or stub versions doesn't make sense. + // + int + compare (const standard_version& v) const noexcept + { + if (epoch != v.epoch) + return epoch < v.epoch ? -1 : 1; + + if (version != v.version) + return version < v.version ? 
-1 : 1; + + if (snapshot_sn != v.snapshot_sn) + return snapshot_sn < v.snapshot_sn ? -1 : 1; + + if (revision != v.revision) + return revision < v.revision ? -1 : 1; + + return 0; + } + + // Parse the version. Throw std::invalid_argument if the format is not + // recognizable or components are invalid. + // + enum flags + { + none = 0, + allow_earliest = 0x01, // Allow ..- form. + allow_stub = 0x02 // Allow 0[+] form. + }; + + explicit + standard_version (const std::string&, flags = none); + + explicit + standard_version (std::uint64_t version, flags = none); + + standard_version (std::uint64_t version, + const std::string& snapshot, + flags = none); + + standard_version (std::uint16_t epoch, + std::uint64_t version, + const std::string& snapshot, + std::uint16_t revision, + flags = none); + + standard_version (std::uint16_t epoch, + std::uint64_t version, + std::uint64_t snapshot_sn, + std::string snapshot_id, + std::uint16_t revision, + flags = none); + + // Create empty version. + // + standard_version () = default; + + private: + void + parse_snapshot (const std::string&, std::size_t&); + }; + + inline bool + operator< (const standard_version& x, const standard_version& y) noexcept + { + return x.compare (y) < 0; + } + + inline bool + operator> (const standard_version& x, const standard_version& y) noexcept + { + return x.compare (y) > 0; + } + + inline bool + operator== (const standard_version& x, const standard_version& y) noexcept + { + return x.compare (y) == 0; + } + + inline bool + operator<= (const standard_version& x, const standard_version& y) noexcept + { + return x.compare (y) <= 0; + } + + inline bool + operator>= (const standard_version& x, const standard_version& y) noexcept + { + return x.compare (y) >= 0; + } + + inline bool + operator!= (const standard_version& x, const standard_version& y) noexcept + { + return !(x == y); + } + + inline std::ostream& + operator<< (std::ostream& o, const standard_version& x) + { + return o << x.string (); + } + 
+  // Bitwise operators for combining and testing standard_version::flags
+  // values. They are only declared here; the inline definitions are in the
+  // .ixx file included at the end of this header.
+  //
+  inline standard_version::flags
+  operator& (standard_version::flags, standard_version::flags);
+
+  inline standard_version::flags
+  operator| (standard_version::flags, standard_version::flags);
+
+  inline standard_version::flags
+  operator&= (standard_version::flags&, standard_version::flags);
+
+  inline standard_version::flags
+  operator|= (standard_version::flags&, standard_version::flags);
+
+  // The build2 "standard version" constraint:
+  //
+  // ('==' | '>' | '<' | '>=' | '<=') <version>
+  // ('(' | '[') <version> <version> (')' | ']')
+  //
+  struct LIBBUTL_EXPORT standard_version_constraint
+  {
+    // Range endpoints. An absent version denotes an unbounded (infinite)
+    // endpoint. The *_open flags tell whether the corresponding endpoint is
+    // excluded from the range.
+    //
+    // The flags default to true so that a default-constructed (empty)
+    // constraint has well-defined members (operator== reads them) and is
+    // consistent with the range constructor's requirement that an absent
+    // endpoint is open.
+    //
+    butl::optional<standard_version> min_version;
+    butl::optional<standard_version> max_version;
+    bool min_open = true;
+    bool max_open = true;
+
+    // Parse the version constraint. Throw std::invalid_argument on error.
+    //
+    explicit
+    standard_version_constraint (const std::string&);
+
+    // Throw std::invalid_argument if the specified version range is invalid.
+    //
+    standard_version_constraint (
+      butl::optional<standard_version> min_version, bool min_open,
+      butl::optional<standard_version> max_version, bool max_open);
+
+    // Create the `== <version>` constraint, that is, the closed range with
+    // both endpoints equal to the specified version.
+    //
+    explicit
+    standard_version_constraint (const standard_version& v)
+        : standard_version_constraint (v, false, v, false) {}
+
+    // Create empty constraint (both endpoints absent).
+    //
+    standard_version_constraint () = default;
+
+    // Return the string representation. The constraint must not be empty.
+    //
+    std::string
+    string () const;
+
+    bool
+    empty () const noexcept {return !min_version && !max_version;}
+
+    // Return true if the version falls into the range.
+    //
+    bool
+    satisfies (const standard_version&) const noexcept;
+  };
+
+  // Two constraints are equal if both their endpoint versions and their
+  // open/closed flags are equal.
+  //
+  inline bool
+  operator== (const standard_version_constraint& x,
+              const standard_version_constraint& y)
+  {
+    return x.min_version == y.min_version && x.max_version == y.max_version &&
+      x.min_open == y.min_open && x.max_open == y.max_open;
+  }
+
+  inline bool
+  operator!= (const standard_version_constraint& x,
+              const standard_version_constraint& y)
+  {
+    return !(x == y);
+  }
+
+  inline std::ostream&
+  operator<< (std::ostream& o, const standard_version_constraint& x)
+  {
+    return o << x.string ();
+  }
+}
+
+#include <libbutl/standard-version.ixx>
+
+#endif // LIBBUTL_STANDARD_VERSION_HXX
diff --git a/libbutl/standard-version.ixx
b/libbutl/standard-version.ixx
new file mode 100644
index 0000000..5f656fa
--- /dev/null
+++ b/libbutl/standard-version.ixx
@@ -0,0 +1,100 @@
+// file      : libbutl/standard-version.ixx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+namespace butl
+{
+  // Component accessors for the AAABBBCCCDDDE version encoding.
+  //
+  // A pre-release, snapshot, or earliest version is stored as the final
+  // version minus (10000 - DDD * 10), so its AAABBBCCC part is one patch
+  // "behind". major(), minor(), and patch() therefore first undo this
+  // adjustment (on the value with E already dropped, hence 1000 - DDD) and
+  // only then extract their three digits.
+  //
+  inline std::uint16_t standard_version::
+  major () const noexcept
+  {
+    std::uint64_t e (version % 10);
+    std::uint64_t v (version / 10);
+    std::uint64_t ab (v % 1000);
+    if (ab != 0 || e == 1)
+      v += 1000 - ab;
+
+    return static_cast<std::uint16_t> (v / 1000000000 % 1000);
+  }
+
+  inline std::uint16_t standard_version::
+  minor () const noexcept
+  {
+    std::uint64_t e (version % 10);
+    std::uint64_t v (version / 10);
+    std::uint64_t ab (v % 1000);
+    if (ab != 0 || e == 1)
+      v += 1000 - ab;
+
+    return static_cast<std::uint16_t> (v / 1000000 % 1000);
+  }
+
+  inline std::uint16_t standard_version::
+  patch () const noexcept
+  {
+    std::uint64_t e (version % 10);
+    std::uint64_t v (version / 10);
+    std::uint64_t ab (v % 1000);
+    if (ab != 0 || e == 1)
+      v += 1000 - ab;
+
+    return static_cast<std::uint16_t> (v / 1000 % 1000);
+  }
+
+  // Return the alpha or beta pre-release number (the DDD component without
+  // the beta offset).
+  //
+  inline std::uint16_t standard_version::
+  pre_release () const noexcept
+  {
+    std::uint64_t ab (version / 10 % 1000);
+
+    // Beta numbers are stored with the 500 offset, so 500 itself is 'b.0'
+    // and must map to 0 (consistent with string_pre_release()).
+    //
+    if (ab >= 500)
+      ab -= 500;
+
+    return static_cast<std::uint16_t> (ab);
+  }
+
+  // Alpha: the DDDE part is in the (0, 5000) range.
+  //
+  inline bool standard_version::
+  alpha () const noexcept
+  {
+    std::uint64_t abe (version % 10000);
+    return abe > 0 && abe < 5000 && !stub ();
+  }
+
+  // Beta: the DDDE part is greater than 5000.
+  //
+  inline bool standard_version::
+  beta () const noexcept
+  {
+    std::uint64_t abe (version % 10000);
+    return abe > 5000 && !stub ();
+  }
+
+  // Earliest: the DDDE part is 0001 and there is no snapshot number.
+  //
+  inline bool standard_version::
+  earliest () const noexcept
+  {
+    return version % 10000 == 1 && !snapshot () && !stub ();
+  }
+
+  // Note that x is passed by value, so the compound assignment only
+  // modifies the local copy that is then returned.
+  //
+  inline standard_version::flags
+  operator& (standard_version::flags x, standard_version::flags y)
+  {
+    return x &= y;
+  }
+
+  inline standard_version::flags
+  operator| (standard_version::flags x, standard_version::flags y)
+  {
+    return x |= y;
+  }
+
+  inline
standard_version::flags + operator&= (standard_version::flags& x, standard_version::flags y) + { + return x = static_cast ( + static_cast (x) & + static_cast (y)); + } + + inline standard_version::flags + operator|= (standard_version::flags& x, standard_version::flags y) + { + return x = static_cast ( + static_cast (x) | + static_cast (y)); + } +} diff --git a/libbutl/string-parser.cxx b/libbutl/string-parser.cxx new file mode 100644 index 0000000..c579db0 --- /dev/null +++ b/libbutl/string-parser.cxx @@ -0,0 +1,132 @@ +// file : libbutl/string-parser.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include // move() + +using namespace std; + +namespace butl +{ + // Utility functions + // + inline static bool + space (char c) noexcept + { + return c == ' ' || c == '\t'; + } + + // string_parser + // + vector> string_parser:: + parse_quoted_position (const string& s, bool unquote) + { + vector> r; + for (auto b (s.begin ()), i (b), e (s.end ()); i != e; ) + { + for (; i != e && space (*i); ++i) ; // Skip spaces. + + if (i == e) // No more strings. + break; + + string s; + char quoting ('\0'); // Current quoting mode, can be used as bool. + size_t pos (i - b); // String position. + + for (; i != e; ++i) + { + char c (*i); + + if (!quoting) + { + if (space (c)) // End of string. + break; + + if (c == '"' || c == '\'') // Begin of quoted substring. + { + quoting = c; + + if (!unquote) + s += c; + + continue; + } + } + else if (c == quoting) // End of quoted substring. 
+ { + quoting = '\0'; + + if (!unquote) + s += c; + + continue; + } + + s += c; + } + + if (quoting) + throw invalid_string (i - b, "unterminated quoted string"); + + r.emplace_back (move (s), pos); + } + + return r; + } + + vector string_parser:: + parse_quoted (const string& s, bool unquote) + { + vector> sp (parse_quoted_position (s, unquote)); + + vector r; + r.reserve (sp.size ()); + for (auto& s: sp) + r.emplace_back (move (s.first)); + + return r; + } + + string string_parser:: + unquote (const string& s) + { + string r; + char quoting ('\0'); // Current quoting mode, can be used as bool. + + for (auto i (s.begin ()), e (s.end ()); i != e; ++i) + { + char c (*i); + + if (!quoting) + { + if (c == '"' || c == '\'') // Begin of quoted substring. + { + quoting = c; + continue; + } + } + else if (c == quoting) // End of quoted substring. + { + quoting = '\0'; + continue; + } + + r += c; + } + + return r; + } + + vector string_parser:: + unquote (const vector& v) + { + vector r; + r.reserve (v.size ()); + for (auto& s: v) + r.emplace_back (unquote (s)); + + return r; + } +} diff --git a/libbutl/string-parser.hxx b/libbutl/string-parser.hxx new file mode 100644 index 0000000..56ba348 --- /dev/null +++ b/libbutl/string-parser.hxx @@ -0,0 +1,56 @@ +// file : libbutl/string-parser.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_STRING_PARSER_HXX +#define LIBBUTL_STRING_PARSER_HXX + +#include +#include +#include // size_t +#include // pair +#include // invalid_argument + +#include + +namespace butl +{ + class LIBBUTL_EXPORT invalid_string: public std::invalid_argument + { + public: + invalid_string (std::size_t p, const std::string& d) + : invalid_argument (d), position (p) {} + + std::size_t position; // Zero-based. + }; + + class LIBBUTL_EXPORT string_parser + { + public: + // Parse a whitespace-separated list of strings. Can contain single or + // double quoted substrings. 
No escaping is supported. If unquote is true, + // return one-level unquoted values. Throw invalid_string in case of + // invalid quoting. + // + static std::vector + parse_quoted (const std::string&, bool unquote); + + // As above but return a list of string and zero-based position pairs. + // Position is useful for issuing diagnostics about an invalid string + // during second-level parsing. + // + static std::vector> + parse_quoted_position (const std::string&, bool unquote); + + // Remove a single level of quotes. Note that the format or the + // correctness of the quotation is not validated. + // + static std::string + unquote (const std::string&); + + static std::vector + unquote (const std::vector&); + }; +} + +#endif // LIBBUTL_STRING_PARSER_HXX diff --git a/libbutl/string-table.hxx b/libbutl/string-table.hxx new file mode 100644 index 0000000..6898a52 --- /dev/null +++ b/libbutl/string-table.hxx @@ -0,0 +1,98 @@ +// file : libbutl/string-table.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_STRING_TABLE_HXX +#define LIBBUTL_STRING_TABLE_HXX + +#include +#include +#include + +#include + +#include + +namespace butl +{ + // A pool of strings and, optionally, other accompanying data in which + // each entry is assigned an individual index (or id) of type I (e.g., + // uint8_t, uint16_t, etc., depending on how many entries are expected). + // Index value 0 is reserved to indicate the "no entry" condition. + // + template + struct string_table_element + { + const I i; + const D d; + }; + + template + struct string_table_element + { + const I i; + const std::string d; + }; + + // For custom data the options are to call the data member 'key' or to + // specialize this traits. 
+ // + template + struct string_table_traits + { + static const std::string& + key (const D& d) {return d.key;} + }; + + template <> + struct string_table_traits + { + static const std::string& + key (const std::string& d) {return d;} + }; + + template + struct string_table + { + // Insert new entry unless one already exists. + // + I + insert (const D&); + + // Find existing. + // + I + find (const std::string& k) const + { + auto i (map_.find (key_type (&k))); + return i != map_.end () ? i->second.i : 0; + } + + // Reverse lookup. + // + const D& + operator[] (I i) const {assert (i > 0); return vec_[i - 1]->second.d;} + + I + size () const {return static_cast (vec_.size ());} + + bool + empty () const {return vec_.empty ();} + + void + clear () {vec_.clear (); map_.clear ();} + + private: + using key_type = butl::map_key; + using value_type = string_table_element; + using map_type = std::unordered_map; + using traits = string_table_traits; + + map_type map_; + std::vector vec_; + }; +} + +#include + +#endif // LIBBUTL_STRING_TABLE_HXX diff --git a/libbutl/string-table.txx b/libbutl/string-table.txx new file mode 100644 index 0000000..b248ef3 --- /dev/null +++ b/libbutl/string-table.txx @@ -0,0 +1,33 @@ +// file : libbutl/string-table.txx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include // numeric_limits +#include // size_t +#include + +namespace butl +{ + template + I string_table:: + insert (const D& d) + { + std::size_t i (vec_.size () + 1); + + // Note: move(d) would be tricky since the key still points to it. + // + auto r (map_.emplace ( + key_type (&traits::key (d)), + value_type {static_cast (i), d})); + + if (r.second) + { + assert (i <= std::numeric_limits::max ()); + + r.first->first.p = &traits::key (r.first->second.d); // Update key. 
+ vec_.push_back (r.first); + } + + return r.first->second.i; + } +} diff --git a/libbutl/strptime.c b/libbutl/strptime.c new file mode 100644 index 0000000..8bbfac5 --- /dev/null +++ b/libbutl/strptime.c @@ -0,0 +1,629 @@ +/*- + * Copyright (c) 2014 Gary Mills + * Copyright 2011, Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 1994 Powerdog Industries. All rights reserved. + * + * Copyright (c) 2011 The FreeBSD Foundation + * All rights reserved. + * Portions of this software were developed by David Chisnall + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * The views and conclusions contained in the software and documentation + * are those of the authors and should not be interpreted as representing + * official policies, either expressed or implied, of Powerdog Industries. + */ + +// Fallback implementation of strptime() designed for Windows/gcc combination +// when neither POSIX strptime() nor proper std::get_time() are available. No +// locale support is provided, so the function works as if "C" locale is set. +// FreeBSD libc strptime.c source file is taken as a basis (saved in +// strptime.orig). POSIX non-compliant extensions are removed. Most of +// #include directives are removed as reference internal headers, some +// #define directives are added instead. The content of included +// timelocal.{h,c} files is mostly commented out, having just required +// lc_time_T struct and _C_time_locale constant definitions left. Otherwise the +// code is kept untouched as much as possible. +// + +#include // isspace(), isdigit() +#include // _strnicmp() + +#include "timelocal.h" // lc_time_T +#include "timelocal.c" // _C_time_locale + +#define isspace_l(c, l) isspace(c) +#define isdigit_l(c, l) isdigit(c) +#define strncasecmp_l(s1, s2, n, l) _strnicmp(s1, s2, n) + +#define isleap(y) ((((y) % 4) == 0 && ((y) % 100) != 0) || ((y) % 400) == 0) +#define FIX_LOCALE(l) l=l + +#define TM_YEAR_BASE 1900 +#define TM_SUNDAY 0 +#define TM_MONDAY 1 + +typedef unsigned char u_char; + +// Stubs replacing the real locale support. +// +typedef const lc_time_T* locale_t; + +static locale_t +__get_locale () +{ + return &_C_time_locale; +} + +static const struct lc_time_T * +__get_current_time_locale (locale_t l) +{ + return l; +} + +// From this point the code is unchanged, if not to count non-standard +// specifiers support removal and the replacement of tabs with spaces. 
+// +static char * +_strptime(const char *, const char *, struct tm *, int *, locale_t); + +#define asizeof(a) ((int)(sizeof(a) / sizeof((a)[0]))) + +#define FLAG_NONE (1 << 0) +#define FLAG_YEAR (1 << 1) +#define FLAG_MONTH (1 << 2) +#define FLAG_YDAY (1 << 3) +#define FLAG_MDAY (1 << 4) +#define FLAG_WDAY (1 << 5) + +/* + * Calculate the week day of the first day of a year. Valid for + * the Gregorian calendar, which began Sept 14, 1752 in the UK + * and its colonies. Ref: + * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week + */ + +static int +first_wday_of(int year) +{ + return (((2 * (3 - (year / 100) % 4)) + (year % 100) + + ((year % 100) / 4) + (isleap(year) ? 6 : 0) + 1) % 7); +} + +// Remove Glibc extensions (%F, %Z, %z, %s) to make implementation compliant +// with POSIX strptime() and to simplify porting. +// +static char * +_strptime(const char *buf, const char *fmt, struct tm *tm, int *GMTp, + locale_t locale) +{ + char c; + const char *ptr; + int day_offset = -1, wday_offset; + int week_offset; + int i, len; + int flags; + int Ealternative, Oalternative; + const struct lc_time_T *tptr = __get_current_time_locale(locale); + static int start_of_month[2][13] = { + {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, + {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366} + }; + + flags = FLAG_NONE; + + ptr = fmt; + while (*ptr != 0) { + c = *ptr++; + + if (c != '%') { + if (isspace_l((unsigned char)c, locale)) + while (*buf != 0 && + isspace_l((unsigned char)*buf, locale)) + buf++; + else if (c != *buf++) + return (NULL); + continue; + } + + // Skip according to + // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strptime.html + // + c = *ptr; + if (c == '+' || c == '0') + ptr++; + + Ealternative = 0; + Oalternative = 0; +label: + c = *ptr++; + switch (c) { + case '%': + if (*buf++ != '%') + return (NULL); + break; + + case 'C': + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + /* XXX This 
will break for 3-digit centuries. */ + len = 2; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (i < 19) + return (NULL); + + tm->tm_year = i * 100 - TM_YEAR_BASE; + flags |= FLAG_YEAR; + + break; + + case 'c': + buf = _strptime(buf, tptr->c_fmt, tm, GMTp, locale); + if (buf == NULL) + return (NULL); + flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR; + break; + + case 'D': + buf = _strptime(buf, "%m/%d/%y", tm, GMTp, locale); + if (buf == NULL) + return (NULL); + flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR; + break; + + case 'E': + if (Ealternative || Oalternative) + break; + Ealternative++; + goto label; + + case 'O': + if (Ealternative || Oalternative) + break; + Oalternative++; + goto label; + + case 'R': + buf = _strptime(buf, "%H:%M", tm, GMTp, locale); + if (buf == NULL) + return (NULL); + break; + + case 'r': + buf = _strptime(buf, tptr->ampm_fmt, tm, GMTp, locale); + if (buf == NULL) + return (NULL); + break; + + case 'T': + buf = _strptime(buf, "%H:%M:%S", tm, GMTp, locale); + if (buf == NULL) + return (NULL); + break; + + case 'X': + buf = _strptime(buf, tptr->X_fmt, tm, GMTp, locale); + if (buf == NULL) + return (NULL); + break; + + case 'x': + buf = _strptime(buf, tptr->x_fmt, tm, GMTp, locale); + if (buf == NULL) + return (NULL); + flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR; + break; + + case 'j': + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + len = 3; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++){ + i *= 10; + i += *buf - '0'; + len--; + } + if (i < 1 || i > 366) + return (NULL); + + tm->tm_yday = i - 1; + flags |= FLAG_YDAY; + + break; + + case 'M': + case 'S': + if (*buf == 0 || + isspace_l((unsigned char)*buf, locale)) + break; + + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + len = 2; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++){ + i *= 10; 
+ i += *buf - '0'; + len--; + } + + if (c == 'M') { + if (i > 59) + return (NULL); + tm->tm_min = i; + } else { + if (i > 60) + return (NULL); + tm->tm_sec = i; + } + + break; + + case 'H': + case 'I': + case 'k': + case 'l': + /* + * Of these, %l is the only specifier explicitly + * documented as not being zero-padded. However, + * there is no harm in allowing zero-padding. + * + * XXX The %l specifier may gobble one too many + * digits if used incorrectly. + */ + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + len = 2; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (c == 'H' || c == 'k') { + if (i > 23) + return (NULL); + } else if (i > 12) + return (NULL); + + tm->tm_hour = i; + + break; + + case 'p': + /* + * XXX This is bogus if parsed before hour-related + * specifiers. + */ + len = strlen(tptr->am); + if (strncasecmp_l(buf, tptr->am, len, locale) == 0) { + if (tm->tm_hour > 12) + return (NULL); + if (tm->tm_hour == 12) + tm->tm_hour = 0; + buf += len; + break; + } + + len = strlen(tptr->pm); + if (strncasecmp_l(buf, tptr->pm, len, locale) == 0) { + if (tm->tm_hour > 12) + return (NULL); + if (tm->tm_hour != 12) + tm->tm_hour += 12; + buf += len; + break; + } + + return (NULL); + + case 'A': + case 'a': + for (i = 0; i < asizeof(tptr->weekday); i++) { + len = strlen(tptr->weekday[i]); + if (strncasecmp_l(buf, tptr->weekday[i], + len, locale) == 0) + break; + len = strlen(tptr->wday[i]); + if (strncasecmp_l(buf, tptr->wday[i], + len, locale) == 0) + break; + } + if (i == asizeof(tptr->weekday)) + return (NULL); + + buf += len; + tm->tm_wday = i; + flags |= FLAG_WDAY; + break; + + case 'U': + case 'W': + /* + * XXX This is bogus, as we can not assume any valid + * information present in the tm structure at this + * point to calculate a real value, so just check the + * range for now. 
+ */ + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + len = 2; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (i > 53) + return (NULL); + + if (c == 'U') + day_offset = TM_SUNDAY; + else + day_offset = TM_MONDAY; + + + week_offset = i; + + break; + + case 'w': + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + i = *buf - '0'; + if (i > 6) + return (NULL); + + tm->tm_wday = i; + flags |= FLAG_WDAY; + + break; + + case 'e': + /* + * With %e format, our strftime(3) adds a blank space + * before single digits. + */ + if (*buf != 0 && + isspace_l((unsigned char)*buf, locale)) + buf++; + /* FALLTHROUGH */ + case 'd': + /* + * The %e specifier was once explicitly documented as + * not being zero-padded but was later changed to + * equivalent to %d. There is no harm in allowing + * such padding. + * + * XXX The %e specifier may gobble one too many + * digits if used incorrectly. + */ + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + len = 2; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (i > 31) + return (NULL); + + tm->tm_mday = i; + flags |= FLAG_MDAY; + + break; + + case 'B': + case 'b': + case 'h': + for (i = 0; i < asizeof(tptr->month); i++) { + if (Oalternative) { + if (c == 'B') { + len = strlen(tptr->alt_month[i]); + if (strncasecmp_l(buf, + tptr->alt_month[i], + len, locale) == 0) + break; + } + } else { + len = strlen(tptr->month[i]); + if (strncasecmp_l(buf, tptr->month[i], + len, locale) == 0) + break; + } + } + /* + * Try the abbreviated month name if the full name + * wasn't found and Oalternative was not requested. 
+ */ + if (i == asizeof(tptr->month) && !Oalternative) { + for (i = 0; i < asizeof(tptr->month); i++) { + len = strlen(tptr->mon[i]); + if (strncasecmp_l(buf, tptr->mon[i], + len, locale) == 0) + break; + } + } + if (i == asizeof(tptr->month)) + return (NULL); + + tm->tm_mon = i; + buf += len; + flags |= FLAG_MONTH; + + break; + + case 'm': + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + len = 2; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (i < 1 || i > 12) + return (NULL); + + tm->tm_mon = i - 1; + flags |= FLAG_MONTH; + + break; + + case 'Y': + case 'y': + if (*buf == 0 || + isspace_l((unsigned char)*buf, locale)) + break; + + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + len = (c == 'Y') ? 4 : 2; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (c == 'Y') + i -= TM_YEAR_BASE; + if (c == 'y' && i < 69) + i += 100; + if (i < 0) + return (NULL); + + tm->tm_year = i; + flags |= FLAG_YEAR; + + break; + + case 'n': + case 't': + while (isspace_l((unsigned char)*buf, locale)) + buf++; + break; + + default: + return (NULL); + } + } + + if (!(flags & FLAG_YDAY) && (flags & FLAG_YEAR)) { + if ((flags & (FLAG_MONTH | FLAG_MDAY)) == + (FLAG_MONTH | FLAG_MDAY)) { + tm->tm_yday = start_of_month[isleap(tm->tm_year + + TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1); + flags |= FLAG_YDAY; + } else if (day_offset != -1) { + /* Set the date to the first Sunday (or Monday) + * of the specified week of the year. 
+ */ + if (!(flags & FLAG_WDAY)) { + tm->tm_wday = day_offset; + flags |= FLAG_WDAY; + } + tm->tm_yday = (7 - + first_wday_of(tm->tm_year + TM_YEAR_BASE) + + day_offset) % 7 + (week_offset - 1) * 7 + + tm->tm_wday - day_offset; + flags |= FLAG_YDAY; + } + } + + if ((flags & (FLAG_YEAR | FLAG_YDAY)) == (FLAG_YEAR | FLAG_YDAY)) { + if (!(flags & FLAG_MONTH)) { + i = 0; + while (tm->tm_yday >= + start_of_month[isleap(tm->tm_year + + TM_YEAR_BASE)][i]) + i++; + if (i > 12) { + i = 1; + tm->tm_yday -= + start_of_month[isleap(tm->tm_year + + TM_YEAR_BASE)][12]; + tm->tm_year++; + } + tm->tm_mon = i - 1; + flags |= FLAG_MONTH; + } + if (!(flags & FLAG_MDAY)) { + tm->tm_mday = tm->tm_yday - + start_of_month[isleap(tm->tm_year + TM_YEAR_BASE)] + [tm->tm_mon] + 1; + flags |= FLAG_MDAY; + } + if (!(flags & FLAG_WDAY)) { + i = 0; + wday_offset = first_wday_of(tm->tm_year); + while (i++ <= tm->tm_yday) { + if (wday_offset++ >= 6) + wday_offset = 0; + } + tm->tm_wday = wday_offset; + flags |= FLAG_WDAY; + } + } + + return ((char *)buf); +} + +static char * +strptime_l(const char * __restrict buf, const char * __restrict fmt, + struct tm * __restrict tm, locale_t loc) +{ + char *ret; + int gmt; + FIX_LOCALE(loc); + + gmt = 0; + ret = _strptime(buf, fmt, tm, &gmt, loc); + + return (ret); +} + +static char * +strptime(const char * __restrict buf, const char * __restrict fmt, + struct tm * __restrict tm) +{ + return strptime_l(buf, fmt, tm, __get_locale()); +} diff --git a/libbutl/strptime.c.orig b/libbutl/strptime.c.orig new file mode 100644 index 0000000..2be6358 --- /dev/null +++ b/libbutl/strptime.c.orig @@ -0,0 +1,689 @@ +/*- + * Copyright (c) 2014 Gary Mills + * Copyright 2011, Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 1994 Powerdog Industries. All rights reserved. + * + * Copyright (c) 2011 The FreeBSD Foundation + * All rights reserved. + * Portions of this software were developed by David Chisnall + * under sponsorship from the FreeBSD Foundation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation + * are those of the authors and should not be interpreted as representing + * official policies, either expressed or implied, of Powerdog Industries. + */ + +#include +#ifndef lint +#ifndef NOID +static char copyright[] __unused = +"@(#) Copyright (c) 1994 Powerdog Industries. 
All rights reserved."; +static char sccsid[] __unused = "@(#)strptime.c 0.1 (Powerdog) 94/03/27"; +#endif /* !defined NOID */ +#endif /* not lint */ +__FBSDID("$FreeBSD$"); + +#include "namespace.h" +#include +#include +#include +#include +#include +#include +#include "un-namespace.h" +#include "libc_private.h" +#include "timelocal.h" +#include "tzfile.h" + +static char * _strptime(const char *, const char *, struct tm *, int *, locale_t); + +#define asizeof(a) (sizeof(a) / sizeof((a)[0])) + +#define FLAG_NONE (1 << 0) +#define FLAG_YEAR (1 << 1) +#define FLAG_MONTH (1 << 2) +#define FLAG_YDAY (1 << 3) +#define FLAG_MDAY (1 << 4) +#define FLAG_WDAY (1 << 5) + +/* + * Calculate the week day of the first day of a year. Valid for + * the Gregorian calendar, which began Sept 14, 1752 in the UK + * and its colonies. Ref: + * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week + */ + +static int +first_wday_of(int year) +{ + return (((2 * (3 - (year / 100) % 4)) + (year % 100) + + ((year % 100) / 4) + (isleap(year) ? 
6 : 0) + 1) % 7); +} + +static char * +_strptime(const char *buf, const char *fmt, struct tm *tm, int *GMTp, + locale_t locale) +{ + char c; + const char *ptr; + int day_offset = -1, wday_offset; + int week_offset; + int i, len; + int flags; + int Ealternative, Oalternative; + const struct lc_time_T *tptr = __get_current_time_locale(locale); + static int start_of_month[2][13] = { + {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, + {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366} + }; + + flags = FLAG_NONE; + + ptr = fmt; + while (*ptr != 0) { + c = *ptr++; + + if (c != '%') { + if (isspace_l((unsigned char)c, locale)) + while (*buf != 0 && + isspace_l((unsigned char)*buf, locale)) + buf++; + else if (c != *buf++) + return (NULL); + continue; + } + + Ealternative = 0; + Oalternative = 0; +label: + c = *ptr++; + switch (c) { + case '%': + if (*buf++ != '%') + return (NULL); + break; + + case '+': + buf = _strptime(buf, tptr->date_fmt, tm, GMTp, locale); + if (buf == NULL) + return (NULL); + flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR; + break; + + case 'C': + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + /* XXX This will break for 3-digit centuries. 
*/ + len = 2; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (i < 19) + return (NULL); + + tm->tm_year = i * 100 - TM_YEAR_BASE; + flags |= FLAG_YEAR; + + break; + + case 'c': + buf = _strptime(buf, tptr->c_fmt, tm, GMTp, locale); + if (buf == NULL) + return (NULL); + flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR; + break; + + case 'D': + buf = _strptime(buf, "%m/%d/%y", tm, GMTp, locale); + if (buf == NULL) + return (NULL); + flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR; + break; + + case 'E': + if (Ealternative || Oalternative) + break; + Ealternative++; + goto label; + + case 'O': + if (Ealternative || Oalternative) + break; + Oalternative++; + goto label; + + case 'F': + buf = _strptime(buf, "%Y-%m-%d", tm, GMTp, locale); + if (buf == NULL) + return (NULL); + flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR; + break; + + case 'R': + buf = _strptime(buf, "%H:%M", tm, GMTp, locale); + if (buf == NULL) + return (NULL); + break; + + case 'r': + buf = _strptime(buf, tptr->ampm_fmt, tm, GMTp, locale); + if (buf == NULL) + return (NULL); + break; + + case 'T': + buf = _strptime(buf, "%H:%M:%S", tm, GMTp, locale); + if (buf == NULL) + return (NULL); + break; + + case 'X': + buf = _strptime(buf, tptr->X_fmt, tm, GMTp, locale); + if (buf == NULL) + return (NULL); + break; + + case 'x': + buf = _strptime(buf, tptr->x_fmt, tm, GMTp, locale); + if (buf == NULL) + return (NULL); + flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR; + break; + + case 'j': + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + len = 3; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++){ + i *= 10; + i += *buf - '0'; + len--; + } + if (i < 1 || i > 366) + return (NULL); + + tm->tm_yday = i - 1; + flags |= FLAG_YDAY; + + break; + + case 'M': + case 'S': + if (*buf == 0 || + isspace_l((unsigned char)*buf, locale)) + break; + + if (!isdigit_l((unsigned char)*buf, locale)) 
+ return (NULL); + + len = 2; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++){ + i *= 10; + i += *buf - '0'; + len--; + } + + if (c == 'M') { + if (i > 59) + return (NULL); + tm->tm_min = i; + } else { + if (i > 60) + return (NULL); + tm->tm_sec = i; + } + + break; + + case 'H': + case 'I': + case 'k': + case 'l': + /* + * Of these, %l is the only specifier explicitly + * documented as not being zero-padded. However, + * there is no harm in allowing zero-padding. + * + * XXX The %l specifier may gobble one too many + * digits if used incorrectly. + */ + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + len = 2; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (c == 'H' || c == 'k') { + if (i > 23) + return (NULL); + } else if (i > 12) + return (NULL); + + tm->tm_hour = i; + + break; + + case 'p': + /* + * XXX This is bogus if parsed before hour-related + * specifiers. 
+ */ + len = strlen(tptr->am); + if (strncasecmp_l(buf, tptr->am, len, locale) == 0) { + if (tm->tm_hour > 12) + return (NULL); + if (tm->tm_hour == 12) + tm->tm_hour = 0; + buf += len; + break; + } + + len = strlen(tptr->pm); + if (strncasecmp_l(buf, tptr->pm, len, locale) == 0) { + if (tm->tm_hour > 12) + return (NULL); + if (tm->tm_hour != 12) + tm->tm_hour += 12; + buf += len; + break; + } + + return (NULL); + + case 'A': + case 'a': + for (i = 0; i < asizeof(tptr->weekday); i++) { + len = strlen(tptr->weekday[i]); + if (strncasecmp_l(buf, tptr->weekday[i], + len, locale) == 0) + break; + len = strlen(tptr->wday[i]); + if (strncasecmp_l(buf, tptr->wday[i], + len, locale) == 0) + break; + } + if (i == asizeof(tptr->weekday)) + return (NULL); + + buf += len; + tm->tm_wday = i; + flags |= FLAG_WDAY; + break; + + case 'U': + case 'W': + /* + * XXX This is bogus, as we can not assume any valid + * information present in the tm structure at this + * point to calculate a real value, so just check the + * range for now. + */ + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + len = 2; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (i > 53) + return (NULL); + + if (c == 'U') + day_offset = TM_SUNDAY; + else + day_offset = TM_MONDAY; + + + week_offset = i; + + break; + + case 'w': + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + i = *buf - '0'; + if (i > 6) + return (NULL); + + tm->tm_wday = i; + flags |= FLAG_WDAY; + + break; + + case 'e': + /* + * With %e format, our strftime(3) adds a blank space + * before single digits. + */ + if (*buf != 0 && + isspace_l((unsigned char)*buf, locale)) + buf++; + /* FALLTHROUGH */ + case 'd': + /* + * The %e specifier was once explicitly documented as + * not being zero-padded but was later changed to + * equivalent to %d. There is no harm in allowing + * such padding. 
+ * + * XXX The %e specifier may gobble one too many + * digits if used incorrectly. + */ + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + len = 2; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (i > 31) + return (NULL); + + tm->tm_mday = i; + flags |= FLAG_MDAY; + + break; + + case 'B': + case 'b': + case 'h': + for (i = 0; i < asizeof(tptr->month); i++) { + if (Oalternative) { + if (c == 'B') { + len = strlen(tptr->alt_month[i]); + if (strncasecmp_l(buf, + tptr->alt_month[i], + len, locale) == 0) + break; + } + } else { + len = strlen(tptr->month[i]); + if (strncasecmp_l(buf, tptr->month[i], + len, locale) == 0) + break; + } + } + /* + * Try the abbreviated month name if the full name + * wasn't found and Oalternative was not requested. + */ + if (i == asizeof(tptr->month) && !Oalternative) { + for (i = 0; i < asizeof(tptr->month); i++) { + len = strlen(tptr->mon[i]); + if (strncasecmp_l(buf, tptr->mon[i], + len, locale) == 0) + break; + } + } + if (i == asizeof(tptr->month)) + return (NULL); + + tm->tm_mon = i; + buf += len; + flags |= FLAG_MONTH; + + break; + + case 'm': + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + len = 2; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (i < 1 || i > 12) + return (NULL); + + tm->tm_mon = i - 1; + flags |= FLAG_MONTH; + + break; + + case 's': + { + char *cp; + int sverrno; + long n; + time_t t; + + sverrno = errno; + errno = 0; + n = strtol_l(buf, &cp, 10, locale); + if (errno == ERANGE || (long)(t = n) != n) { + errno = sverrno; + return (NULL); + } + errno = sverrno; + buf = cp; + if (gmtime_r(&t, tm) == NULL) + return (NULL); + *GMTp = 1; + flags |= FLAG_YDAY | FLAG_WDAY | FLAG_MONTH | + FLAG_MDAY | FLAG_YEAR; + } + break; + + case 'Y': + case 'y': + if (*buf == 0 || + isspace_l((unsigned char)*buf, locale)) + 
break; + + if (!isdigit_l((unsigned char)*buf, locale)) + return (NULL); + + len = (c == 'Y') ? 4 : 2; + for (i = 0; len && *buf != 0 && + isdigit_l((unsigned char)*buf, locale); buf++) { + i *= 10; + i += *buf - '0'; + len--; + } + if (c == 'Y') + i -= TM_YEAR_BASE; + if (c == 'y' && i < 69) + i += 100; + if (i < 0) + return (NULL); + + tm->tm_year = i; + flags |= FLAG_YEAR; + + break; + + case 'Z': + { + const char *cp; + char *zonestr; + + for (cp = buf; *cp && + isupper_l((unsigned char)*cp, locale); ++cp) { + /*empty*/} + if (cp - buf) { + zonestr = alloca(cp - buf + 1); + strncpy(zonestr, buf, cp - buf); + zonestr[cp - buf] = '\0'; + tzset(); + if (0 == strcmp(zonestr, "GMT") || + 0 == strcmp(zonestr, "UTC")) { + *GMTp = 1; + } else if (0 == strcmp(zonestr, tzname[0])) { + tm->tm_isdst = 0; + } else if (0 == strcmp(zonestr, tzname[1])) { + tm->tm_isdst = 1; + } else { + return (NULL); + } + buf += cp - buf; + } + } + break; + + case 'z': + { + int sign = 1; + + if (*buf != '+') { + if (*buf == '-') + sign = -1; + else + return (NULL); + } + + buf++; + i = 0; + for (len = 4; len > 0; len--) { + if (isdigit_l((unsigned char)*buf, locale)) { + i *= 10; + i += *buf - '0'; + buf++; + } else + return (NULL); + } + + tm->tm_hour -= sign * (i / 100); + tm->tm_min -= sign * (i % 100); + *GMTp = 1; + } + break; + + case 'n': + case 't': + while (isspace_l((unsigned char)*buf, locale)) + buf++; + break; + + default: + return (NULL); + } + } + + if (!(flags & FLAG_YDAY) && (flags & FLAG_YEAR)) { + if ((flags & (FLAG_MONTH | FLAG_MDAY)) == + (FLAG_MONTH | FLAG_MDAY)) { + tm->tm_yday = start_of_month[isleap(tm->tm_year + + TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1); + flags |= FLAG_YDAY; + } else if (day_offset != -1) { + /* Set the date to the first Sunday (or Monday) + * of the specified week of the year. 
+ */ + if (!(flags & FLAG_WDAY)) { + tm->tm_wday = day_offset; + flags |= FLAG_WDAY; + } + tm->tm_yday = (7 - + first_wday_of(tm->tm_year + TM_YEAR_BASE) + + day_offset) % 7 + (week_offset - 1) * 7 + + tm->tm_wday - day_offset; + flags |= FLAG_YDAY; + } + } + + if ((flags & (FLAG_YEAR | FLAG_YDAY)) == (FLAG_YEAR | FLAG_YDAY)) { + if (!(flags & FLAG_MONTH)) { + i = 0; + while (tm->tm_yday >= + start_of_month[isleap(tm->tm_year + + TM_YEAR_BASE)][i]) + i++; + if (i > 12) { + i = 1; + tm->tm_yday -= + start_of_month[isleap(tm->tm_year + + TM_YEAR_BASE)][12]; + tm->tm_year++; + } + tm->tm_mon = i - 1; + flags |= FLAG_MONTH; + } + if (!(flags & FLAG_MDAY)) { + tm->tm_mday = tm->tm_yday - + start_of_month[isleap(tm->tm_year + TM_YEAR_BASE)] + [tm->tm_mon] + 1; + flags |= FLAG_MDAY; + } + if (!(flags & FLAG_WDAY)) { + i = 0; + wday_offset = first_wday_of(tm->tm_year); + while (i++ <= tm->tm_yday) { + if (wday_offset++ >= 6) + wday_offset = 0; + } + tm->tm_wday = wday_offset; + flags |= FLAG_WDAY; + } + } + + return ((char *)buf); +} + +char * +strptime_l(const char * __restrict buf, const char * __restrict fmt, + struct tm * __restrict tm, locale_t loc) +{ + char *ret; + int gmt; + FIX_LOCALE(loc); + + gmt = 0; + ret = _strptime(buf, fmt, tm, &gmt, loc); + if (ret && gmt) { + time_t t = timegm(tm); + + localtime_r(&t, tm); + } + + return (ret); +} + +char * +strptime(const char * __restrict buf, const char * __restrict fmt, + struct tm * __restrict tm) +{ + return strptime_l(buf, fmt, tm, __get_locale()); +} diff --git a/libbutl/tab-parser.cxx b/libbutl/tab-parser.cxx new file mode 100644 index 0000000..bf0a7dd --- /dev/null +++ b/libbutl/tab-parser.cxx @@ -0,0 +1,88 @@ +// file : libbutl/tab-parser.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include + +#include + +using namespace std; + +namespace butl +{ + using parsing = tab_parsing; + + // tab_parser + // + 
tab_fields tab_parser:: + next () + { + tab_fields r; + + // Read lines until a non-empty one or EOF is encountered. In the first + // case parse the line and bail out. + // + // Note that we check for character presence in the stream prior to the + // getline() call, to prevent it from setting the failbit. + // + while (!is_.eof () && is_.peek () != istream::traits_type::eof ()) + { + string s; + getline (is_, s); + + ++line_; + + // Skip empty line. + // + auto i (s.begin ()); + auto e (s.end ()); + for (; i != e && (*i == ' ' || *i == '\t'); ++i) ; // Skip spaces. + + if (i == e || *i == '#') + continue; + + r.line = line_; + r.end_column = s.size () + 1; // Newline position. + + vector> sp; + + try + { + sp = string_parser::parse_quoted_position (s, false); + } + catch (const invalid_string& e) + { + throw parsing (name_, line_, e.position + 1, e.what ()); + } + + for (auto& s: sp) + r.emplace_back (tab_field ({move (s.first), s.second + 1})); + + break; + } + + return r; + } + + // tab_parsing + // + static string + format (const string& n, uint64_t l, uint64_t c, const string& d) + { + ostringstream os; + if (!n.empty ()) + os << n << ':'; + os << l << ':' << c << ": error: " << d; + return os.str (); + } + + tab_parsing:: + tab_parsing (const string& n, uint64_t l, uint64_t c, const string& d) + : runtime_error (format (n, l, c, d)), + name (n), line (l), column (c), description (d) + { + } +} diff --git a/libbutl/tab-parser.hxx b/libbutl/tab-parser.hxx new file mode 100644 index 0000000..311c54b --- /dev/null +++ b/libbutl/tab-parser.hxx @@ -0,0 +1,72 @@ +// file : libbutl/tab-parser.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_TAB_PARSER_HXX +#define LIBBUTL_TAB_PARSER_HXX + +#include +#include +#include +#include // uint64_t +#include // runtime_error + +#include + +namespace butl +{ + class LIBBUTL_EXPORT tab_parsing: public std::runtime_error + { + public: 
+ tab_parsing (const std::string& name, + std::uint64_t line, + std::uint64_t column, + const std::string& description); + + std::string name; + std::uint64_t line; + std::uint64_t column; + std::string description; + }; + + // Line and column numbers are useful for issuing diagnostics about invalid + // or missing fields. + // + struct tab_field + { + std::string value; // Field string (quoting preserved). + std::uint64_t column; // Field start column number (one-based). + }; + + struct tab_fields: std::vector + { + std::uint64_t line; // Line number (one-based). + std::uint64_t end_column; // End-of-line column (line length plus one). + }; + + // Read and parse lines consisting of space-separated fields. A field can + // contain single or double quoted substrings (with spaces) which are + // interpreted but preserved. No escaping of the quote characters is + // supported. Blank lines and lines that start with # (collectively called + // empty lines) are ignored. + // + class LIBBUTL_EXPORT tab_parser + { + public: + tab_parser (std::istream& is, const std::string& name) + : is_ (is), name_ (name) {} + + // Return next line of fields. Skip empty lines. Empty result denotes the + // end of stream. + // + tab_fields + next (); + + private: + std::istream& is_; + const std::string name_; + std::uint64_t line_ = 0; + }; +} + +#endif // LIBBUTL_TAB_PARSER_HXX diff --git a/libbutl/target-triplet.cxx b/libbutl/target-triplet.cxx new file mode 100644 index 0000000..fc805a4 --- /dev/null +++ b/libbutl/target-triplet.cxx @@ -0,0 +1,145 @@ +// file : libbutl/target-triplet.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include // invalid_argument + +using namespace std; + +namespace butl +{ + target_triplet:: + target_triplet (const std::string& s) + { + using std::string; + + auto bad = [](const char* m) {throw invalid_argument (m);}; + + // Find the first and the last components. 
The first is CPU and the last is + // (part of) SYSTEM, that we know for sure. + // + string::size_type f (s.find ('-')), l (s.rfind ('-')); + + if (f == 0 || f == string::npos) + bad ("missing cpu"); + + cpu.assign (s, 0, f); + + // If we have something in between, then the first component after CPU is + // VENDOR. Unless it is a first component of two-component system, as in + // i686-linux-gnu. + // + if (f != l) + { + // [f, p) is VENDOR. + // + string::size_type p (s.find ('-', ++f)), n (p - f); + + if (n == 0) + bad ("empty vendor"); + + // Do we have all four components? If so, then we don't need to do any + // special recognition of two-component systems. + // + if (l != p) + { + l = s.rfind ('-', --l); + + if (l != p) + bad ("too many components"); + + // Handle the none-* case here. + // + if (s.compare (l + 1, 5, "none-") == 0) + l += 5; + } + else + { + // See if this is one of the well-known non-vendors. + // + if (s.compare (f, n, "linux") == 0 || + s.compare (f, n, "kfreebsd") == 0) + { + l = f - 1; + n = 0; // No VENDOR. + } + } + + // Handle special VENDOR values. + // + if (n != 0) + { + if (s.compare (f, n, "pc") != 0 && + s.compare (f, n, "none") != 0 && + s.compare (f, n, "unknown") != 0) + vendor.assign (s, f, n); + } + } + + // (l, npos) is SYSTEM + // + system.assign (s, ++l, string::npos); + + if (system.empty ()) + bad ("missing os/kernel/abi"); + + if (system.front () == '-' || system.back () == '-') + bad ("invalid os/kernel/abi"); + + // Extract VERSION for some recognized systems. 
+ // + string::size_type v (0); + if (system.compare (0, (v = 6), "darwin") == 0 || + system.compare (0, (v = 7), "freebsd") == 0 || + system.compare (0, (v = 7), "openbsd") == 0 || + system.compare (0, (v = 6), "netbsd") == 0 || + system.compare (0, (v = 7), "solaris") == 0 || + system.compare (0, (v = 3), "aix") == 0 || + system.compare (0, (v = 4), "hpux") == 0 || + system.compare (0, (v = 10), "win32-msvc") == 0) + { + version.assign (system, v, string::npos); + system.resize (system.size () - version.size ()); + } + + // Determine class for some recognized systems. + // + if (system.compare (0, 5, "linux") == 0) + class_ = "linux"; + else if (vendor == "apple" && system == "darwin") + class_ = "macos"; + else if (system == "freebsd" || system == "openbsd" || system == "netbsd") + class_ = "bsd"; + else if (system.compare (0, 5, "win32") == 0 || system == "mingw32") + class_ = "windows"; + else + class_ = "other"; + } + + std::string target_triplet:: + string () const + { + std::string r (cpu); + + if (!vendor.empty ()) + { + if (!r.empty ()) r += '-'; + r += vendor; + } + + if (!system.empty ()) + { + if (!r.empty ()) r += '-'; + r += system; + } + + if (!version.empty ()) + { + r += version; + } + + return r; + } +} diff --git a/libbutl/target-triplet.hxx b/libbutl/target-triplet.hxx new file mode 100644 index 0000000..214a781 --- /dev/null +++ b/libbutl/target-triplet.hxx @@ -0,0 +1,155 @@ +// file : libbutl/target-triplet.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_TARGET_TRIPLET_HXX +#define LIBBUTL_TARGET_TRIPLET_HXX + +#include +#include + +#include + +namespace butl +{ + // This is the ubiquitous 'target triplet' that loosely has the CPU-VENDOR-OS + // form which, these days, quite often takes the CPU-VENDOR-OS-ABI form. Plus + // some fields can sometimes be omitted. 
This looseness makes it hard to base + // any kind of decisions on the triplet without canonicalizing it and then + // splitting it into components. The way we are going to split it is like + // this: + // + // CPU + // + // This one is reasonably straightforward. Note that we always expect at + // least two components with the first being the CPU. In other words, we + // don't try to guess what just 'mingw32' might mean like config.sub does. + // + // VENDOR + // + // This can be a machine vendor as in i686-apple-darwin8, a toolchain vendor + // as in i686-lfs-linux-gnu, or something else as in arm-softfloat-linux-gnu. + // Just as we think vendor is pretty irrelevant and can be ignored, comes + // MinGW-W64 and calls itself *-w64-mingw32. While it is tempting to + // attribute w64 to OS-ABI, the MinGW-W64 folks insist it is a (presumably + // toolchain) vendor. + // + // Another example where the vendor seems to be reused for something else + // entirely is the Intel's MIC architecture: x86_64-k1om-linux. + // + // To make things more regular we also convert the information-free vendor + // names 'pc', 'unknown' and 'none' to the empty name. + // + // OS/KERNEL-OS/OS-ABI + // + // This is where things get really messy and instead of trying to guess, we + // call the entire thing SYSTEM. Except, in certain cases, we factor out the + // trailing version, again, to make SYSTEM easier to compare to. For example, + // *-darwin14.5.0 becomes 'darwin' and '14.5.0'. + // + // Again, to make things more regular, if the first component in SYSTEM is + // none, then it is removed (so *-none-eabi becomes just 'eabi'). + // + // Values for two-component systems (e.g., linux-gnu) that don't specify + // VENDOR explicitly are inherently ambiguous: is 'linux' VENDOR or part of + // SYSTEM? The only way to handle this is to recognize their specific names + // as special cases and this is what we do for some of the more common + // ones. 
The alternative would be to first run such names through config.sub + // which adds explicit VENDOR and this could be a reasonable fallback + // strategy for (presumably less common) cases where we don't split things + // correctly. + // + // Note also that the version splitting is only done for certain commonly- + // used targets. + // + // Some examples of canonicalization and splitting: + // + // x86_64-apple-darwin14.5.0 x86_64 apple darwin 14.5.0 + // x86_64-unknown-freebsd10.2 x86_64 freebsd 10.2 + // i686-elf i686 elf + // arm-eabi arm eabi + // arm-none-eabi arm eabi + // arm-none-linux-gnueabi arm linux-gnueabi + // arm-softfloat-linux-gnu arm softfloat linux-gnu + // i686-pc-mingw32 i686 mingw32 + // i686-w64-mingw32 i686 w64 mingw32 + // i686-lfs-linux-gnu i686 lfs linux-gnu + // x86_64-unknown-linux-gnu x86_64 linux-gnu + // x86_64-linux-gnux32 x86_64 linux-gnux32 + // x86_64-microsoft-win32-msvc14.0 x86_64 microsoft win32-msvc 14.0 + // + // Similar to version splitting, for certain commonly-used targets we also + // derive the "target class" which can be used as a shorthand, more + // convenient way to identify a target. If the target is not recognized, + // then the special 'other' value is used. Currently the following classes + // are recognized: + // + // linux *-*-linux-* + // macos *-apple-darwin* + // bsd *-*-(freebsd|openbsd|netbsd)* + // windows *-*-win32-* | *-*-mingw32 + // + // References: + // + // 1. The libtool repository contains the PLATFORM file that lists many known + // triplets. + // + // 2. LLVM has the Triple class with similar goals. + // + struct LIBBUTL_EXPORT target_triplet + { + std::string cpu; + std::string vendor; + std::string system; + std::string version; + std::string class_; + + // Assemble and return the canonical (i.e., the one we round-trip) + // target triplet string. 
+ // + std::string + string () const; + + bool + empty () const {return cpu.empty ();} + + int + compare (const target_triplet& y) const + { + int r; + return + (r = cpu.compare (y.cpu)) != 0 ? r : + (r = vendor.compare (y.vendor)) != 0 ? r : + (r = system.compare (y.system)) != 0 ? r : + ( version.compare (y.version)); + } + + // Parse the triplet. Throw std::invalid_argument if the triplet is not + // recognizable. + // + explicit + target_triplet (const std::string&); + + target_triplet () = default; + }; + + inline bool + operator== (const target_triplet& x, const target_triplet& y) + { + return x.compare (y) == 0; + } + + inline bool + operator!= (const target_triplet& x, const target_triplet& y) + { + return !(x == y); + } + + inline std::ostream& + operator<< (std::ostream& o, const target_triplet& x) + { + return o << x.string (); + } +} + +#endif // LIBBUTL_TARGET_TRIPLET_HXX diff --git a/libbutl/timelocal.c b/libbutl/timelocal.c new file mode 100644 index 0000000..7194341 --- /dev/null +++ b/libbutl/timelocal.c @@ -0,0 +1,157 @@ +/*- + * Copyright (c) 2001 Alexey Zelkin + * Copyright (c) 1997 FreeBSD Inc. + * All rights reserved. + * + * Copyright (c) 2011 The FreeBSD Foundation + * All rights reserved. + * Portions of this software were developed by David Chisnall + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* +#include +__FBSDID("$FreeBSD$"); + +#include + +#include "ldpart.h" +#include "timelocal.h" + +struct xlocale_time { + struct xlocale_component header; + char *buffer; + struct lc_time_T locale; +}; + +struct xlocale_time __xlocale_global_time; + +#define LCTIME_SIZE (sizeof(struct lc_time_T) / sizeof(char *)) + +*/ +static const struct lc_time_T _C_time_locale = { + { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" + }, { + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December" + }, { + "Sun", "Mon", "Tue", "Wed", + "Thu", "Fri", "Sat" + }, { + "Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday" + }, + + /* X_fmt */ + "%H:%M:%S", + + /* + * x_fmt + * Since the C language standard calls for + * "date, using locale's date format," anything goes. + * Using just numbers (as here) makes Quakers happier; + * it's also compatible with SVR4. 
+ */ + "%m/%d/%y", + + /* + * c_fmt + */ + "%a %b %e %H:%M:%S %Y", + + /* am */ + "AM", + + /* pm */ + "PM", + + /* date_fmt */ + "%a %b %e %H:%M:%S %Z %Y", + + /* alt_month + * Standalone months forms for %OB + */ + { + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December" + }, + + /* md_order + * Month / day order in dates + */ + "md", + + /* ampm_fmt + * To determine 12-hour clock format time (empty, if N/A) + */ + "%I:%M:%S %p" +}; + +/* +static void destruct_time(void *v) +{ + struct xlocale_time *l = v; + if (l->buffer) + free(l->buffer); + free(l); +} + +#include +struct lc_time_T * +__get_current_time_locale(locale_t loc) +{ + return (loc->using_time_locale + ? &((struct xlocale_time *)loc->components[XLC_TIME])->locale + : (struct lc_time_T *)&_C_time_locale); +} + +static int +time_load_locale(struct xlocale_time *l, int *using_locale, const char *name) +{ + struct lc_time_T *time_locale = &l->locale; + return (__part_load_locale(name, using_locale, + &l->buffer, "LC_TIME", + LCTIME_SIZE, LCTIME_SIZE, + (const char **)time_locale)); +} +int +__time_load_locale(const char *name) +{ + return time_load_locale(&__xlocale_global_time, + &__xlocale_global_locale.using_time_locale, name); +} +void* __time_load(const char* name, locale_t loc) +{ + struct xlocale_time *new = calloc(sizeof(struct xlocale_time), 1); + new->header.header.destructor = destruct_time; + if (time_load_locale(new, &loc->using_time_locale, name) == _LDP_ERROR) + { + xlocale_release(new); + return NULL; + } + return new; +} +*/ diff --git a/libbutl/timelocal.c.orig b/libbutl/timelocal.c.orig new file mode 100644 index 0000000..48c3509 --- /dev/null +++ b/libbutl/timelocal.c.orig @@ -0,0 +1,153 @@ +/*- + * Copyright (c) 2001 Alexey Zelkin + * Copyright (c) 1997 FreeBSD Inc. + * All rights reserved. + * + * Copyright (c) 2011 The FreeBSD Foundation + * All rights reserved. 
+ * Portions of this software were developed by David Chisnall + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include "ldpart.h" +#include "timelocal.h" + +struct xlocale_time { + struct xlocale_component header; + char *buffer; + struct lc_time_T locale; +}; + +struct xlocale_time __xlocale_global_time; + +#define LCTIME_SIZE (sizeof(struct lc_time_T) / sizeof(char *)) + +static const struct lc_time_T _C_time_locale = { + { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" + }, { + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December" + }, { + "Sun", "Mon", "Tue", "Wed", + "Thu", "Fri", "Sat" + }, { + "Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday" + }, + + /* X_fmt */ + "%H:%M:%S", + + /* + * x_fmt + * Since the C language standard calls for + * "date, using locale's date format," anything goes. + * Using just numbers (as here) makes Quakers happier; + * it's also compatible with SVR4. + */ + "%m/%d/%y", + + /* + * c_fmt + */ + "%a %b %e %H:%M:%S %Y", + + /* am */ + "AM", + + /* pm */ + "PM", + + /* date_fmt */ + "%a %b %e %H:%M:%S %Z %Y", + + /* alt_month + * Standalone months forms for %OB + */ + { + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December" + }, + + /* md_order + * Month / day order in dates + */ + "md", + + /* ampm_fmt + * To determine 12-hour clock format time (empty, if N/A) + */ + "%I:%M:%S %p" +}; + +static void destruct_time(void *v) +{ + struct xlocale_time *l = v; + if (l->buffer) + free(l->buffer); + free(l); +} + +#include +struct lc_time_T * +__get_current_time_locale(locale_t loc) +{ + return (loc->using_time_locale + ? 
&((struct xlocale_time *)loc->components[XLC_TIME])->locale + : (struct lc_time_T *)&_C_time_locale); +} + +static int +time_load_locale(struct xlocale_time *l, int *using_locale, const char *name) +{ + struct lc_time_T *time_locale = &l->locale; + return (__part_load_locale(name, using_locale, + &l->buffer, "LC_TIME", + LCTIME_SIZE, LCTIME_SIZE, + (const char **)time_locale)); +} +int +__time_load_locale(const char *name) +{ + return time_load_locale(&__xlocale_global_time, + &__xlocale_global_locale.using_time_locale, name); +} +void* __time_load(const char* name, locale_t loc) +{ + struct xlocale_time *new = calloc(sizeof(struct xlocale_time), 1); + new->header.header.destructor = destruct_time; + if (time_load_locale(new, &loc->using_time_locale, name) == _LDP_ERROR) + { + xlocale_release(new); + return NULL; + } + return new; +} diff --git a/libbutl/timelocal.h b/libbutl/timelocal.h new file mode 100644 index 0000000..d9b77b9 --- /dev/null +++ b/libbutl/timelocal.h @@ -0,0 +1,65 @@ +/*- + * Copyright (c) 1997-2002 FreeBSD Project. + * All rights reserved. + * + * Copyright (c) 2011 The FreeBSD Foundation + * All rights reserved. + * Portions of this software were developed by David Chisnall + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _TIMELOCAL_H_ +#define _TIMELOCAL_H_ +/* +#include "xlocale_private.h" +*/ + +/* + * Private header file for the strftime and strptime localization + * stuff. + */ +struct lc_time_T { + const char *mon[12]; + const char *month[12]; + const char *wday[7]; + const char *weekday[7]; + const char *X_fmt; + const char *x_fmt; + const char *c_fmt; + const char *am; + const char *pm; + const char *date_fmt; + const char *alt_month[12]; + const char *md_order; + const char *ampm_fmt; +}; + +/* +struct lc_time_T *__get_current_time_locale(locale_t); +int __time_load_locale(const char *); + +*/ +#endif /* !_TIMELOCAL_H_ */ diff --git a/libbutl/timelocal.h.orig b/libbutl/timelocal.h.orig new file mode 100644 index 0000000..2e44415 --- /dev/null +++ b/libbutl/timelocal.h.orig @@ -0,0 +1,61 @@ +/*- + * Copyright (c) 1997-2002 FreeBSD Project. + * All rights reserved. + * + * Copyright (c) 2011 The FreeBSD Foundation + * All rights reserved. + * Portions of this software were developed by David Chisnall + * under sponsorship from the FreeBSD Foundation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _TIMELOCAL_H_ +#define _TIMELOCAL_H_ +#include "xlocale_private.h" + +/* + * Private header file for the strftime and strptime localization + * stuff. 
+ */ +struct lc_time_T { + const char *mon[12]; + const char *month[12]; + const char *wday[7]; + const char *weekday[7]; + const char *X_fmt; + const char *x_fmt; + const char *c_fmt; + const char *am; + const char *pm; + const char *date_fmt; + const char *alt_month[12]; + const char *md_order; + const char *ampm_fmt; +}; + +struct lc_time_T *__get_current_time_locale(locale_t); +int __time_load_locale(const char *); + +#endif /* !_TIMELOCAL_H_ */ diff --git a/libbutl/timestamp.cxx b/libbutl/timestamp.cxx new file mode 100644 index 0000000..1f12f41 --- /dev/null +++ b/libbutl/timestamp.cxx @@ -0,0 +1,612 @@ +// file : libbutl/timestamp.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include // localtime_{r,s}(), gmtime_{r,s}(), strptime(), timegm() +#include // EINVAL + +#include // tm, time_t, mktime() +#include // strtoull() +#include +#include // put_time(), setw(), dec, right +#include // strlen(), memcpy() +#include +#include // pair, make_pair() +#include // runtime_error + +#include // throw_generic_error() + +using namespace std; + +// libstdc++ prior to GCC 5 does not have std::put_time() so we have to invent +// our own. Detecting the "prior to GCC 5" condition, however, is not easy: +// libstdc++ is used by other compilers (e.g., Clang) so we cannot just use +// __GNUC__. There is __GLIBCXX__ but it is a date which is updated with +// every release, including bugfixes (so, there can be some 4.7.X release with +// a date greater than 5.0.0). +// +// So what we are going to do here is "offer" our implementation and let the +// ADL pick one. If there is std::put_time(), then it will be preferred +// because of the std::tm argument. 
+// +#ifdef __GLIBCXX__ + +#include // tm, strftime() +#include + +namespace details +{ + struct put_time_data + { + const std::tm* tm; + const char* fmt; + }; + + inline put_time_data + put_time (const std::tm* tm, const char* fmt) + { + return put_time_data {tm, fmt}; + } + + inline ostream& + operator<< (ostream& os, const put_time_data& d) + { + char buf[256]; + if (strftime (buf, sizeof (buf), d.fmt, d.tm) != 0) + os << buf; + else + os.setstate (ostream::badbit); + return os; + } +} + +using namespace details; + +#endif + +// Thread-safe implementations of gmtime() and localtime(). +// +// Normally we would provide POSIX function replacement for Windows if the +// original function is absent. However, MinGW GCC can sometimes provide them. +// And so to avoid name clashes we hide them in the details namespace. +// +// Previously we have used gmtime_s() and localtime_s() for gmtime() and +// localtime() implementations for Windows, but that required Security-Enhanced +// version of CRT to be present, which is not always the case. In particular if +// MinGW is configured with --disable-secure-api option then declarations of +// *_s() functions are not available. So we use ::gmtime() and ::localtime() +// for that purpose. Note that according to MSDN "gmtime and localtime all use +// one common tm structure per thread for the conversion", which means that +// they are thread-safe. 
+// +namespace details +{ + static tm* + gmtime (const time_t* t, tm* r) + { +#ifdef _WIN32 + const tm* gt (::gmtime (t)); + if (gt == nullptr) + return nullptr; + + *r = *gt; + return r; +#else + return gmtime_r (t, r); +#endif + } + + static tm* + localtime (const time_t* t, tm* r) + { +#ifdef _WIN32 + const tm* lt (::localtime (t)); + if (lt == nullptr) + return nullptr; + + *r = *lt; + return r; +#else + return localtime_r (t, r); +#endif + } +} + +namespace butl +{ + ostream& + to_stream (ostream& os, + const timestamp& ts, + const char* format, + bool special, + bool local) + { + if (special) + { + if (ts == timestamp_unknown) + return os << ""; + + if (ts == timestamp_nonexistent) + return os << ""; + } + + time_t t (system_clock::to_time_t (ts)); + + std::tm tm; + if ((local + ? details::localtime (&t, &tm) + : details::gmtime (&t, &tm)) == nullptr) + throw_generic_error (errno); + + using namespace chrono; + + timestamp sec (system_clock::from_time_t (t)); + nanoseconds ns (duration_cast (ts - sec)); + + char fmt[256]; + size_t n (strlen (format)); + if (n + 1 > sizeof (fmt)) + throw_generic_error (EINVAL); + memcpy (fmt, format, n + 1); + + // Chunk the format string into fragments that we feed to put_time() and + // those that we handle ourselves. Watch out for the escapes (%%). + // + size_t i (0), j (0); // put_time()'s range. + for (; j != n; ++j) + { + if (fmt[j] == '%' && j + 1 != n) + { + if (fmt[j + 1] == '[') + { + if (os.width () != 0) + throw runtime_error ( + "padding is not supported when printing nanoseconds"); + + // Our fragment. First see if we need to call put_time(). + // + if (i != j) + { + fmt[j] = '\0'; + if (!(os << put_time (&tm, fmt + i))) + return os; + } + + j += 2; // Character after '['. 
+ if (j == n) + throw_generic_error (EINVAL); + + char d ('\0'); + if (fmt[j] != 'N') + { + d = fmt[j]; + if (++j == n || fmt[j] != 'N') + throw_generic_error (EINVAL); + } + + if (++j == n || fmt[j] != ']') + throw_generic_error (EINVAL); + + if (ns != nanoseconds::zero ()) + { + if (d != '\0') + os << d; + + ostream::fmtflags fl (os.flags ()); + char fc (os.fill ('0')); + os << dec << right << setw (9) << ns.count (); + os.fill (fc); + os.flags (fl); + } + + i = j + 1; // j is incremented in the for-loop header. + } + else + ++j; // Skip % and the next character to handle %%. + } + } + + // Do we need to call put_time() one last time? + // + if (i != j) + { + if (!(os << put_time (&tm, fmt + i))) + return os; + } + + return os; + } + + ostream& + operator<< (ostream& os, const duration& d) + { + if (os.width () != 0) // We always print nanosecond. + throw runtime_error ( + "padding is not supported when printing nanoseconds"); + + timestamp ts; // Epoch. + ts += d; + + time_t t (system_clock::to_time_t (ts)); + + const char* fmt (nullptr); + const char* unt ("nanoseconds"); + if (t >= 365 * 24 * 60 * 60) + { + fmt = "%Y-%m-%d %H:%M:%S"; + unt = "years"; + } + else if (t >= 31 * 24 * 60 * 60) + { + fmt = "%m-%d %H:%M:%S"; + unt = "months"; + } + else if (t >= 24 * 60 * 60) + { + fmt = "%d %H:%M:%S"; + unt = "days"; + } + else if (t >= 60 * 60) + { + fmt = "%H:%M:%S"; + unt = "hours"; + } + else if (t >= 60) + { + fmt = "%M:%S"; + unt = "minutes"; + } + else if (t >= 1) + { + fmt = "%S"; + unt = "seconds"; + } + + if (fmt != nullptr) + { + std::tm tm; + if (details::gmtime (&t, &tm) == nullptr) + throw_generic_error (errno); + + if (t >= 24 * 60 * 60) + tm.tm_mday -= 1; // Make day of the month to be a zero-based number. + + if (t >= 31 * 24 * 60 * 60) + tm.tm_mon -= 1; // Make month of the year to be a zero-based number. + + if (t >= 365 * 24 * 60 * 60) + // Make the year to be a 1970-based number. Negative values allowed + // according to the POSIX specification. 
+ // + tm.tm_year -= 1970; + + if (!(os << put_time (&tm, fmt))) + return os; + } + + using namespace chrono; + + timestamp sec (system_clock::from_time_t (t)); + nanoseconds ns (duration_cast (ts - sec)); + + if (ns != nanoseconds::zero ()) + { + if (fmt != nullptr) + { + ostream::fmtflags fl (os.flags ()); + char fc (os.fill ('0')); + os << '.' << dec << right << setw (9) << ns.count (); + os.fill (fc); + os.flags (fl); + } + else + os << ns.count (); + + os << ' ' << unt; + } + else if (fmt == nullptr) + os << '0'; + + return os; + } +} + +// Implementation of strptime() and timegm() for Windows. +// +// Here we have several cases. If this is VC++, then we implement strptime() +// via C++11 std::get_time(). And if this is MINGW GCC (or, more precisely, +// libstdc++), then we have several problems. Firstly, GCC prior to 5 doesn't +// implement std::get_time(). Secondly, GCC 5 and even 6 have buggy +// std::get_time() (it cannot parse single-digit days). So what we are going +// to do in this case is use a FreeBSD-based strptime() implementation. +// +#ifdef _WIN32 + +#ifdef __GLIBCXX__ + +// Fallback to a FreeBSD-based implementation. +// +extern "C" +{ +#include "strptime.c" +} + +#else // NOT __GLIBCXX__ + +#include // tm +#include +#include +#include +#include +#include // strlen() + +// VC++ std::get_time()-based implementation. +// +static char* +strptime (const char* input, const char* format, tm* time) +{ + istringstream is (input); + + // The original strptime() function behaves according to the process' C + // locale (set with std::setlocale()), which can differ from the process C++ + // locale (set with std::locale::global()). + // + is.imbue (locale (setlocale (LC_ALL, nullptr))); + + if (!(is >> get_time (time, format))) + return nullptr; + else + // tellg() behaves as UnformattedInputFunction, so returns failure status + // if eofbit is set. + // + return const_cast ( + input + (is.eof () + ? 
strlen (input) + : static_cast (is.tellg ()))); +} + +#endif // __GLIBCXX__ + +#include // time_t, tm, mktime() + +static time_t +timegm (tm* ctm) +{ + const time_t e (static_cast (-1)); + + // We will use an example to explain how it works. Say *ctm contains 9 AM of + // some day. Note that no time zone information is available. + // + // Convert it to the time from Epoch as if it's in the local time zone. + // + ctm->tm_isdst = -1; + time_t t (mktime (ctm)); + if (t == e) + return e; + + // Let's say we are in Moscow, and t contains the time passed from Epoch till + // 9 AM MSK. But that is not what we need. What we need is the time passed + // from Epoch till 9 AM GMT. This is some bigger number, as it takes longer + // to achieve the same calendar time for more Western location. So we need to + // find that offset, and increment t with it to obtain the desired value. The + // offset is effectively the time difference between MSK and GMT time zones. + // + tm gtm; + if (details::gmtime (&t, >m) == nullptr) + return e; + + // gmtime() being called for the timepoint t returns 6 AM. So now we have + // *ctm and gtm, which value difference (3 hours) reflects the desired + // offset. The only problem is that we can not deduct gtm from *ctm, to get + // the offset expressed as time_t. To do that we need to apply to both of + // them the same conversion function transforming std::tm to std::time_t. The + // mktime() can do that, so the expression (mktime(ctm) - mktime(>m)) + // calculates the desired offset. + // + // To ensure mktime() works exactly the same way for both cases, we need to + // reset Daylight Saving Time flag for each of *ctm and gtm. + // + ctm->tm_isdst = 0; + time_t lt (mktime (ctm)); + if (lt == e) + return e; + + gtm.tm_isdst = 0; + time_t gt (mktime (>m)); + if (gt == e) + return e; + + // C11 standard specifies time_t to be a real type (integer and real floating + // types are collectively called real types). 
So we can not consider it to be + // signed. + // + return lt > gt ? t + (lt - gt) : t - (gt - lt); +} + +#endif // _WIN32 + +namespace butl +{ + static pair + from_string (const char* input, const char* format, const char** end) + { + auto bad_val = [] () {throw_generic_error (EINVAL);}; + + // See if we have our specifier. + // + size_t i (0); + size_t n (strlen (format)); + for (; i != n; ++i) + { + if (format[i] == '%' && i + 1 != n) + { + if (format[i + 1] == '[') + break; + else + ++i; // To handle %%. + } + } + + // Call the fraction of a second as just fraction from now on. + // + using namespace chrono; + nanoseconds ns (nanoseconds::zero ()); + + if (i == n) + { + // No %[], so just parse with strptime(). + // + tm t = tm (); + const char* p (strptime (input, format, &t)); + if (p == nullptr) + bad_val (); + + if (end != nullptr) + *end = p; + else if (*p != '\0') + bad_val (); // Input is not fully read. + + t.tm_isdst = -1; + return make_pair (t, ns); + } + + // Now the overall plan is: + // + // 1. Parse the fraction part of the input string to obtain nanoseconds. + // + // 2. Remove fraction part from the input string. + // + // 3. Remove %[] from the format string. + // + // 4. Re-parse the modified input with the modified format to fill the + // std::tm structure. + // + // Parse the %[] specifier. + // + assert (format[i] == '%'); + string fm (format, i++); // Start assembling the new format string. + + assert (format[i] == '['); + if (++i == n) + bad_val (); + + char d (format[i]); // Delimiter character. + if (++i == n) + bad_val (); + + char f (format[i]); // Fraction specifier character. + if ((f != 'N' && f != 'U' && f != 'M') || ++i == n) + bad_val (); + + if (format[i++] != ']') + bad_val (); + + // Parse the input with the initial part of the format string, the one + // that preceeds the %[] specifier. The returned pointer will be the + // position we need to start from to parse the fraction. 
+ // + tm t = tm (); + + // What if %[] is first, there is nothing before it? According to the + // strptime() documentation an empty format string is a valid one. + // + const char* p (strptime (input, fm.c_str (), &t)); + if (p == nullptr) + bad_val (); + + // Start assembling the new input string. + // + string in (input, p - input); + size_t fn (0); // Fraction size. + + if (d == *p) + { + // Fraction present in the input. + // + + // Read fraction digits. + // + char buf [10]; + size_t i (0); + size_t n (f == 'N' ? 9 : (f == 'U' ? 6 : 3)); + for (++p; i < n && *p >= '0' && *p <= '9'; ++i, ++p) + buf[i] = *p; + + if (i < n) + bad_val (); + + buf[n] = '\0'; + fn = n; + + // Calculate nanoseconds. + // + char* e (nullptr); + unsigned long long t (strtoull (buf, &e, 10)); + assert (e == buf + n); + + switch (f) + { + case 'N': ns = nanoseconds (t); break; + case 'U': ns = microseconds (t); break; + case 'M': ns = milliseconds (t); break; + default: assert (false); + } + + // Actually the idea to fully remove the fraction from the input string, + // and %[] from the format string, has a flaw. After the fraction removal + // the spaces around it will be "swallowed" with a single space in the + // format string. So, as an example, for the input: + // + // 2016-02-21 19:31:10 .384902285 GMT + // + // And the format: + // + // %Y-%m-%d %H:%M:%S %[.N] + // + // The unparsed tail of the input will be 'GMT' while expected to be + // ' GMT'. To fix that we will not remove, but replace the mentioned + // parts with some non-space character. + // + fm += '-'; + in += '-'; + } + + fm += format + i; + in += p; + + // Reparse the modified input with the modified format. + // + t = tm (); + const char* b (in.c_str ()); + p = strptime (b, fm.c_str (), &t); + + if (p == nullptr) + bad_val (); + + if (end != nullptr) + *end = input + (p - b + fn); + else if (*p != '\0') + bad_val (); // Input is not fully read. 
+ + t.tm_isdst = -1; + return make_pair (t, ns); + } + + timestamp + from_string (const char* input, + const char* format, + bool local, + const char** end) + { + pair t (from_string (input, format, end)); + + time_t time (local ? mktime (&t.first) : timegm (&t.first)); + if (time == -1) + throw_generic_error (errno); + + return timestamp::clock::from_time_t (time) + + chrono::duration_cast (t.second); + } +} diff --git a/libbutl/timestamp.hxx b/libbutl/timestamp.hxx new file mode 100644 index 0000000..1a960b7 --- /dev/null +++ b/libbutl/timestamp.hxx @@ -0,0 +1,146 @@ +// file : libbutl/timestamp.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_TIMESTAMP_HXX +#define LIBBUTL_TIMESTAMP_HXX + +#include +#include + +#include + +#include + +namespace butl +{ + // On all three main platforms that we target (GNU/Linux, Windows (both + // VC++ and GCC/MinGW64), and MacOS X) with recent C++ runtimes, + // system_clock has nanoseconds resolution and counts from the UNIX + // epoch. The latter is important since struct stat also returns times + // based on UNIX epoch. + // + // The underlying type for nanoseconds duration is signed integer type + // of at least 64 bits (currently int64_t, available as duration::rep). + // Because it is signed, we will overflow in year 2262 but by then the + // underlying type will most likely have changed to something larger + // than 64-bit. + // + // So to support other platforms that could possibly use a different + // system_clock resolutions (e.g., microseconds), we actually not going + // to assume anywhere (except perhaps timestamp.cxx) that we are dealing + // with nanoseconds or the 64-bit underlying type. + // + using std::chrono::system_clock; + + // Note that the default-initialized timestamp has the timestamp_nonexistent + // value. 
+ // + using timestamp = system_clock::time_point; + using duration = system_clock::duration; + + // Generally-useful special values. Note that unknown is less than + // nonexistent and is less than any non-special value. + // + const timestamp::rep timestamp_unknown_rep = -1; + const timestamp timestamp_unknown = timestamp (duration (-1)); + const timestamp::rep timestamp_nonexistent_rep = 0; + const timestamp timestamp_nonexistent = timestamp (duration (0)); + + // Print human-readable representation of the timestamp. + // + // By default the timestamp is printed by localtime_r() in the local + // timezone, so tzset() from <time.h> should be called prior to using the + // corresponding operator or the to_stream() function (normally from main() + // or equivalent). + // + // The format argument in the to_stream() function is the put_time() format + // string except that it also supports the nanoseconds conversion specifier + // in the form %[<d>N] where <d> is the optional single delimiter character, + // for example '.'. If the nanoseconds part is 0, then it is not printed + // (nor the delimiter character). Otherwise, if necessary, the nanoseconds + // part is padded to 9 characters with leading zeros. + // + // The special argument in the to_stream() function indicates whether the + // special timestamp_unknown and timestamp_nonexistent values should be + // printed as '' and '', respectively. + // + // The local argument in the to_stream() function indicates whether to use + // localtime_r() or gmtime_r(). + // + // Note also that these operators/function may throw std::system_error. + // + // Finally, padding is not fully supported by these operators/function. They + // throw runtime_error if nanoseconds conversion specifier is present and + // the stream's width field has been set to non-zero value before the call. + // + // Potential improvements: + // - add flag to to_stream() to use + // - support %[U] (microseconds) and %[M] (milliseconds).
+ // - make to_stream() a manipulator, similar to put_time() + // - support %(N) version for non-optional printing + // - support for suffix %[N], for example %[N nsec] + // + LIBBUTL_EXPORT std::ostream& + to_stream (std::ostream&, + const timestamp&, + const char* format, + bool special, + bool local); + + inline std::ostream& + operator<< (std::ostream& os, const timestamp& ts) + { + return to_stream (os, ts, "%Y-%m-%d %H:%M:%S%[.N]", true, true); + } + + LIBBUTL_EXPORT std::ostream& + operator<< (std::ostream&, const duration&); + + // Parse human-readable representation of the timestamp. + // + // The format argument is the strptime() format string except that it also + // supports the fraction of a second specifier in the form %[<d><f>], where + // <d> is the single delimiter character, for example '.', and + // <f> is one of the 'N', 'U', 'M' characters, denoting nanoseconds, + // microseconds and milliseconds, respectively. + // + // The delimiter is mandatory in the format string but optional in the + // input: if no such character is encountered at + // the corresponding position of the input string, the function behaves as + // if no %[] specifier were provided. Only single %[] specifier in the + // format string is currently supported. + // + // If the delimiter is present, then it should be followed by 9 (N), 6 (U), + // or 3 (M) digit value padded with leading zeros if necessary. + // + // If the local argument is true, then the input is assumed to be local time + // and the result is returned as local time as well. Otherwise, UTC is used + // in both cases. + // + // If the end argument is not NULL, then it points to the first character + // that was not parsed. Otherwise, throw std::system_error (EINVAL) in case of any + // unparsed characters. + // + // Throw std::system_error on input/format mismatch and underlying time + // conversion function failures.
+ // + // Note that internally from_string() calls strptime(), which behaves + // according to the process' C locale (set with std::setlocale()) and not + // the C++ locale (set with std::locale::global()). However the behaviour + // can be affected by std::locale::global() as well, as it itself calls + // std::setlocale() for the locale with a name. + // + // Potential improvements: + // - support %() version for non-optional component but with optional + // delimiter + // - ability to parse local, return UTC and vice-versa + // - handle timezone parsing + // + LIBBUTL_EXPORT timestamp + from_string (const char* input, + const char* format, + bool local, + const char** end = nullptr); +} + +#endif // LIBBUTL_TIMESTAMP_HXX diff --git a/libbutl/utility.cxx b/libbutl/utility.cxx new file mode 100644 index 0000000..8ce2cbc --- /dev/null +++ b/libbutl/utility.cxx @@ -0,0 +1,143 @@ +// file : libbutl/utility.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#ifdef _WIN32 +# include +#endif + +#include +#include +#include + +namespace butl +{ + using namespace std; + +#ifndef __cpp_lib_uncaught_exceptions + +#ifdef __cpp_thread_local + thread_local +#else + __thread +#endif + bool exception_unwinding_dtor_ = false; + +#ifdef _WIN32 + bool& + exception_unwinding_dtor () {return exception_unwinding_dtor_;} +#endif + +#endif + + [[noreturn]] void + throw_generic_error (int errno_code, const char* what) + { + if (what == nullptr) + throw system_error (errno_code, generic_category ()); + else + throw system_error (errno_code, generic_category (), what); + } + + [[noreturn]] void +#ifndef _WIN32 + throw_system_error (int system_code, int) + { + throw system_error (system_code, system_category ()); +#else + throw_system_error (int system_code, int fallback_errno_code) + { + // Here we work around MinGW libstdc++ that interprets Windows system error + // codes (for example those returned by 
GetLastError()) as errno codes. The + // resulting system_error description will have the following form: + // + // : + // + // Also note that the fallback-related description suffix is stripped by + // our custom operator<<(ostream, exception) for the common case (see + // below). + // + throw system_error (fallback_errno_code, + system_category (), + win32::error_msg (system_code)); +#endif + } +} + +namespace std +{ + using namespace butl; + + ostream& + operator<< (ostream& o, const exception& e) + { + const char* d (e.what ()); + const char* s (d); + + // Strip the leading junk (colons and spaces). + // + // Note that error descriptions for ios_base::failure exceptions thrown by + // fdstream can have the ': ' prefix for libstdc++ (read more in comment + // for throw_ios_failure()). + // + for (; *s == ' ' || *s == ':'; ++s) ; + + // Strip the trailing junk (periods, spaces, newlines). + // + // Note that msvcrt adds some junk like this: + // + // Invalid data.\r\n + // + size_t n (string::traits_type::length (s)); + for (; n > 0; --n) + { + switch (s[n-1]) + { + case '\r': + case '\n': + case '.': + case ' ': continue; + } + + break; + } + + // Strip the suffix for system_error thrown by + // throw_system_error(system_code) on Windows. For example for the + // ERROR_INVALID_DATA error code the original description will be + // 'Invalid data. : Success' for MinGW libstdc++ and + // 'Invalid data. : Success.' for msvcrt. + // + if (n >= 11 && + string::traits_type::compare (s + n - 11, ". : Success", 11) == 0) + n -= 11; + + // Lower-case the first letter if the beginning looks like a word (the + // second character is the lower-case letter or space). + // + char c; + bool lc (n > 0 && alpha (c = s[0]) && c == ucase (c) && + (n == 1 || (alpha (c = s[1]) && c == lcase (c)) || c == ' ')); + + // Print the description as is if no adjustment is required. 
+ // + if (!lc && s == d && s[n] == '\0') + o << d; + else + { + // We need to produce the resulting description and then write it + // with a single formatted output operation. + // + string r (s, n); + + if (lc) + r[0] = lcase (r[0]); + + o << r; + } + + return o; + } +} diff --git a/libbutl/utility.hxx b/libbutl/utility.hxx new file mode 100644 index 0000000..448f386 --- /dev/null +++ b/libbutl/utility.hxx @@ -0,0 +1,264 @@ +// file : libbutl/utility.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_UTILITY_HXX +#define LIBBUTL_UTILITY_HXX + +#include +#include // ostream +#include // size_t +#include // move(), forward() +#include // strcmp(), strlen() +#include // uncaught_exceptions +#include // exception, uncaught_exception(s)() +#include // thread_local + +//#include // hash + +#include + +namespace butl +{ + // Throw std::system_error with generic_category or system_category, + // respectively. + // + // The generic version should be used for portable errno codes (those that + // are mapped to std::errc). The system version should be used for platform- + // specific codes, for example, additional errno codes on POSIX systems or + // the result of GetLastError() on Windows. + // + // See also the exception sanitization below. + // + [[noreturn]] LIBBUTL_EXPORT void + throw_generic_error (int errno_code, const char* what = nullptr); + + [[noreturn]] LIBBUTL_EXPORT void + throw_system_error (int system_code, int fallback_errno_code = 0); + + // Convert ASCII character/string case. If there is no upper/lower case + // counterpart, leave the character unchanged. The POSIX locale (also known + // as C locale) must be the current application locale. Otherwise the + // behavior is undefined. 
+ // + // Note that the POSIX locale specifies behaviour on data consisting + // entirely of characters from the portable character set (subset of ASCII + // including 103 non-negative characters and English alphabet letters in + // particular) and the control character set (more about them at + // http://pubs.opengroup.org/onlinepubs/009696899/basedefs/xbd_chap06.html). + // + // Also note that according to the POSIX locale definition the case + // conversion can be applied only to [A-Z] and [a-z] character ranges being + // translated to each other (more about that at + // http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html#tag_07_02) + // + char ucase (char); + std::string ucase (const char*, std::size_t = std::string::npos); + std::string ucase (const std::string&); + std::string& ucase (std::string&); + void ucase (char*, std::size_t); + + char lcase (char); + std::string lcase (const char*, std::size_t = std::string::npos); + std::string lcase (const std::string&); + std::string& lcase (std::string&); + void lcase (char*, std::size_t); + + // Compare ASCII characters/strings ignoring case. Behave as if characters + // had been converted to the lower case and then byte-compared. Return a + // negative, zero or positive value if the left hand side is less, equal or + // greater than the right hand side, respectively. The POSIX locale (also + // known as C locale) must be the current application locale. Otherwise the + // behavior is undefined. + // + // The optional size argument specifies the maximum number of characters + // to compare. + // + int casecmp (char, char); + + int casecmp (const std::string&, const std::string&, + std::size_t = std::string::npos); + + int casecmp (const std::string&, const char*, + std::size_t = std::string::npos); + + int casecmp (const char*, const char*, std::size_t = std::string::npos); + + // Case-insensitive key comparators (i.e., to be used in sets, maps, etc).
+ // + struct case_compare_string + { + bool operator() (const std::string& x, const std::string& y) const + { + return casecmp (x, y) < 0; + } + }; + + struct case_compare_c_string + { + bool operator() (const char* x, const char* y) const + { + return casecmp (x, y) < 0; + } + }; + + bool + alpha (char); + + bool + digit (char); + + bool + alnum (char); + + // Key comparators (i.e., to be used in sets, maps, etc). + // + struct compare_c_string + { + bool operator() (const char* x, const char* y) const noexcept + { + return std::strcmp (x, y) < 0; + } + }; + + struct compare_pointer_target + { + template + bool operator() (const P& x, const P& y) const {return *x < *y;} + }; + + //struct hash_pointer_target + //{ + // template + // std::size_t operator() (const P& x) const {return std::hash (*x);} + //}; + + // Combine one or more hash values. + // + inline std::size_t + combine_hash (std::size_t s, std::size_t h) + { + // Magic formula from boost::hash_combine(). + // + return s ^ (h + 0x9e3779b9 + (s << 6) + (s >> 2)); + } + + template + inline std::size_t + combine_hash (std::size_t s, std::size_t h, S... hs) + { + return combine_hash (combine_hash (s, h), hs...); + } + + // Support for reverse iteration using range-based for-loop: + // + // for (... : reverse_iterate (x)) ... + // + template + class reverse_range + { + T x_; + + public: + reverse_range (T&& x): x_ (std::forward (x)) {} + + auto begin () const -> decltype (this->x_.rbegin ()) {return x_.rbegin ();} + auto end () const -> decltype (this->x_.rend ()) {return x_.rend ();} + }; + + template + inline reverse_range + reverse_iterate (T&& x) {return reverse_range (std::forward (x));} + + // Call a function if there is an exception. 
+ // + + template + struct exception_guard; + + template + inline exception_guard + make_exception_guard (F f) + { + return exception_guard (std::move (f)); + } + +#ifdef __cpp_lib_uncaught_exceptions + template + struct exception_guard + { + exception_guard (F f) + : f_ (std::move (f)), + u_ (std::uncaught_exceptions ()) {} + + ~exception_guard () + { + if (u_ != std::uncaught_exceptions ()) + f_ (); + } + + private: + F f_; + int u_; + }; +#else + // Fallback implementation using a TLS flag. + // + // True means we are in the body of a destructor that is being called as + // part of the exception stack unwinding. + // + extern +#ifdef __cpp_thread_local + thread_local +#else + __thread +#endif + bool exception_unwinding_dtor_; + + // On Windows one cannot export a thread-local variable so we have to + // use a wrapper function. + // +#ifdef _WIN32 + LIBBUTL_EXPORT bool& + exception_unwinding_dtor (); +#else + inline bool& + exception_unwinding_dtor () {return exception_unwinding_dtor_;} +#endif + + template + struct exception_guard + { + exception_guard (F f): f_ (std::move (f)) {} + ~exception_guard () + { + if (std::uncaught_exception ()) + { + exception_unwinding_dtor () = true; + f_ (); + exception_unwinding_dtor () = false; + } + } + + private: + F f_; + }; +#endif +} + +namespace std +{ + // Sanitize the exception description before printing.
This includes: + // + // - stripping leading colons and spaces (see fdstream.cxx) + // - stripping trailing newlines, periods, and spaces + // - stripping system error redundant suffix (see utility.cxx) + // - lower-case the first letter if the beginning looks like a word + // + LIBBUTL_EXPORT ostream& + operator<< (ostream&, const exception&); +} + +#include + +#endif // LIBBUTL_UTILITY_HXX diff --git a/libbutl/utility.ixx b/libbutl/utility.ixx new file mode 100644 index 0000000..e45a729 --- /dev/null +++ b/libbutl/utility.ixx @@ -0,0 +1,136 @@ +// file : libbutl/utility.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef _WIN32 +# include // strcasecmp(), strncasecmp() +#else +# include // _stricmp(), _strnicmp() +#endif + +#include // toupper(), tolower(), isalpha(), isdigit(), isalnum() + +namespace butl +{ + inline char + ucase (char c) + { + return std::toupper (c); + } + + inline void + ucase (char* s, std::size_t n) + { + for (const char* e (s + n); s != e; ++s) + *s = ucase (*s); + } + + inline std::string& + ucase (std::string& s) + { + if (size_t n = s.size ()) + { + s.front () = s.front (); // Force copy in CoW. + ucase (const_cast (s.data ()), n); + } + return s; + } + + inline std::string + ucase (const char* s, std::size_t n) + { + std::string r (s, n == std::string::npos ? std::strlen (s) : n); + return ucase (r); + } + + inline std::string + ucase (const std::string& s) + { + return ucase (s.c_str (), s.size ()); + } + + inline char + lcase (char c) + { + return std::tolower (c); + } + + inline void + lcase (char* s, std::size_t n) + { + for (const char* e (s + n); s != e; ++s) + *s = lcase (*s); + } + + inline std::string& + lcase (std::string& s) + { + if (size_t n = s.size ()) + { + s.front () = s.front (); // Force copy in CoW. 
+ lcase (const_cast (s.data ()), n); + } + return s; + } + + inline std::string + lcase (const char* s, std::size_t n) + { + std::string r (s, n == std::string::npos ? std::strlen (s) : n); + return lcase (r); + } + + inline std::string + lcase (const std::string& s) + { + return lcase (s.c_str (), s.size ()); + } + + inline int + casecmp (char l, char r) + { + l = lcase (l); + r = lcase (r); + return l < r ? -1 : (l > r ? 1 : 0); + } + + inline int + casecmp (const char* l, const char* r, std::size_t n) + { +#ifndef _WIN32 + return n == std::string::npos ? strcasecmp (l, r) : strncasecmp (l, r, n); +#else + return n == std::string::npos ? _stricmp (l, r) : _strnicmp (l, r, n); +#endif + } + + inline int + casecmp (const std::string& l, const std::string& r, std::size_t n) + { + return casecmp (l.c_str (), r.c_str (), n); + } + + inline int + casecmp (const std::string& l, const char* r, std::size_t n) + { + return casecmp (l.c_str (), r, n); + } + + inline bool + alpha (char c) + { + return std::isalpha (c); + } + + inline bool + digit (char c) + { + return std::isdigit (c); + } + + inline bool + alnum (char c) + { + return std::isalnum (c); + } +} diff --git a/libbutl/vector-view.hxx b/libbutl/vector-view.hxx new file mode 100644 index 0000000..98b314c --- /dev/null +++ b/libbutl/vector-view.hxx @@ -0,0 +1,120 @@ +// file : libbutl/vector-view.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_VECTOR_VIEW_HXX +#define LIBBUTL_VECTOR_VIEW_HXX + +#include +#include // size_t, ptrdiff_t +#include // swap() +#include // reverse_iterator +#include // out_of_range + +namespace butl +{ + // In our version a const view allows the modification of the elements + // unless T is made const (the same semantics as in smart pointers). + // + // @@ If T is const T1, could be useful to have a c-tor from vector. 
+ // + template + class vector_view + { + public: + using value_type = T; + using pointer = T*; + using reference = T&; + using const_pointer = const T*; + using const_reference = const T&; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + using iterator = T*; + using const_iterator = const T*; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + + // construct/copy/destroy: + // + vector_view (): data_ (nullptr), size_ (0) {} + vector_view (T* d, size_type s): data_ (d), size_ (s) {} + + template + vector_view (std::vector& v) + : data_ (v.data ()), size_ (v.size ()) {} + + template + vector_view (const std::vector& v) + : data_ (v.data ()), size_ (v.size ()) {} + + template + vector_view (const vector_view& v) + : data_ (v.data ()), size_ (v.size ()) {} + + vector_view (vector_view&&) = default; + vector_view (const vector_view&) = default; + vector_view& operator= (vector_view&&) = default; + vector_view& operator= (const vector_view&) = default; + + // iterators: + // + iterator begin() const {return data_;} + iterator end() const {return data_ + size_;} + + const_iterator cbegin() const {return data_;} + const_iterator cend() const {return data_ + size_;} + + reverse_iterator rbegin() const {return reverse_iterator (end ());} + reverse_iterator rend() const {return reverse_iterator (begin ());} + + const_reverse_iterator crbegin() const { + return const_reverse_iterator (cend ());} + const_reverse_iterator crend() const { + return const_reverse_iterator (cbegin ());} + + // capacity: + // + size_type size() const {return size_;} + bool empty() const {return size_ == 0;} + + // element access: + // + reference operator[](size_type n) const {return data_[n];} + reference front() const {return data_[0];} + reference back() const {return data_[size_ - 1];} + + reference at(size_type n) const + { + if (n >= size_) + throw std::out_of_range ("index out of range"); + return data_[n]; 
+ } + + // data access: + // + T* data() const {return data_;} + + // modifiers: + // + void assign (T* d, size_type s) {data_ = d; size_ = s;} + void clear () {data_ = nullptr; size_ = 0;} + void swap (vector_view& v) { + std::swap (data_, v.data_); std::swap (size_, v.size_);} + + private: + T* data_; + size_type size_; + }; + + //@@ TODO. + // + template bool operator== (vector_view l, vector_view r); + template bool operator!= (vector_view l, vector_view r); + template bool operator< (vector_view l, vector_view r); + template bool operator> (vector_view l, vector_view r); + template bool operator<= (vector_view l, vector_view r); + template bool operator>= (vector_view l, vector_view r); +} + +#endif // LIBBUTL_VECTOR_VIEW_HXX diff --git a/libbutl/version.hxx.in b/libbutl/version.hxx.in new file mode 100644 index 0000000..780d21a --- /dev/null +++ b/libbutl/version.hxx.in @@ -0,0 +1,40 @@ +// file : libbutl/version.hxx.in -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_VERSION // Note: using the version macro itself. + +// Note: using build2 standard versioning scheme. The numeric version format +// is AAABBBCCCDDDE where: +// +// AAA - major version number +// BBB - minor version number +// CCC - bugfix version number +// DDD - alpha / beta (DDD + 500) version number +// E - final (0) / snapshot (1) +// +// When DDDE is not 0, 1 is subtracted from AAABBBCCC. 
For example: +// +// Version AAABBBCCCDDDE +// +// 0.1.0 0000010000000 +// 0.1.2 0000010010000 +// 1.2.3 0010020030000 +// 2.2.0-a.1 0020019990010 +// 3.0.0-b.2 0029999995020 +// 2.2.0-a.1.z 0020019990011 +// +#define LIBBUTL_VERSION $libbutl.version.project_number$ULL +#define LIBBUTL_VERSION_STR "$libbutl.version.project$" +#define LIBBUTL_VERSION_ID "$libbutl.version.project_id$" + +#define LIBBUTL_VERSION_MAJOR $libbutl.version.major$ +#define LIBBUTL_VERSION_MINOR $libbutl.version.minor$ +#define LIBBUTL_VERSION_PATCH $libbutl.version.patch$ + +#define LIBBUTL_PRE_RELEASE $libbutl.version.pre_release$ + +#define LIBBUTL_SNAPSHOT $libbutl.version.snapshot_sn$ULL +#define LIBBUTL_SNAPSHOT_ID "$libbutl.version.snapshot_id$" + +#endif // LIBBUTL_VERSION diff --git a/libbutl/win32-utility.cxx b/libbutl/win32-utility.cxx new file mode 100644 index 0000000..c16f8cc --- /dev/null +++ b/libbutl/win32-utility.cxx @@ -0,0 +1,54 @@ +// file : libbutl/win32-utility.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +// Note that while we exclude it from the buildfile-based builds, we still +// need these guards for build2 bootstrap. 
+// +#ifdef _WIN32 + +#include // unique_ptr + +using namespace std; + +namespace butl +{ + namespace win32 + { + struct msg_deleter + { + void operator() (char* p) const {LocalFree (p);} + }; + + string + error_msg (DWORD code) + { + char* msg; + if (!FormatMessageA ( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS | + FORMAT_MESSAGE_MAX_WIDTH_MASK, + 0, + code, + MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT), + (char*)&msg, + 0, + 0)) + return "unknown error code " + to_string (code); + + unique_ptr m (msg); + return msg; + } + + string + last_error_msg () + { + return error_msg (GetLastError ()); + } + } +} + +#endif // _WIN32 diff --git a/libbutl/win32-utility.hxx b/libbutl/win32-utility.hxx new file mode 100644 index 0000000..cdaf846 --- /dev/null +++ b/libbutl/win32-utility.hxx @@ -0,0 +1,54 @@ +// file : libbutl/win32-utility.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef LIBBUTL_WIN32_UTILITY_HXX +#define LIBBUTL_WIN32_UTILITY_HXX + +// Use this header to include and a couple of Win32-specific +// utilities. +// + +#ifdef _WIN32 + +// Try to include so that it doesn't mess other things up. +// +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# ifndef NOMINMAX // No min and max macros. +# define NOMINMAX +# include +# undef NOMINMAX +# else +# include +# endif +# undef WIN32_LEAN_AND_MEAN +#else +# ifndef NOMINMAX +# define NOMINMAX +# include +# undef NOMINMAX +# else +# include +# endif +#endif + +#include + +#include + +namespace butl +{ + namespace win32 + { + LIBBUTL_EXPORT std::string + error_msg (DWORD code); + + LIBBUTL_EXPORT std::string + last_error_msg (); + } +}; + +#endif // _WIN32 + +#endif // LIBBUTL_WIN32_UTILITY_HXX -- cgit v1.1