aboutsummaryrefslogtreecommitdiff
path: root/butl
diff options
context:
space:
mode:
Diffstat (limited to 'butl')
-rw-r--r--butl/timestamp64
-rw-r--r--butl/timestamp.cxx291
2 files changed, 345 insertions, 10 deletions
diff --git a/butl/timestamp b/butl/timestamp
index 10090c5..90bca08 100644
--- a/butl/timestamp
+++ b/butl/timestamp
@@ -38,19 +38,22 @@ namespace butl
// Generally-useful special values.
//
- const timestamp timestamp_unknown {duration {-1}};
- const timestamp timestamp_nonexistent {duration {0}};
+ const timestamp timestamp_unknown = timestamp (duration (-1)); // -1 tick.
+ const timestamp timestamp_nonexistent = timestamp (duration (0)); // 0 ticks, same value as the replaced definition.
- // Human-readable representation. By default the timestamp is printed by
- // localtime_r() in the local timezone, so tzset() from <time.h> should be
- // called prior to using the corresponding operator or the to_stream()
- // function (normally from main() or equivalent).
+ // Print human-readable representation of the timestamp.
+ //
+ // By default the timestamp is printed by localtime_r() in the local
+ // timezone, so tzset() from <time.h> should be called prior to using the
+ // corresponding operator or the to_stream() function (normally from main()
+ // or equivalent).
//
// The format argument in the to_stream() function is the put_time() format
// string except that it also supports the nanoseconds conversion specifier
// in the form %[<d>N] where <d> is the optional single delimiter character,
- // for example '.'. If the nanoseconds part is 0, then it is not printed (nor
- // the delimiter character).
+ // for example '.'. If the nanoseconds part is 0, then it is not printed
+ // (nor the delimiter character). Otherwise, if necessary, the nanoseconds
+ // part is padded to 9 characters with leading zeros.
//
// The special argument in the to_stream() function indicates whether the
// special timestamp_unknown and timestamp_nonexistent values should be
@@ -87,6 +90,51 @@ namespace butl
std::ostream&
operator<< (std::ostream&, const duration&);
+
+ // Parse human-readable representation of the timestamp.
+ //
+ // The format argument is the strptime() format string except that it also
+ // supports the fraction of a second specifier in the form %[<d><f>], where
+ // <d> is the single delimiter character, for example '.', and <f> is one
+ // of the 'N', 'U', 'M' characters, denoting nanoseconds, microseconds and
+ // milliseconds, respectively.
+ //
+ // The delimiter <d> is mandatory in the format string. If no such character
+ // is encountered at the corresponding position of the input string, the
+ // function behaves as if no %[] specifier were provided. Only a single %[]
+ // specifier in the format string is currently supported.
+ //
+ // If the delimiter is present, then it should be followed by 9 (N), 6 (U),
+ // or 3 (M) digit value padded with leading zeros if necessary.
+ //
+ // If the local argument is true, then the input is assumed to be local
+ // time and the result is returned as local time as well. Otherwise, UTC is
+ // used in both cases.
+ //
+ // If the end argument is not NULL, then it is set to point to the first
+ // character that was not parsed. Otherwise, throw std::system_error (with
+ // the EINVAL code) in case of any unparsed characters.
+ //
+ // Also throw std::system_error on input/format mismatch and underlying
+ // time conversion function failures.
+ //
+ // Note that internally from_string() calls strptime(), which behaves
+ // according to the process' C locale (set with std::setlocale()) and not
+ // the C++ locale (set with std::locale::global()). Meanwhile the behaviour
+ // can be affected by std::locale::global() as well, as it itself calls
+ // std::setlocale() for a locale that has a name.
+ //
+ // Potential improvements:
+ // - support %() version for non-optional component but with optional
+ // delimiter
+ // - ability to parse local, return UTC and vice-versa
+ // - handle timezone parsing
+ //
+ timestamp
+ from_string (const char* input,
+ const char* format,
+ bool local,
+ const char** end = nullptr);
};
#endif // BUTL_TIMESTAMP
diff --git a/butl/timestamp.cxx b/butl/timestamp.cxx
index 6299ba3..f3966b7 100644
--- a/butl/timestamp.cxx
+++ b/butl/timestamp.cxx
@@ -4,13 +4,16 @@
#include <butl/timestamp>
-#include <time.h> // localtime_r(), gmtime_r()
+#include <time.h> // localtime_r(), gmtime_r(), strptime(), timegm()
#include <errno.h> // EINVAL
-#include <ctime> // tm, strftime()
+#include <ctime> // tm, time_t, strftime(), mktime()
+#include <cstdlib> // strtoull()
+#include <cassert>
#include <iomanip> // put_time(), setw(), dec, right
#include <cstring> // strlen(), memcpy()
#include <ostream>
+#include <utility> // pair, make_pair()
#include <stdexcept> // runtime_error
#include <system_error>
@@ -28,6 +31,9 @@ using namespace std;
// of the std::tm argument.
//
#ifdef __GLIBCXX__
+#include <ctime> // tm, strftime()
+#include <ostream>
+
namespace details
{
struct put_time_data
@@ -253,3 +259,284 @@ namespace butl
return os;
}
}
+
+// VC++ implementation of strptime() via std::get_time().
+//
+// To debug fallback functions with GCC, uncomment the following defines.
+//
+//#define _MSC_VER
+//#define strptime strptime_
+//#define timegm timegm_
+
+#ifdef _MSC_VER
+#include <ctime> // time_t, tm, mktime(), gmtime()
+#include <locale>
+#include <clocale>
+#include <sstream>
+#include <iomanip>
+#include <cstring> // strlen()
+
+namespace details
+{
+ static char* // Points past the last parsed character, nullptr if parsing failed.
+ strptime (const char* input, const char* format, tm* time)
+ {
+ istringstream is (input);
+
+ // The original strptime() function behaves according to the process' C
+ // locale (set with std::setlocale()), which can differ from the process
+ // C++ locale (set with std::locale::global()). So imbue the stream with
+ // the locale named by the current C locale before calling get_time().
+ is.imbue (locale (setlocale (LC_ALL, nullptr)));
+
+ if (!(is >> get_time (time, format)))
+ return nullptr; // get_time() failed to parse the input.
+ else
+ // tellg () behaves as UnformattedInputFunction, so returns failure
+ // status if eofbit is set. In that case use the input length instead,
+ // that is, point to the terminating null character.
+ return const_cast<char*> (
+ input + (is.eof ()
+ ? strlen (input)
+ : static_cast<size_t> (is.tellg ())));
+ }
+
+ static time_t // Time from Epoch for *ctm read as UTC, (time_t) -1 on failure.
+ timegm (tm* ctm)
+ {
+ const time_t e (static_cast<time_t> (-1));
+
+ // We will use an example to explain how it works. Say *ctm contains 9 AM
+ // of some day. Note that no time zone information is available.
+ //
+ // Convert it to the time from Epoch as if it's in the local time zone.
+ // Note that this overwrites the tm_isdst member of *ctm.
+ ctm->tm_isdst = -1;
+ time_t t (mktime (ctm));
+ if (t == e)
+ return e;
+
+ // Let's say we are in Moscow, and t contains the time passed from Epoch
+ // till 9 AM MSK. But that is not what we need. What we need is the time
+ // passed from Epoch till 9 AM GMT. This is some bigger number, as it takes
+ // longer to reach the same calendar time in a more Western location. So
+ // we need to find that offset, and increment t with it to obtain the
+ // desired value. The offset is effectively the time difference between MSK
+ // and GMT time zones.
+ //
+ tm gtm;
+ if (gmtime_r (&t, &gtm) == nullptr)
+ return e;
+
+ // gmtime_r() being called for the timepoint t returns 6 AM. So now we
+ // have *ctm and gtm, whose value difference (3 hours) reflects the
+ // desired offset. The only problem is that we can not subtract gtm from
+ // *ctm to get the offset expressed as time_t. To do that we need to apply
+ // to both of them the same conversion function transforming std::tm to
+ // std::time_t. The mktime() can do that, so the expression (mktime(ctm) -
+ // mktime(&gtm)) calculates the desired offset.
+ //
+ // To ensure mktime() works exactly the same way for both cases, we need
+ // to reset Daylight Saving Time flag for each of *ctm and gtm.
+ //
+ ctm->tm_isdst = 0;
+ time_t lt (mktime (ctm));
+ if (lt == e)
+ return e;
+
+ gtm.tm_isdst = 0;
+ time_t gt (mktime (&gtm));
+ if (gt == e)
+ return e;
+
+ // C11 standard specifies time_t to be a real type (integer and real
+ // floating types are collectively called real types). So we can not
+ // consider it to be signed. Thus always subtract the smaller value from
+ // the bigger one and then either add the offset to t or deduct it.
+ return lt > gt ? t + (lt - gt) : t - (gt - lt);
+ }
+}
+
+using namespace details;
+#endif
+
+namespace butl
+{
+ static pair<tm, chrono::nanoseconds> // Broken-down time and the fraction of a second.
+ from_string (const char* input, const char* format, const char** end)
+ {
+ auto bad_val = []() {throw system_error (EINVAL, system_category ());};
+
+ // See if we have our specifier.
+ // Stop at the first %[ leaving i at its '%' (or at n if there is none).
+ size_t i (0);
+ size_t n (strlen (format));
+ for (; i != n; ++i)
+ {
+ if (format[i] == '%' && i + 1 != n)
+ {
+ if (format[i + 1] == '[')
+ break;
+ else
+ ++i; // To handle %%.
+ }
+ }
+
+ // From now on call the fraction of a second just the fraction.
+ //
+ using namespace chrono;
+ nanoseconds ns (nanoseconds::zero ());
+
+ if (i == n)
+ {
+ // No %[], so just parse with strptime().
+ //
+ tm t {};
+ const char* p (strptime (input, format, &t));
+ if (p == nullptr)
+ bad_val ();
+
+ if (end != nullptr)
+ *end = p;
+ else if (*p != '\0')
+ bad_val (); // Input is not fully read.
+
+ return make_pair (t, ns);
+ }
+
+ // Now the overall plan is:
+ //
+ // 1. Parse the fraction part of the input string to obtain nanoseconds.
+ //
+ // 2. Remove fraction part from the input string.
+ //
+ // 3. Remove %[] from the format string.
+ //
+ // 4. Re-parse the modified input with the modified format to fill the
+ // std::tm structure.
+ //
+ // Parse the %[] specifier, which must be exactly %[<d><f>].
+ //
+ assert (format[i] == '%');
+ string fm (format, i++); // Start assembling the new format string.
+
+ assert (format[i] == '[');
+ if (++i == n)
+ bad_val ();
+
+ char d (format[i]); // Delimiter character.
+ if (++i == n)
+ bad_val ();
+
+ char f (format[i]); // Fraction specifier character.
+ if ((f != 'N' && f != 'U' && f != 'M') || ++i == n)
+ bad_val ();
+
+ if (format[i++] != ']') // Leaves i right after the specifier.
+ bad_val ();
+
+ // Parse the input with the initial part of the format string, the one
+ // that precedes the %[] specifier. The returned pointer will be the
+ // position we need to start from to parse the fraction.
+ //
+ tm t {};
+
+ // What if %[] comes first and there is nothing before it? According to
+ // the strptime() documentation an empty format string is a valid one.
+ //
+ const char* p (strptime (input, fm.c_str (), &t));
+ if (p == nullptr)
+ bad_val ();
+
+ // Start assembling the new input string.
+ //
+ string in (input, p - input);
+ size_t fn (0); // Fraction size.
+
+ if (d == *p)
+ {
+ // Fraction present in the input.
+ //
+
+ // Read fraction digits (exactly 9, 6, or 3 of them are required).
+ //
+ char buf [10];
+ size_t i (0); // Note: shadows the format position i.
+ size_t n (f == 'N' ? 9 : (f == 'U' ? 6 : 3)); // Note: shadows the format length n.
+ for (++p; i < n && *p >= '0' && *p <= '9'; ++i, ++p)
+ buf[i] = *p;
+
+ if (i < n)
+ bad_val ();
+
+ buf[n] = '\0';
+ fn = n;
+
+ // Calculate nanoseconds.
+ //
+ char* e (nullptr);
+ unsigned long long t (strtoull (buf, &e, 10)); // Note: shadows the tm t.
+ assert (e == buf + n);
+
+ switch (f)
+ {
+ case 'N': ns = nanoseconds (t); break;
+ case 'U': ns = microseconds (t); break;
+ case 'M': ns = milliseconds (t); break;
+ default: assert (false);
+ }
+
+ // Actually the idea to fully remove the fraction from the input string,
+ // and %[] from the format string, has a flaw. After the fraction removal
+ // the spaces around it will be "swallowed" by a single space in the
+ // format string. So, as an example, for the input:
+ //
+ // 2016-02-21 19:31:10 .384902285 GMT
+ //
+ // And the format:
+ //
+ // %Y-%m-%d %H:%M:%S %[.N]
+ //
+ // The unparsed tail of the input will be 'GMT' while expected to be
+ // ' GMT'. To fix that we will not remove, but replace the mentioned
+ // parts with some non-space character ('-' for both strings).
+ //
+ fm += '-';
+ in += '-';
+ }
+
+ fm += format + i; // The rest of the format after %[].
+ in += p; // The rest of the input after the fraction, if any.
+
+ // Reparse the modified input with the modified format.
+ //
+ t = {};
+ const char* b (in.c_str ());
+ p = strptime (b, fm.c_str (), &t);
+
+ if (p == nullptr)
+ bad_val ();
+
+ if (end != nullptr)
+ *end = input + (p - b + fn); // Account for the removed fraction digits.
+ else if (*p != '\0')
+ bad_val (); // Input is not fully read.
+
+ return make_pair (t, ns);
+ }
+
+ timestamp // The parsed time point plus the parsed fraction of a second.
+ from_string (const char* input,
+ const char* format,
+ bool local,
+ const char** end)
+ {
+ pair<tm, chrono::nanoseconds> t (from_string (input, format, end)); // Throws system_error on parsing failure.
+ // Convert the broken-down time to time_t reading it as local time or UTC.
+ time_t time (local ? mktime (&t.first) : timegm (&t.first));
+ if (time == -1) // NOTE(review): -1 is treated as failure only; confirm it can't be a legitimate result here.
+ throw system_error (errno, system_category ()); // NOTE(review): assumes the failed call has set errno -- confirm.
+
+ return timestamp::clock::from_time_t (time) + t.second;
+ }
+}