From 0937885bc90bd2c2d938bd2e16e0c164e80625fe Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Tue, 21 Jul 2020 18:09:47 +0300 Subject: Release version 65.1.0+4 Fix Clang 10 compilation errors Canonicalize license in manifests Add glue buildfile --- README-DEV | 2 +- buildfile | 7 + libicui18n/README-DEV | 20 + libicui18n/libicui18n/basictz.cpp | 562 +++++++ libicui18n/libicui18n/basictz.cpp.patch | 11 + libicui18n/libicui18n/buildfile | 14 +- libicui18n/libicui18n/dtitvfmt.cpp | 1661 +++++++++++++++++++ libicui18n/libicui18n/dtitvfmt.cpp.patch | 11 + libicui18n/libicui18n/olsontz.cpp | 1080 ++++++++++++ libicui18n/libicui18n/olsontz.cpp.patch | 11 + libicui18n/libicui18n/rbtz.cpp | 959 +++++++++++ libicui18n/libicui18n/rbtz.cpp.patch | 29 + libicui18n/libicui18n/stsearch.cpp | 483 ++++++ libicui18n/libicui18n/stsearch.cpp.patch | 11 + libicui18n/libicui18n/vtzone.cpp | 2633 ++++++++++++++++++++++++++++++ libicui18n/libicui18n/vtzone.cpp.patch | 11 + libicui18n/manifest | 6 +- libicuio/libicuio/buildfile | 8 +- libicuio/manifest | 4 +- libicuuc/README-DEV | 5 + libicuuc/libicu/buildfile | 8 +- libicuuc/libicu/uniset.cpp | 2356 ++++++++++++++++++++++++++ libicuuc/libicu/uniset.cpp.patch | 11 + libicuuc/manifest | 6 +- 24 files changed, 9888 insertions(+), 21 deletions(-) create mode 100644 buildfile create mode 100644 libicui18n/libicui18n/basictz.cpp create mode 100644 libicui18n/libicui18n/basictz.cpp.patch create mode 100644 libicui18n/libicui18n/dtitvfmt.cpp create mode 100644 libicui18n/libicui18n/dtitvfmt.cpp.patch create mode 100644 libicui18n/libicui18n/olsontz.cpp create mode 100644 libicui18n/libicui18n/olsontz.cpp.patch create mode 100644 libicui18n/libicui18n/rbtz.cpp create mode 100644 libicui18n/libicui18n/rbtz.cpp.patch create mode 100644 libicui18n/libicui18n/stsearch.cpp create mode 100644 libicui18n/libicui18n/stsearch.cpp.patch create mode 100644 libicui18n/libicui18n/vtzone.cpp create mode 100644 libicui18n/libicui18n/vtzone.cpp.patch create 
mode 100644 libicuuc/libicu/uniset.cpp create mode 100644 libicuuc/libicu/uniset.cpp.patch diff --git a/README-DEV b/README-DEV index 6e41d8c..e848405 100644 --- a/README-DEV +++ b/README-DEV @@ -41,7 +41,7 @@ are packaged as follows: src libraries headers tools Debian/Ubuntu: icu libicu65 libicu-dev icu-devtools -Fedora/RHEL: icu libicu libicu-devel - +Fedora/RHEL: icu libicu libicu-devel icu Search for the Debian and Fedora packages at https://packages.debian.org/search and https://apps.fedoraproject.org/packages/. diff --git a/buildfile b/buildfile new file mode 100644 index 0000000..1d24734 --- /dev/null +++ b/buildfile @@ -0,0 +1,7 @@ +# file : buildfile +# license : Unicode License; see accompanying LICENSE file + +# Glue buildfile that "pulls" all the packages. + +import pkgs = {*/ -upstream/} +./: $pkgs diff --git a/libicui18n/README-DEV b/libicui18n/README-DEV index 3744292..2857ff7 100644 --- a/libicui18n/README-DEV +++ b/libicui18n/README-DEV @@ -5,3 +5,23 @@ See ../README-DEV for general notes on ICU packaging. 
Symlink the required upstream directories into libicui18n/: $ ln -s ../../upstream/icu4c/source/i18n libicui18n + +Fix Clang 10 compilation errors: + +$ cp libicui18n/i18n/stsearch.cpp libicui18n +$ patch -p0 getRawOffset() + tr1.getFrom()->getDSTSavings() + == tr1.getTo()->getRawOffset() + tr1.getTo()->getDSTSavings()) + && (tr1.getFrom()->getDSTSavings() != 0 && tr1.getTo()->getDSTSavings() != 0)) { + getNextTransition(tr1.getTime(), FALSE, tr1); + } else { + break; + } + } + while (TRUE) { + if (avail2 + && tr2.getTime() <= end + && (tr2.getFrom()->getRawOffset() + tr2.getFrom()->getDSTSavings() + == tr2.getTo()->getRawOffset() + tr2.getTo()->getDSTSavings()) + && (tr2.getFrom()->getDSTSavings() != 0 && tr2.getTo()->getDSTSavings() != 0)) { + tz.getNextTransition(tr2.getTime(), FALSE, tr2); + } else { + break; + } + } + } + + UBool inRange1 = (avail1 && tr1.getTime() <= end); + UBool inRange2 = (avail2 && tr2.getTime() <= end); + if (!inRange1 && !inRange2) { + // No more transition in the range + break; + } + if (!inRange1 || !inRange2) { + return FALSE; + } + if (tr1.getTime() != tr2.getTime()) { + return FALSE; + } + if (ignoreDstAmount) { + if (tr1.getTo()->getRawOffset() + tr1.getTo()->getDSTSavings() + != tr2.getTo()->getRawOffset() + tr2.getTo()->getDSTSavings() + || (tr1.getTo()->getDSTSavings() != 0 && tr2.getTo()->getDSTSavings() == 0) + || (tr1.getTo()->getDSTSavings() == 0 && tr2.getTo()->getDSTSavings() != 0)) { + return FALSE; + } + } else { + if (tr1.getTo()->getRawOffset() != tr2.getTo()->getRawOffset() || + tr1.getTo()->getDSTSavings() != tr2.getTo()->getDSTSavings()) { + return FALSE; + } + } + time = tr1.getTime(); + } + return TRUE; +} + +void +BasicTimeZone::getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial, + AnnualTimeZoneRule*& std, AnnualTimeZoneRule*& dst, UErrorCode& status) const { + initial = NULL; + std = NULL; + dst = NULL; + if (U_FAILURE(status)) { + return; + } + int32_t initialRaw, initialDst; + UnicodeString 
initialName; + + AnnualTimeZoneRule *ar1 = NULL; + AnnualTimeZoneRule *ar2 = NULL; + UnicodeString name; + + UBool avail; + TimeZoneTransition tr; + // Get the next transition + avail = getNextTransition(date, FALSE, tr); + if (avail) { + tr.getFrom()->getName(initialName); + initialRaw = tr.getFrom()->getRawOffset(); + initialDst = tr.getFrom()->getDSTSavings(); + + // Check if the next transition is either DST->STD or STD->DST and + // within roughly 1 year from the specified date + UDate nextTransitionTime = tr.getTime(); + if (((tr.getFrom()->getDSTSavings() == 0 && tr.getTo()->getDSTSavings() != 0) + || (tr.getFrom()->getDSTSavings() != 0 && tr.getTo()->getDSTSavings() == 0)) + && (date + MILLIS_PER_YEAR > nextTransitionTime)) { + + int32_t year, month, dom, dow, doy, mid; + UDate d; + + // Get local wall time for the next transition time + Grego::timeToFields(nextTransitionTime + initialRaw + initialDst, + year, month, dom, dow, doy, mid); + int32_t weekInMonth = Grego::dayOfWeekInMonth(year, month, dom); + // Create DOW rule + DateTimeRule *dtr = new DateTimeRule(month, weekInMonth, dow, mid, DateTimeRule::WALL_TIME); + tr.getTo()->getName(name); + + // Note: SimpleTimeZone does not support raw offset change. + // So we always use raw offset of the given time for the rule, + // even raw offset is changed. This will result that the result + // zone to return wrong offset after the transition. + // When we encounter such case, we do not inspect next next + // transition for another rule. 
+ ar1 = new AnnualTimeZoneRule(name, initialRaw, tr.getTo()->getDSTSavings(), + dtr, year, AnnualTimeZoneRule::MAX_YEAR); + + if (tr.getTo()->getRawOffset() == initialRaw) { + // Get the next next transition + avail = getNextTransition(nextTransitionTime, FALSE, tr); + if (avail) { + // Check if the next next transition is either DST->STD or STD->DST + // and within roughly 1 year from the next transition + if (((tr.getFrom()->getDSTSavings() == 0 && tr.getTo()->getDSTSavings() != 0) + || (tr.getFrom()->getDSTSavings() != 0 && tr.getTo()->getDSTSavings() == 0)) + && nextTransitionTime + MILLIS_PER_YEAR > tr.getTime()) { + + // Get local wall time for the next transition time + Grego::timeToFields(tr.getTime() + tr.getFrom()->getRawOffset() + tr.getFrom()->getDSTSavings(), + year, month, dom, dow, doy, mid); + weekInMonth = Grego::dayOfWeekInMonth(year, month, dom); + // Generate another DOW rule + dtr = new DateTimeRule(month, weekInMonth, dow, mid, DateTimeRule::WALL_TIME); + tr.getTo()->getName(name); + ar2 = new AnnualTimeZoneRule(name, tr.getTo()->getRawOffset(), tr.getTo()->getDSTSavings(), + dtr, year - 1, AnnualTimeZoneRule::MAX_YEAR); + + // Make sure this rule can be applied to the specified date + avail = ar2->getPreviousStart(date, tr.getFrom()->getRawOffset(), tr.getFrom()->getDSTSavings(), TRUE, d); + if (!avail || d > date + || initialRaw != tr.getTo()->getRawOffset() + || initialDst != tr.getTo()->getDSTSavings()) { + // We cannot use this rule as the second transition rule + delete ar2; + ar2 = NULL; + } + } + } + } + if (ar2 == NULL) { + // Try previous transition + avail = getPreviousTransition(date, TRUE, tr); + if (avail) { + // Check if the previous transition is either DST->STD or STD->DST. + // The actual transition time does not matter here. 
+ if ((tr.getFrom()->getDSTSavings() == 0 && tr.getTo()->getDSTSavings() != 0) + || (tr.getFrom()->getDSTSavings() != 0 && tr.getTo()->getDSTSavings() == 0)) { + + // Generate another DOW rule + Grego::timeToFields(tr.getTime() + tr.getFrom()->getRawOffset() + tr.getFrom()->getDSTSavings(), + year, month, dom, dow, doy, mid); + weekInMonth = Grego::dayOfWeekInMonth(year, month, dom); + dtr = new DateTimeRule(month, weekInMonth, dow, mid, DateTimeRule::WALL_TIME); + tr.getTo()->getName(name); + + // second rule raw/dst offsets should match raw/dst offsets + // at the given time + ar2 = new AnnualTimeZoneRule(name, initialRaw, initialDst, + dtr, ar1->getStartYear() - 1, AnnualTimeZoneRule::MAX_YEAR); + + // Check if this rule start after the first rule after the specified date + avail = ar2->getNextStart(date, tr.getFrom()->getRawOffset(), tr.getFrom()->getDSTSavings(), FALSE, d); + if (!avail || d <= nextTransitionTime) { + // We cannot use this rule as the second transition rule + delete ar2; + ar2 = NULL; + } + } + } + } + if (ar2 == NULL) { + // Cannot find a good pair of AnnualTimeZoneRule + delete ar1; + ar1 = NULL; + } else { + // The initial rule should represent the rule before the previous transition + ar1->getName(initialName); + initialRaw = ar1->getRawOffset(); + initialDst = ar1->getDSTSavings(); + } + } + } + else { + // Try the previous one + avail = getPreviousTransition(date, TRUE, tr); + if (avail) { + tr.getTo()->getName(initialName); + initialRaw = tr.getTo()->getRawOffset(); + initialDst = tr.getTo()->getDSTSavings(); + } else { + // No transitions in the past. 
Just use the current offsets + getOffset(date, FALSE, initialRaw, initialDst, status); + if (U_FAILURE(status)) { + return; + } + } + } + // Set the initial rule + initial = new InitialTimeZoneRule(initialName, initialRaw, initialDst); + + // Set the standard and daylight saving rules + if (ar1 != NULL && ar2 != NULL) { + if (ar1->getDSTSavings() != 0) { + dst = ar1; + std = ar2; + } else { + std = ar1; + dst = ar2; + } + } +} + +void +BasicTimeZone::getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, + UVector*& transitionRules, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + + const InitialTimeZoneRule *orgini; + const TimeZoneRule **orgtrs = NULL; + TimeZoneTransition tzt; + UBool avail; + UVector *orgRules = NULL; + int32_t ruleCount; + TimeZoneRule *r = NULL; + UBool *done = NULL; + InitialTimeZoneRule *res_initial = NULL; + UVector *filteredRules = NULL; + UnicodeString name; + int32_t i; + UDate time, t; + UDate *newTimes = NULL; + UDate firstStart; + UBool bFinalStd = FALSE, bFinalDst = FALSE; + + // Original transition rules + ruleCount = countTransitionRules(status); + if (U_FAILURE(status)) { + return; + } + orgRules = new UVector(ruleCount, status); + if (U_FAILURE(status)) { + return; + } + orgtrs = (const TimeZoneRule**)uprv_malloc(sizeof(TimeZoneRule*)*ruleCount); + if (orgtrs == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + goto error; + } + getTimeZoneRules(orgini, orgtrs, ruleCount, status); + if (U_FAILURE(status)) { + goto error; + } + for (i = 0; i < ruleCount; i++) { + orgRules->addElement(orgtrs[i]->clone(), status); + if (U_FAILURE(status)) { + goto error; + } + } + uprv_free(orgtrs); + orgtrs = NULL; + + avail = getPreviousTransition(start, TRUE, tzt); + if (!avail) { + // No need to filter out rules only applicable to time before the start + initial = orgini->clone(); + transitionRules = orgRules; + return; + } + + done = (UBool*)uprv_malloc(sizeof(UBool)*ruleCount); + if (done == NULL) { + status = 
U_MEMORY_ALLOCATION_ERROR; + goto error; + } + filteredRules = new UVector(status); + if (U_FAILURE(status)) { + goto error; + } + + // Create initial rule + tzt.getTo()->getName(name); + res_initial = new InitialTimeZoneRule(name, tzt.getTo()->getRawOffset(), + tzt.getTo()->getDSTSavings()); + + // Mark rules which does not need to be processed + for (i = 0; i < ruleCount; i++) { + r = (TimeZoneRule*)orgRules->elementAt(i); + avail = r->getNextStart(start, res_initial->getRawOffset(), res_initial->getDSTSavings(), FALSE, time); + done[i] = !avail; + } + + time = start; + while (!bFinalStd || !bFinalDst) { + avail = getNextTransition(time, FALSE, tzt); + if (!avail) { + break; + } + UDate updatedTime = tzt.getTime(); + if (updatedTime == time) { + // Can get here if rules for start & end of daylight time have exactly + // the same time. + // TODO: fix getNextTransition() to prevent it? + status = U_INVALID_STATE_ERROR; + goto error; + } + time = updatedTime; + + const TimeZoneRule *toRule = tzt.getTo(); + for (i = 0; i < ruleCount; i++) { + r = (TimeZoneRule*)orgRules->elementAt(i); + if (*r == *toRule) { + break; + } + } + if (i >= ruleCount) { + // This case should never happen + status = U_INVALID_STATE_ERROR; + goto error; + } + if (done[i]) { + continue; + } + const TimeArrayTimeZoneRule *tar = dynamic_cast(toRule); + const AnnualTimeZoneRule *ar; + if (tar != NULL) { + // Get the previous raw offset and DST savings before the very first start time + TimeZoneTransition tzt0; + t = start; + while (TRUE) { + avail = getNextTransition(t, FALSE, tzt0); + if (!avail) { + break; + } + if (tzt0.getTo()->operator==(*tar)) { + break; + } + t = tzt0.getTime(); + } + if (avail) { + // Check if the entire start times to be added + tar->getFirstStart(tzt.getFrom()->getRawOffset(), tzt.getFrom()->getDSTSavings(), firstStart); + if (firstStart > start) { + // Just add the rule as is + filteredRules->addElement(tar->clone(), status); + if (U_FAILURE(status)) { + goto error; + 
} + } else { + // Colllect transitions after the start time + int32_t startTimes; + DateTimeRule::TimeRuleType timeType; + int32_t idx; + + startTimes = tar->countStartTimes(); + timeType = tar->getTimeType(); + for (idx = 0; idx < startTimes; idx++) { + tar->getStartTimeAt(idx, t); + if (timeType == DateTimeRule::STANDARD_TIME) { + t -= tzt.getFrom()->getRawOffset(); + } + if (timeType == DateTimeRule::WALL_TIME) { + t -= tzt.getFrom()->getDSTSavings(); + } + if (t > start) { + break; + } + } + int32_t asize = startTimes - idx; + if (asize > 0) { + newTimes = (UDate*)uprv_malloc(sizeof(UDate) * asize); + if (newTimes == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + goto error; + } + for (int32_t newidx = 0; newidx < asize; newidx++) { + tar->getStartTimeAt(idx + newidx, newTimes[newidx]); + if (U_FAILURE(status)) { + uprv_free(newTimes); + newTimes = NULL; + goto error; + } + } + tar->getName(name); + TimeArrayTimeZoneRule *newTar = new TimeArrayTimeZoneRule(name, + tar->getRawOffset(), tar->getDSTSavings(), newTimes, asize, timeType); + uprv_free(newTimes); + filteredRules->addElement(newTar, status); + if (U_FAILURE(status)) { + goto error; + } + } + } + } + } else if ((ar = dynamic_cast(toRule)) != NULL) { + ar->getFirstStart(tzt.getFrom()->getRawOffset(), tzt.getFrom()->getDSTSavings(), firstStart); + if (firstStart == tzt.getTime()) { + // Just add the rule as is + filteredRules->addElement(ar->clone(), status); + if (U_FAILURE(status)) { + goto error; + } + } else { + // Calculate the transition year + int32_t year, month, dom, dow, doy, mid; + Grego::timeToFields(tzt.getTime(), year, month, dom, dow, doy, mid); + // Re-create the rule + ar->getName(name); + AnnualTimeZoneRule *newAr = new AnnualTimeZoneRule(name, ar->getRawOffset(), ar->getDSTSavings(), + *(ar->getRule()), year, ar->getEndYear()); + filteredRules->addElement(newAr, status); + if (U_FAILURE(status)) { + goto error; + } + } + // check if this is a final rule + if (ar->getEndYear() == 
AnnualTimeZoneRule::MAX_YEAR) { + // After bot final standard and dst rules are processed, + // exit this while loop. + if (ar->getDSTSavings() == 0) { + bFinalStd = TRUE; + } else { + bFinalDst = TRUE; + } + } + } + done[i] = TRUE; + } + + // Set the results + if (orgRules != NULL) { + while (!orgRules->isEmpty()) { + r = (TimeZoneRule*)orgRules->orphanElementAt(0); + delete r; + } + delete orgRules; + } + if (done != NULL) { + uprv_free(done); + } + + initial = res_initial; + transitionRules = filteredRules; + return; + +error: + if (orgtrs != NULL) { + uprv_free(orgtrs); + } + if (orgRules != NULL) { + while (!orgRules->isEmpty()) { + r = (TimeZoneRule*)orgRules->orphanElementAt(0); + delete r; + } + delete orgRules; + } + if (done != NULL) { + if (filteredRules != NULL) { + while (!filteredRules->isEmpty()) { + r = (TimeZoneRule*)filteredRules->orphanElementAt(0); + delete r; + } + delete filteredRules; + } + delete res_initial; + uprv_free(done); + } + + initial = NULL; + transitionRules = NULL; +} + +void +BasicTimeZone::getOffsetFromLocal(UDate /*date*/, int32_t /*nonExistingTimeOpt*/, int32_t /*duplicatedTimeOpt*/, + int32_t& /*rawOffset*/, int32_t& /*dstOffset*/, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + status = U_UNSUPPORTED_ERROR; +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +//eof diff --git a/libicui18n/libicui18n/basictz.cpp.patch b/libicui18n/libicui18n/basictz.cpp.patch new file mode 100644 index 0000000..78a3534 --- /dev/null +++ b/libicui18n/libicui18n/basictz.cpp.patch @@ -0,0 +1,11 @@ +--- libicui18n/i18n/basictz.cpp 2019-12-23 14:38:40.201889324 +0300 ++++ libicui18n/basictz.cpp 2020-07-21 15:00:43.322806723 +0300 +@@ -408,7 +408,7 @@ BasicTimeZone::getTimeZoneRulesAfter(UDa + if (!avail) { + break; + } +- if (*(tzt0.getTo()) == *tar) { ++ if (tzt0.getTo()->operator==(*tar)) { + break; + } + t = tzt0.getTime(); diff --git a/libicui18n/libicui18n/buildfile b/libicui18n/libicui18n/buildfile 
index a2391b1..6ec314f 100644 --- a/libicui18n/libicui18n/buildfile +++ b/libicui18n/libicui18n/buildfile @@ -1,9 +1,11 @@ # file : libicui18n/buildfile # license : Unicode License; see accompanying LICENSE file -import int_libs = libicuuc%lib{icuuc} +import intf_libs = libicuuc%lib{icuuc} -lib{icui18n}: {hxx cxx}{**} $int_libs +patched = stsearch olsontz vtzone rbtz dtitvfmt basictz + +lib{icui18n}: {hxx}{**} i18n/cxx{* -{$patched}} cxx{$patched} $intf_libs tclass = $cxx.target.class tsys = $cxx.target.system @@ -52,9 +54,11 @@ switch $cxx.class, $tsys # Disable warnings that pop up with -Wall -Wextra. Upstream doesn't seem # to care about these and it is not easy to disable specific warnings in a # way that works across compilers/version (some -Wno-* options are only - # recognized in newer versions). + # recognized in newer versions). There are still some warnings left that + # appear for certain platforms/compilers. We pass them through but disable + # treating them as errors. # - cxx.coptions += -Wno-all -Wno-extra + cxx.coptions += -Wno-all -Wno-extra -Wno-error # Disable the Clang targeting MSVC warnings. # @@ -108,7 +112,7 @@ switch $tclass, $tsys lib{icui18n}: { cc.export.poptions = "-I$src_base/i18n" - cc.export.libs = $int_libs + cc.export.libs = $intf_libs } liba{icui18n}: cc.export.poptions += -DU_STATIC_IMPLEMENTATION diff --git a/libicui18n/libicui18n/dtitvfmt.cpp b/libicui18n/libicui18n/dtitvfmt.cpp new file mode 100644 index 0000000..db6d1c3 --- /dev/null +++ b/libicui18n/libicui18n/dtitvfmt.cpp @@ -0,0 +1,1661 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/******************************************************************************* +* Copyright (C) 2008-2016, International Business Machines Corporation and +* others. All Rights Reserved. 
+******************************************************************************* +* +* File DTITVFMT.CPP +* +******************************************************************************* +*/ + +#include "utypeinfo.h" // for 'typeid' to work + +#include "unicode/dtitvfmt.h" + +#if !UCONFIG_NO_FORMATTING + +//TODO: put in compilation +//#define DTITVFMT_DEBUG 1 + +#include "unicode/calendar.h" +#include "unicode/dtptngen.h" +#include "unicode/dtitvinf.h" +#include "unicode/simpleformatter.h" +#include "cmemory.h" +#include "cstring.h" +#include "dtitv_impl.h" +#include "mutex.h" +#include "uresimp.h" +#include "formattedval_impl.h" + +#ifdef DTITVFMT_DEBUG +#include +#endif + +U_NAMESPACE_BEGIN + + + +#ifdef DTITVFMT_DEBUG +#define PRINTMESG(msg) { std::cout << "(" << __FILE__ << ":" << __LINE__ << ") " << msg << "\n"; } +#endif + + +static const UChar gDateFormatSkeleton[][11] = { +//yMMMMEEEEd +{LOW_Y, CAP_M, CAP_M, CAP_M, CAP_M, CAP_E, CAP_E, CAP_E, CAP_E, LOW_D, 0}, +//yMMMMd +{LOW_Y, CAP_M, CAP_M, CAP_M, CAP_M, LOW_D, 0}, +//yMMMd +{LOW_Y, CAP_M, CAP_M, CAP_M, LOW_D, 0}, +//yMd +{LOW_Y, CAP_M, LOW_D, 0} }; + + +static const char gCalendarTag[] = "calendar"; +static const char gGregorianTag[] = "gregorian"; +static const char gDateTimePatternsTag[] = "DateTimePatterns"; + + +// latestFirst: +static const UChar gLaterFirstPrefix[] = {LOW_L, LOW_A, LOW_T, LOW_E, LOW_S,LOW_T, CAP_F, LOW_I, LOW_R, LOW_S, LOW_T, COLON}; + +// earliestFirst: +static const UChar gEarlierFirstPrefix[] = {LOW_E, LOW_A, LOW_R, LOW_L, LOW_I, LOW_E, LOW_S, LOW_T, CAP_F, LOW_I, LOW_R, LOW_S, LOW_T, COLON}; + + +class FormattedDateIntervalData : public FormattedValueFieldPositionIteratorImpl { +public: + FormattedDateIntervalData(UErrorCode& status) : FormattedValueFieldPositionIteratorImpl(5, status) {} + virtual ~FormattedDateIntervalData(); +}; + +FormattedDateIntervalData::~FormattedDateIntervalData() = default; + +UPRV_FORMATTED_VALUE_SUBCLASS_AUTO_IMPL(FormattedDateInterval) + + 
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateIntervalFormat) + +// Mutex, protects access to fDateFormat, fFromCalendar and fToCalendar. +// Needed because these data members are modified by const methods of DateIntervalFormat. + +static UMutex gFormatterMutex; + +DateIntervalFormat* U_EXPORT2 +DateIntervalFormat::createInstance(const UnicodeString& skeleton, + UErrorCode& status) { + return createInstance(skeleton, Locale::getDefault(), status); +} + + +DateIntervalFormat* U_EXPORT2 +DateIntervalFormat::createInstance(const UnicodeString& skeleton, + const Locale& locale, + UErrorCode& status) { +#ifdef DTITVFMT_DEBUG + char result[1000]; + char result_1[1000]; + char mesg[2000]; + skeleton.extract(0, skeleton.length(), result, "UTF-8"); + UnicodeString pat; + ((SimpleDateFormat*)dtfmt)->toPattern(pat); + pat.extract(0, pat.length(), result_1, "UTF-8"); + sprintf(mesg, "skeleton: %s; pattern: %s\n", result, result_1); + PRINTMESG(mesg) +#endif + + DateIntervalInfo* dtitvinf = new DateIntervalInfo(locale, status); + return create(locale, dtitvinf, &skeleton, status); +} + + + +DateIntervalFormat* U_EXPORT2 +DateIntervalFormat::createInstance(const UnicodeString& skeleton, + const DateIntervalInfo& dtitvinf, + UErrorCode& status) { + return createInstance(skeleton, Locale::getDefault(), dtitvinf, status); +} + + +DateIntervalFormat* U_EXPORT2 +DateIntervalFormat::createInstance(const UnicodeString& skeleton, + const Locale& locale, + const DateIntervalInfo& dtitvinf, + UErrorCode& status) { + DateIntervalInfo* ptn = dtitvinf.clone(); + return create(locale, ptn, &skeleton, status); +} + + +DateIntervalFormat::DateIntervalFormat() +: fInfo(NULL), + fDateFormat(NULL), + fFromCalendar(NULL), + fToCalendar(NULL), + fLocale(Locale::getRoot()), + fDatePattern(NULL), + fTimePattern(NULL), + fDateTimeFormat(NULL) +{} + + +DateIntervalFormat::DateIntervalFormat(const DateIntervalFormat& itvfmt) +: Format(itvfmt), + fInfo(NULL), + fDateFormat(NULL), + fFromCalendar(NULL), + 
fToCalendar(NULL), + fLocale(itvfmt.fLocale), + fDatePattern(NULL), + fTimePattern(NULL), + fDateTimeFormat(NULL) { + *this = itvfmt; +} + + +DateIntervalFormat& +DateIntervalFormat::operator=(const DateIntervalFormat& itvfmt) { + if ( this != &itvfmt ) { + delete fDateFormat; + delete fInfo; + delete fFromCalendar; + delete fToCalendar; + delete fDatePattern; + delete fTimePattern; + delete fDateTimeFormat; + { + Mutex lock(&gFormatterMutex); + if ( itvfmt.fDateFormat ) { + fDateFormat = itvfmt.fDateFormat->clone(); + } else { + fDateFormat = NULL; + } + if ( itvfmt.fFromCalendar ) { + fFromCalendar = itvfmt.fFromCalendar->clone(); + } else { + fFromCalendar = NULL; + } + if ( itvfmt.fToCalendar ) { + fToCalendar = itvfmt.fToCalendar->clone(); + } else { + fToCalendar = NULL; + } + } + if ( itvfmt.fInfo ) { + fInfo = itvfmt.fInfo->clone(); + } else { + fInfo = NULL; + } + fSkeleton = itvfmt.fSkeleton; + int8_t i; + for ( i = 0; i< DateIntervalInfo::kIPI_MAX_INDEX; ++i ) { + fIntervalPatterns[i] = itvfmt.fIntervalPatterns[i]; + } + fLocale = itvfmt.fLocale; + fDatePattern = (itvfmt.fDatePattern)? itvfmt.fDatePattern->clone(): NULL; + fTimePattern = (itvfmt.fTimePattern)? itvfmt.fTimePattern->clone(): NULL; + fDateTimeFormat = (itvfmt.fDateTimeFormat)? 
itvfmt.fDateTimeFormat->clone(): NULL; + } + return *this; +} + + +DateIntervalFormat::~DateIntervalFormat() { + delete fInfo; + delete fDateFormat; + delete fFromCalendar; + delete fToCalendar; + delete fDatePattern; + delete fTimePattern; + delete fDateTimeFormat; +} + + +DateIntervalFormat* +DateIntervalFormat::clone() const { + return new DateIntervalFormat(*this); +} + + +UBool +DateIntervalFormat::operator==(const Format& other) const { + if (typeid(*this) != typeid(other)) {return FALSE;} + const DateIntervalFormat* fmt = (DateIntervalFormat*)&other; + if (this == fmt) {return TRUE;} + if (!Format::operator==(other)) {return FALSE;} + if ((fInfo != fmt->fInfo) && (fInfo == NULL || fmt->fInfo == NULL)) {return FALSE;} + if (fInfo && fmt->fInfo && (*fInfo != *fmt->fInfo )) {return FALSE;} + { + Mutex lock(&gFormatterMutex); + if (fDateFormat != fmt->fDateFormat && (fDateFormat == NULL || fmt->fDateFormat == NULL)) {return FALSE;} + if (fDateFormat && fmt->fDateFormat && !(*fDateFormat == *fmt->fDateFormat)) {return FALSE;} + } + // note: fFromCalendar and fToCalendar hold no persistent state, and therefore do not participate in operator ==. + // fDateFormat has the master calendar for the DateIntervalFormat. 
+ if (fSkeleton != fmt->fSkeleton) {return FALSE;} + if (fDatePattern != fmt->fDatePattern && (fDatePattern == NULL || fmt->fDatePattern == NULL)) {return FALSE;} + if (fDatePattern && fmt->fDatePattern && (*fDatePattern != *fmt->fDatePattern)) {return FALSE;} + if (fTimePattern != fmt->fTimePattern && (fTimePattern == NULL || fmt->fTimePattern == NULL)) {return FALSE;} + if (fTimePattern && fmt->fTimePattern && (*fTimePattern != *fmt->fTimePattern)) {return FALSE;} + if (fDateTimeFormat != fmt->fDateTimeFormat && (fDateTimeFormat == NULL || fmt->fDateTimeFormat == NULL)) {return FALSE;} + if (fDateTimeFormat && fmt->fDateTimeFormat && (*fDateTimeFormat != *fmt->fDateTimeFormat)) {return FALSE;} + if (fLocale != fmt->fLocale) {return FALSE;} + + for (int32_t i = 0; i< DateIntervalInfo::kIPI_MAX_INDEX; ++i ) { + if (fIntervalPatterns[i].firstPart != fmt->fIntervalPatterns[i].firstPart) {return FALSE;} + if (fIntervalPatterns[i].secondPart != fmt->fIntervalPatterns[i].secondPart ) {return FALSE;} + if (fIntervalPatterns[i].laterDateFirst != fmt->fIntervalPatterns[i].laterDateFirst) {return FALSE;} + } + return TRUE; +} + + +UnicodeString& +DateIntervalFormat::format(const Formattable& obj, + UnicodeString& appendTo, + FieldPosition& fieldPosition, + UErrorCode& status) const { + if ( U_FAILURE(status) ) { + return appendTo; + } + + if ( obj.getType() == Formattable::kObject ) { + const UObject* formatObj = obj.getObject(); + const DateInterval* interval = dynamic_cast(formatObj); + if (interval != NULL) { + return format(interval, appendTo, fieldPosition, status); + } + } + status = U_ILLEGAL_ARGUMENT_ERROR; + return appendTo; +} + + +UnicodeString& +DateIntervalFormat::format(const DateInterval* dtInterval, + UnicodeString& appendTo, + FieldPosition& fieldPosition, + UErrorCode& status) const { + if ( U_FAILURE(status) ) { + return appendTo; + } + if (fDateFormat == NULL || fInfo == NULL) { + status = U_INVALID_STATE_ERROR; + return appendTo; + } + + 
FieldPositionOnlyHandler handler(fieldPosition); + handler.setAcceptFirstOnly(TRUE); + int8_t ignore; + + Mutex lock(&gFormatterMutex); + return formatIntervalImpl(*dtInterval, appendTo, ignore, handler, status); +} + + +FormattedDateInterval DateIntervalFormat::formatToValue( + const DateInterval& dtInterval, + UErrorCode& status) const { + LocalPointer result(new FormattedDateIntervalData(status), status); + if (U_FAILURE(status)) { + return FormattedDateInterval(status); + } + UnicodeString string; + int8_t firstIndex; + auto handler = result->getHandler(status); + handler.setCategory(UFIELD_CATEGORY_DATE); + { + Mutex lock(&gFormatterMutex); + formatIntervalImpl(dtInterval, string, firstIndex, handler, status); + } + handler.getError(status); + result->appendString(string, status); + if (U_FAILURE(status)) { + return FormattedDateInterval(status); + } + + // Compute the span fields and sort them into place: + if (firstIndex != -1) { + result->addOverlapSpans(UFIELD_CATEGORY_DATE_INTERVAL_SPAN, firstIndex, status); + if (U_FAILURE(status)) { + return FormattedDateInterval(status); + } + result->sort(); + } + + return FormattedDateInterval(result.orphan()); +} + + +UnicodeString& +DateIntervalFormat::format(Calendar& fromCalendar, + Calendar& toCalendar, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const { + FieldPositionOnlyHandler handler(pos); + handler.setAcceptFirstOnly(TRUE); + int8_t ignore; + + Mutex lock(&gFormatterMutex); + return formatImpl(fromCalendar, toCalendar, appendTo, ignore, handler, status); +} + + +FormattedDateInterval DateIntervalFormat::formatToValue( + Calendar& fromCalendar, + Calendar& toCalendar, + UErrorCode& status) const { + LocalPointer result(new FormattedDateIntervalData(status), status); + if (U_FAILURE(status)) { + return FormattedDateInterval(status); + } + UnicodeString string; + int8_t firstIndex; + auto handler = result->getHandler(status); + handler.setCategory(UFIELD_CATEGORY_DATE); + { + Mutex 
lock(&gFormatterMutex); + formatImpl(fromCalendar, toCalendar, string, firstIndex, handler, status); + } + handler.getError(status); + result->appendString(string, status); + if (U_FAILURE(status)) { + return FormattedDateInterval(status); + } + + // Compute the span fields and sort them into place: + if (firstIndex != -1) { + result->addOverlapSpans(UFIELD_CATEGORY_DATE_INTERVAL_SPAN, firstIndex, status); + result->sort(); + } + + return FormattedDateInterval(result.orphan()); +} + + +UnicodeString& DateIntervalFormat::formatIntervalImpl( + const DateInterval& dtInterval, + UnicodeString& appendTo, + int8_t& firstIndex, + FieldPositionHandler& fphandler, + UErrorCode& status) const { + if (U_FAILURE(status)) { + return appendTo; + } + if (fFromCalendar == nullptr || fToCalendar == nullptr) { + status = U_INVALID_STATE_ERROR; + return appendTo; + } + fFromCalendar->setTime(dtInterval.getFromDate(), status); + fToCalendar->setTime(dtInterval.getToDate(), status); + return formatImpl(*fFromCalendar, *fToCalendar, appendTo, firstIndex, fphandler, status); +} + + +UnicodeString& +DateIntervalFormat::formatImpl(Calendar& fromCalendar, + Calendar& toCalendar, + UnicodeString& appendTo, + int8_t& firstIndex, + FieldPositionHandler& fphandler, + UErrorCode& status) const { + if ( U_FAILURE(status) ) { + return appendTo; + } + + // Initialize firstIndex to -1 (single date, no range) + firstIndex = -1; + + // not support different calendar types and time zones + //if ( fromCalendar.getType() != toCalendar.getType() ) { + if ( !fromCalendar.isEquivalentTo(toCalendar) ) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return appendTo; + } + + // First, find the largest different calendar field. 
+ UCalendarDateFields field = UCAL_FIELD_COUNT; + + if ( fromCalendar.get(UCAL_ERA,status) != toCalendar.get(UCAL_ERA,status)) { + field = UCAL_ERA; + } else if ( fromCalendar.get(UCAL_YEAR, status) != + toCalendar.get(UCAL_YEAR, status) ) { + field = UCAL_YEAR; + } else if ( fromCalendar.get(UCAL_MONTH, status) != + toCalendar.get(UCAL_MONTH, status) ) { + field = UCAL_MONTH; + } else if ( fromCalendar.get(UCAL_DATE, status) != + toCalendar.get(UCAL_DATE, status) ) { + field = UCAL_DATE; + } else if ( fromCalendar.get(UCAL_AM_PM, status) != + toCalendar.get(UCAL_AM_PM, status) ) { + field = UCAL_AM_PM; + } else if ( fromCalendar.get(UCAL_HOUR, status) != + toCalendar.get(UCAL_HOUR, status) ) { + field = UCAL_HOUR; + } else if ( fromCalendar.get(UCAL_MINUTE, status) != + toCalendar.get(UCAL_MINUTE, status) ) { + field = UCAL_MINUTE; + } else if ( fromCalendar.get(UCAL_SECOND, status) != + toCalendar.get(UCAL_SECOND, status) ) { + field = UCAL_SECOND; + } + + if ( U_FAILURE(status) ) { + return appendTo; + } + if ( field == UCAL_FIELD_COUNT ) { + /* ignore the millisecond etc. small fields' difference. + * use single date when all the above are the same. + */ + return fDateFormat->_format(fromCalendar, appendTo, fphandler, status); + } + UBool fromToOnSameDay = (field==UCAL_AM_PM || field==UCAL_HOUR || field==UCAL_MINUTE || field==UCAL_SECOND); + + // following call should not set wrong status, + // all the pass-in fields are valid till here + int32_t itvPtnIndex = DateIntervalInfo::calendarFieldToIntervalIndex(field, + status); + const PatternInfo& intervalPattern = fIntervalPatterns[itvPtnIndex]; + + if ( intervalPattern.firstPart.isEmpty() && + intervalPattern.secondPart.isEmpty() ) { + if ( fDateFormat->isFieldUnitIgnored(field) ) { + /* the largest different calendar field is small than + * the smallest calendar field in pattern, + * return single date format. 
+ */ + return fDateFormat->_format(fromCalendar, appendTo, fphandler, status); + } + return fallbackFormat(fromCalendar, toCalendar, fromToOnSameDay, appendTo, firstIndex, fphandler, status); + } + // If the first part in interval pattern is empty, + // the 2nd part of it saves the full-pattern used in fall-back. + // For a 'real' interval pattern, the first part will never be empty. + if ( intervalPattern.firstPart.isEmpty() ) { + // fall back + UnicodeString originalPattern; + fDateFormat->toPattern(originalPattern); + fDateFormat->applyPattern(intervalPattern.secondPart); + appendTo = fallbackFormat(fromCalendar, toCalendar, fromToOnSameDay, appendTo, firstIndex, fphandler, status); + fDateFormat->applyPattern(originalPattern); + return appendTo; + } + Calendar* firstCal; + Calendar* secondCal; + if ( intervalPattern.laterDateFirst ) { + firstCal = &toCalendar; + secondCal = &fromCalendar; + firstIndex = 1; + } else { + firstCal = &fromCalendar; + secondCal = &toCalendar; + firstIndex = 0; + } + // break the interval pattern into 2 parts, + // first part should not be empty, + UnicodeString originalPattern; + fDateFormat->toPattern(originalPattern); + fDateFormat->applyPattern(intervalPattern.firstPart); + fDateFormat->_format(*firstCal, appendTo, fphandler, status); + + if ( !intervalPattern.secondPart.isEmpty() ) { + fDateFormat->applyPattern(intervalPattern.secondPart); + fDateFormat->_format(*secondCal, appendTo, fphandler, status); + } + fDateFormat->applyPattern(originalPattern); + return appendTo; +} + + + +void +DateIntervalFormat::parseObject(const UnicodeString& /* source */, + Formattable& /* result */, + ParsePosition& /* parse_pos */) const { + // parseObject(const UnicodeString&, Formattable&, UErrorCode&) const + // will set status as U_INVALID_FORMAT_ERROR if + // parse_pos is still 0 +} + + + + +const DateIntervalInfo* +DateIntervalFormat::getDateIntervalInfo() const { + return fInfo; +} + + +void +DateIntervalFormat::setDateIntervalInfo(const 
DateIntervalInfo& newItvPattern, + UErrorCode& status) { + delete fInfo; + fInfo = new DateIntervalInfo(newItvPattern); + + // Delete patterns that get reset by initializePattern + delete fDatePattern; + fDatePattern = NULL; + delete fTimePattern; + fTimePattern = NULL; + delete fDateTimeFormat; + fDateTimeFormat = NULL; + + if (fDateFormat) { + initializePattern(status); + } +} + + + +const DateFormat* +DateIntervalFormat::getDateFormat() const { + return fDateFormat; +} + + +void +DateIntervalFormat::adoptTimeZone(TimeZone* zone) +{ + if (fDateFormat != NULL) { + fDateFormat->adoptTimeZone(zone); + } + // The fDateFormat has the master calendar for the DateIntervalFormat and has + // ownership of any adopted TimeZone; fFromCalendar and fToCalendar are internal + // work clones of that calendar (and should not also be given ownership of the + // adopted TimeZone). + if (fFromCalendar) { + fFromCalendar->setTimeZone(*zone); + } + if (fToCalendar) { + fToCalendar->setTimeZone(*zone); + } +} + +void +DateIntervalFormat::setTimeZone(const TimeZone& zone) +{ + if (fDateFormat != NULL) { + fDateFormat->setTimeZone(zone); + } + // The fDateFormat has the master calendar for the DateIntervalFormat; + // fFromCalendar and fToCalendar are internal work clones of that calendar. + if (fFromCalendar) { + fFromCalendar->setTimeZone(zone); + } + if (fToCalendar) { + fToCalendar->setTimeZone(zone); + } +} + +const TimeZone& +DateIntervalFormat::getTimeZone() const +{ + if (fDateFormat != NULL) { + Mutex lock(&gFormatterMutex); + return fDateFormat->getTimeZone(); + } + // If fDateFormat is NULL (unexpected), create default timezone. 
+ return *(TimeZone::createDefault()); +} + +DateIntervalFormat::DateIntervalFormat(const Locale& locale, + DateIntervalInfo* dtItvInfo, + const UnicodeString* skeleton, + UErrorCode& status) +: fInfo(NULL), + fDateFormat(NULL), + fFromCalendar(NULL), + fToCalendar(NULL), + fLocale(locale), + fDatePattern(NULL), + fTimePattern(NULL), + fDateTimeFormat(NULL) +{ + LocalPointer info(dtItvInfo, status); + LocalPointer dtfmt(static_cast( + DateFormat::createInstanceForSkeleton(*skeleton, locale, status)), status); + if (U_FAILURE(status)) { + return; + } + + if ( skeleton ) { + fSkeleton = *skeleton; + } + fInfo = info.orphan(); + fDateFormat = dtfmt.orphan(); + if ( fDateFormat->getCalendar() ) { + fFromCalendar = fDateFormat->getCalendar()->clone(); + fToCalendar = fDateFormat->getCalendar()->clone(); + } + initializePattern(status); +} + +DateIntervalFormat* U_EXPORT2 +DateIntervalFormat::create(const Locale& locale, + DateIntervalInfo* dtitvinf, + const UnicodeString* skeleton, + UErrorCode& status) { + DateIntervalFormat* f = new DateIntervalFormat(locale, dtitvinf, + skeleton, status); + if ( f == NULL ) { + status = U_MEMORY_ALLOCATION_ERROR; + delete dtitvinf; + } else if ( U_FAILURE(status) ) { + // safe to delete f, although nothing acutally is saved + delete f; + f = 0; + } + return f; +} + + + +/** + * Initialize interval patterns locale to this formatter + * + * This code is a bit complicated since + * 1. the interval patterns saved in resource bundle files are interval + * patterns based on date or time only. + * It does not have interval patterns based on both date and time. + * Interval patterns on both date and time are algorithm generated. + * + * For example, it has interval patterns on skeleton "dMy" and "hm", + * but it does not have interval patterns on skeleton "dMyhm". 
+ * + * The rule to genearte interval patterns for both date and time skeleton are + * 1) when the year, month, or day differs, concatenate the two original + * expressions with a separator between, + * For example, interval pattern from "Jan 10, 2007 10:10 am" + * to "Jan 11, 2007 10:10am" is + * "Jan 10, 2007 10:10 am - Jan 11, 2007 10:10am" + * + * 2) otherwise, present the date followed by the range expression + * for the time. + * For example, interval pattern from "Jan 10, 2007 10:10 am" + * to "Jan 10, 2007 11:10am" is + * "Jan 10, 2007 10:10 am - 11:10am" + * + * 2. even a pattern does not request a certion calendar field, + * the interval pattern needs to include such field if such fields are + * different between 2 dates. + * For example, a pattern/skeleton is "hm", but the interval pattern + * includes year, month, and date when year, month, and date differs. + * + * @param status output param set to success/failure code on exit + * @stable ICU 4.0 + */ +void +DateIntervalFormat::initializePattern(UErrorCode& status) { + if ( U_FAILURE(status) ) { + return; + } + const Locale& locale = fDateFormat->getSmpFmtLocale(); + if ( fSkeleton.isEmpty() ) { + UnicodeString fullPattern; + fDateFormat->toPattern(fullPattern); +#ifdef DTITVFMT_DEBUG + char result[1000]; + char result_1[1000]; + char mesg[2000]; + fSkeleton.extract(0, fSkeleton.length(), result, "UTF-8"); + sprintf(mesg, "in getBestSkeleton: fSkeleton: %s; \n", result); + PRINTMESG(mesg) +#endif + // fSkeleton is already set by createDateIntervalInstance() + // or by createInstance(UnicodeString skeleton, .... ) + fSkeleton = DateTimePatternGenerator::staticGetSkeleton( + fullPattern, status); + if ( U_FAILURE(status) ) { + return; + } + } + + // initialize the fIntervalPattern ordering + int8_t i; + for ( i = 0; i < DateIntervalInfo::kIPI_MAX_INDEX; ++i ) { + fIntervalPatterns[i].laterDateFirst = fInfo->getDefaultOrder(); + } + + /* Check whether the skeleton is a combination of date and time. 
+ * For the complication reason 1 explained above. + */ + UnicodeString dateSkeleton; + UnicodeString timeSkeleton; + UnicodeString normalizedTimeSkeleton; + UnicodeString normalizedDateSkeleton; + + + /* the difference between time skeleton and normalizedTimeSkeleton are: + * 1. (Formerly, normalized time skeleton folded 'H' to 'h'; no longer true) + * 2. 'a' is omitted in normalized time skeleton. + * 3. there is only one appearance for 'h' or 'H', 'm','v', 'z' in normalized + * time skeleton + * + * The difference between date skeleton and normalizedDateSkeleton are: + * 1. both 'y' and 'd' appear only once in normalizeDateSkeleton + * 2. 'E' and 'EE' are normalized into 'EEE' + * 3. 'MM' is normalized into 'M' + */ + getDateTimeSkeleton(fSkeleton, dateSkeleton, normalizedDateSkeleton, + timeSkeleton, normalizedTimeSkeleton); + +#ifdef DTITVFMT_DEBUG + char result[1000]; + char result_1[1000]; + char mesg[2000]; + fSkeleton.extract(0, fSkeleton.length(), result, "UTF-8"); + sprintf(mesg, "in getBestSkeleton: fSkeleton: %s; \n", result); + PRINTMESG(mesg) +#endif + + // move this up here since we need it for fallbacks + if ( timeSkeleton.length() > 0 && dateSkeleton.length() > 0 ) { + // Need the Date/Time pattern for concatenation of the date + // with the time interval. + // The date/time pattern ( such as {0} {1} ) is saved in + // calendar, that is why need to get the CalendarData here. 
+ LocalUResourceBundlePointer dateTimePatternsRes(ures_open(NULL, locale.getBaseName(), &status)); + ures_getByKey(dateTimePatternsRes.getAlias(), gCalendarTag, + dateTimePatternsRes.getAlias(), &status); + ures_getByKeyWithFallback(dateTimePatternsRes.getAlias(), gGregorianTag, + dateTimePatternsRes.getAlias(), &status); + ures_getByKeyWithFallback(dateTimePatternsRes.getAlias(), gDateTimePatternsTag, + dateTimePatternsRes.getAlias(), &status); + + int32_t dateTimeFormatLength; + const UChar* dateTimeFormat = ures_getStringByIndex( + dateTimePatternsRes.getAlias(), + (int32_t)DateFormat::kDateTime, + &dateTimeFormatLength, &status); + if ( U_SUCCESS(status) && dateTimeFormatLength >= 3 ) { + fDateTimeFormat = new UnicodeString(dateTimeFormat, dateTimeFormatLength); + } + } + + UBool found = setSeparateDateTimePtn(normalizedDateSkeleton, + normalizedTimeSkeleton); + + // for skeletons with seconds, found is false and we enter this block + if ( found == false ) { + // use fallback + // TODO: if user asks "m"(minute), but "d"(day) differ + if ( timeSkeleton.length() != 0 ) { + if ( dateSkeleton.length() == 0 ) { + // prefix with yMd + timeSkeleton.insert(0, gDateFormatSkeleton[DateFormat::kShort], -1); + UnicodeString pattern = DateFormat::getBestPattern( + locale, timeSkeleton, status); + if ( U_FAILURE(status) ) { + return; + } + // for fall back interval patterns, + // the first part of the pattern is empty, + // the second part of the pattern is the full-pattern + // should be used in fall-back. 
+ setPatternInfo(UCAL_DATE, NULL, &pattern, fInfo->getDefaultOrder()); + setPatternInfo(UCAL_MONTH, NULL, &pattern, fInfo->getDefaultOrder()); + setPatternInfo(UCAL_YEAR, NULL, &pattern, fInfo->getDefaultOrder()); + } else { + // TODO: fall back + } + } else { + // TODO: fall back + } + return; + } // end of skeleton not found + // interval patterns for skeleton are found in resource + if ( timeSkeleton.length() == 0 ) { + // done + } else if ( dateSkeleton.length() == 0 ) { + // prefix with yMd + timeSkeleton.insert(0, gDateFormatSkeleton[DateFormat::kShort], -1); + UnicodeString pattern = DateFormat::getBestPattern( + locale, timeSkeleton, status); + if ( U_FAILURE(status) ) { + return; + } + // for fall back interval patterns, + // the first part of the pattern is empty, + // the second part of the pattern is the full-pattern + // should be used in fall-back. + setPatternInfo(UCAL_DATE, NULL, &pattern, fInfo->getDefaultOrder()); + setPatternInfo(UCAL_MONTH, NULL, &pattern, fInfo->getDefaultOrder()); + setPatternInfo(UCAL_YEAR, NULL, &pattern, fInfo->getDefaultOrder()); + } else { + /* if both present, + * 1) when the year, month, or day differs, + * concatenate the two original expressions with a separator between, + * 2) otherwise, present the date followed by the + * range expression for the time. 
+ */ + /* + * 1) when the year, month, or day differs, + * concatenate the two original expressions with a separator between, + */ + // if field exists, use fall back + UnicodeString skeleton = fSkeleton; + if ( !fieldExistsInSkeleton(UCAL_DATE, dateSkeleton) ) { + // prefix skeleton with 'd' + skeleton.insert(0, LOW_D); + setFallbackPattern(UCAL_DATE, skeleton, status); + } + if ( !fieldExistsInSkeleton(UCAL_MONTH, dateSkeleton) ) { + // then prefix skeleton with 'M' + skeleton.insert(0, CAP_M); + setFallbackPattern(UCAL_MONTH, skeleton, status); + } + if ( !fieldExistsInSkeleton(UCAL_YEAR, dateSkeleton) ) { + // then prefix skeleton with 'y' + skeleton.insert(0, LOW_Y); + setFallbackPattern(UCAL_YEAR, skeleton, status); + } + + /* + * 2) otherwise, present the date followed by the + * range expression for the time. + */ + + if ( fDateTimeFormat == NULL ) { + // earlier failure getting dateTimeFormat + return; + } + + UnicodeString datePattern = DateFormat::getBestPattern( + locale, dateSkeleton, status); + + concatSingleDate2TimeInterval(*fDateTimeFormat, datePattern, UCAL_AM_PM, status); + concatSingleDate2TimeInterval(*fDateTimeFormat, datePattern, UCAL_HOUR, status); + concatSingleDate2TimeInterval(*fDateTimeFormat, datePattern, UCAL_MINUTE, status); + } +} + + + +void U_EXPORT2 +DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton, + UnicodeString& dateSkeleton, + UnicodeString& normalizedDateSkeleton, + UnicodeString& timeSkeleton, + UnicodeString& normalizedTimeSkeleton) { + // dateSkeleton follows the sequence of y*M*E*d* + // timeSkeleton follows the sequence of hm*[v|z]? 
+ int32_t ECount = 0; + int32_t dCount = 0; + int32_t MCount = 0; + int32_t yCount = 0; + int32_t hCount = 0; + int32_t HCount = 0; + int32_t mCount = 0; + int32_t vCount = 0; + int32_t zCount = 0; + int32_t i; + + for (i = 0; i < skeleton.length(); ++i) { + UChar ch = skeleton[i]; + switch ( ch ) { + case CAP_E: + dateSkeleton.append(ch); + ++ECount; + break; + case LOW_D: + dateSkeleton.append(ch); + ++dCount; + break; + case CAP_M: + dateSkeleton.append(ch); + ++MCount; + break; + case LOW_Y: + dateSkeleton.append(ch); + ++yCount; + break; + case CAP_G: + case CAP_Y: + case LOW_U: + case CAP_Q: + case LOW_Q: + case CAP_L: + case LOW_L: + case CAP_W: + case LOW_W: + case CAP_D: + case CAP_F: + case LOW_G: + case LOW_E: + case LOW_C: + case CAP_U: + case LOW_R: + normalizedDateSkeleton.append(ch); + dateSkeleton.append(ch); + break; + case LOW_A: + // 'a' is implicitly handled + timeSkeleton.append(ch); + break; + case LOW_H: + timeSkeleton.append(ch); + ++hCount; + break; + case CAP_H: + timeSkeleton.append(ch); + ++HCount; + break; + case LOW_M: + timeSkeleton.append(ch); + ++mCount; + break; + case LOW_Z: + ++zCount; + timeSkeleton.append(ch); + break; + case LOW_V: + ++vCount; + timeSkeleton.append(ch); + break; + case CAP_V: + case CAP_Z: + case LOW_K: + case CAP_K: + case LOW_J: + case LOW_S: + case CAP_S: + case CAP_A: + timeSkeleton.append(ch); + normalizedTimeSkeleton.append(ch); + break; + } + } + + /* generate normalized form for date*/ + if ( yCount != 0 ) { + for (i = 0; i < yCount; ++i) { + normalizedDateSkeleton.append(LOW_Y); + } + } + if ( MCount != 0 ) { + if ( MCount < 3 ) { + normalizedDateSkeleton.append(CAP_M); + } else { + for ( int32_t j = 0; j < MCount && j < MAX_M_COUNT; ++j) { + normalizedDateSkeleton.append(CAP_M); + } + } + } + if ( ECount != 0 ) { + if ( ECount <= 3 ) { + normalizedDateSkeleton.append(CAP_E); + } else { + for ( int32_t j = 0; j < ECount && j < MAX_E_COUNT; ++j ) { + normalizedDateSkeleton.append(CAP_E); + } + } + } + 
if ( dCount != 0 ) { + normalizedDateSkeleton.append(LOW_D); + } + + /* generate normalized form for time */ + if ( HCount != 0 ) { + normalizedTimeSkeleton.append(CAP_H); + } + else if ( hCount != 0 ) { + normalizedTimeSkeleton.append(LOW_H); + } + if ( mCount != 0 ) { + normalizedTimeSkeleton.append(LOW_M); + } + if ( zCount != 0 ) { + normalizedTimeSkeleton.append(LOW_Z); + } + if ( vCount != 0 ) { + normalizedTimeSkeleton.append(LOW_V); + } +} + + +/** + * Generate date or time interval pattern from resource, + * and set them into the interval pattern locale to this formatter. + * + * It needs to handle the following: + * 1. need to adjust field width. + * For example, the interval patterns saved in DateIntervalInfo + * includes "dMMMy", but not "dMMMMy". + * Need to get interval patterns for dMMMMy from dMMMy. + * Another example, the interval patterns saved in DateIntervalInfo + * includes "hmv", but not "hmz". + * Need to get interval patterns for "hmz' from 'hmv' + * + * 2. there might be no pattern for 'y' differ for skeleton "Md", + * in order to get interval patterns for 'y' differ, + * need to look for it from skeleton 'yMd' + * + * @param dateSkeleton normalized date skeleton + * @param timeSkeleton normalized time skeleton + * @return whether the resource is found for the skeleton. + * TRUE if interval pattern found for the skeleton, + * FALSE otherwise. + * @stable ICU 4.0 + */ +UBool +DateIntervalFormat::setSeparateDateTimePtn( + const UnicodeString& dateSkeleton, + const UnicodeString& timeSkeleton) { + const UnicodeString* skeleton; + // if both date and time skeleton present, + // the final interval pattern might include time interval patterns + // ( when, am_pm, hour, minute differ ), + // but not date interval patterns ( when year, month, day differ ). + // For year/month/day differ, it falls back to fall-back pattern. 
+ if ( timeSkeleton.length() != 0 ) { + skeleton = &timeSkeleton; + } else { + skeleton = &dateSkeleton; + } + + /* interval patterns for skeleton "dMMMy" (but not "dMMMMy") + * are defined in resource, + * interval patterns for skeleton "dMMMMy" are calculated by + * 1. get the best match skeleton for "dMMMMy", which is "dMMMy" + * 2. get the interval patterns for "dMMMy", + * 3. extend "MMM" to "MMMM" in above interval patterns for "dMMMMy" + * getBestSkeleton() is step 1. + */ + // best skeleton, and the difference information + int8_t differenceInfo = 0; + const UnicodeString* bestSkeleton = fInfo->getBestSkeleton(*skeleton, + differenceInfo); + /* best skeleton could be NULL. + For example: in "ca" resource file, + interval format is defined as following + intervalFormats{ + fallback{"{0} - {1}"} + } + there is no skeletons/interval patterns defined, + and the best skeleton match could be NULL + */ + if ( bestSkeleton == NULL ) { + return false; + } + + // Set patterns for fallback use, need to do this + // before returning if differenceInfo == -1 + UErrorCode status; + if ( dateSkeleton.length() != 0) { + status = U_ZERO_ERROR; + fDatePattern = new UnicodeString(DateFormat::getBestPattern( + fLocale, dateSkeleton, status)); + } + if ( timeSkeleton.length() != 0) { + status = U_ZERO_ERROR; + fTimePattern = new UnicodeString(DateFormat::getBestPattern( + fLocale, timeSkeleton, status)); + } + + // difference: + // 0 means the best matched skeleton is the same as input skeleton + // 1 means the fields are the same, but field width are different + // 2 means the only difference between fields are v/z, + // -1 means there are other fields difference + // (this will happen, for instance, if the supplied skeleton has seconds, + // but no skeletons in the intervalFormats data do) + if ( differenceInfo == -1 ) { + // skeleton has different fields, not only v/z difference + return false; + } + + if ( timeSkeleton.length() == 0 ) { + UnicodeString extendedSkeleton; + 
UnicodeString extendedBestSkeleton; + // only has date skeleton + setIntervalPattern(UCAL_DATE, skeleton, bestSkeleton, differenceInfo, + &extendedSkeleton, &extendedBestSkeleton); + + UBool extended = setIntervalPattern(UCAL_MONTH, skeleton, bestSkeleton, + differenceInfo, + &extendedSkeleton, &extendedBestSkeleton); + + if ( extended ) { + bestSkeleton = &extendedBestSkeleton; + skeleton = &extendedSkeleton; + } + setIntervalPattern(UCAL_YEAR, skeleton, bestSkeleton, differenceInfo, + &extendedSkeleton, &extendedBestSkeleton); + setIntervalPattern(UCAL_ERA, skeleton, bestSkeleton, differenceInfo, + &extendedSkeleton, &extendedBestSkeleton); + } else { + setIntervalPattern(UCAL_MINUTE, skeleton, bestSkeleton, differenceInfo); + setIntervalPattern(UCAL_HOUR, skeleton, bestSkeleton, differenceInfo); + setIntervalPattern(UCAL_AM_PM, skeleton, bestSkeleton, differenceInfo); + } + return true; +} + + + +void +DateIntervalFormat::setFallbackPattern(UCalendarDateFields field, + const UnicodeString& skeleton, + UErrorCode& status) { + if ( U_FAILURE(status) ) { + return; + } + UnicodeString pattern = DateFormat::getBestPattern( + fLocale, skeleton, status); + if ( U_FAILURE(status) ) { + return; + } + setPatternInfo(field, NULL, &pattern, fInfo->getDefaultOrder()); +} + + + + +void +DateIntervalFormat::setPatternInfo(UCalendarDateFields field, + const UnicodeString* firstPart, + const UnicodeString* secondPart, + UBool laterDateFirst) { + // for fall back interval patterns, + // the first part of the pattern is empty, + // the second part of the pattern is the full-pattern + // should be used in fall-back. + UErrorCode status = U_ZERO_ERROR; + // following should not set any wrong status. 
+ int32_t itvPtnIndex = DateIntervalInfo::calendarFieldToIntervalIndex(field, + status); + if ( U_FAILURE(status) ) { + return; + } + PatternInfo& ptn = fIntervalPatterns[itvPtnIndex]; + if ( firstPart ) { + ptn.firstPart = *firstPart; + } + if ( secondPart ) { + ptn.secondPart = *secondPart; + } + ptn.laterDateFirst = laterDateFirst; +} + +void +DateIntervalFormat::setIntervalPattern(UCalendarDateFields field, + const UnicodeString& intervalPattern) { + UBool order = fInfo->getDefaultOrder(); + setIntervalPattern(field, intervalPattern, order); +} + + +void +DateIntervalFormat::setIntervalPattern(UCalendarDateFields field, + const UnicodeString& intervalPattern, + UBool laterDateFirst) { + const UnicodeString* pattern = &intervalPattern; + UBool order = laterDateFirst; + // check for "latestFirst:" or "earliestFirst:" prefix + int8_t prefixLength = UPRV_LENGTHOF(gLaterFirstPrefix); + int8_t earliestFirstLength = UPRV_LENGTHOF(gEarlierFirstPrefix); + UnicodeString realPattern; + if ( intervalPattern.startsWith(gLaterFirstPrefix, prefixLength) ) { + order = true; + intervalPattern.extract(prefixLength, + intervalPattern.length() - prefixLength, + realPattern); + pattern = &realPattern; + } else if ( intervalPattern.startsWith(gEarlierFirstPrefix, + earliestFirstLength) ) { + order = false; + intervalPattern.extract(earliestFirstLength, + intervalPattern.length() - earliestFirstLength, + realPattern); + pattern = &realPattern; + } + + int32_t splitPoint = splitPatternInto2Part(*pattern); + + UnicodeString firstPart; + UnicodeString secondPart; + pattern->extract(0, splitPoint, firstPart); + if ( splitPoint < pattern->length() ) { + pattern->extract(splitPoint, pattern->length()-splitPoint, secondPart); + } + setPatternInfo(field, &firstPart, &secondPart, order); +} + + + + +/** + * Generate interval pattern from existing resource + * + * It not only save the interval patterns, + * but also return the extended skeleton and its best match skeleton. 
+ * + * @param field largest different calendar field + * @param skeleton skeleton + * @param bestSkeleton the best match skeleton which has interval pattern + * defined in resource + * @param differenceInfo the difference between skeleton and best skeleton + * 0 means the best matched skeleton is the same as input skeleton + * 1 means the fields are the same, but field width are different + * 2 means the only difference between fields are v/z, + * -1 means there are other fields difference + * + * @param extendedSkeleton extended skeleton + * @param extendedBestSkeleton extended best match skeleton + * @return whether the interval pattern is found + * through extending skeleton or not. + * TRUE if interval pattern is found by + * extending skeleton, FALSE otherwise. + * @stable ICU 4.0 + */ +UBool +DateIntervalFormat::setIntervalPattern(UCalendarDateFields field, + const UnicodeString* skeleton, + const UnicodeString* bestSkeleton, + int8_t differenceInfo, + UnicodeString* extendedSkeleton, + UnicodeString* extendedBestSkeleton) { + UErrorCode status = U_ZERO_ERROR; + // following getIntervalPattern() should not generate error status + UnicodeString pattern; + fInfo->getIntervalPattern(*bestSkeleton, field, pattern, status); + if ( pattern.isEmpty() ) { + // single date + if ( SimpleDateFormat::isFieldUnitIgnored(*bestSkeleton, field) ) { + // do nothing, format will handle it + return false; + } + + // for 24 hour system, interval patterns in resource file + // might not include pattern when am_pm differ, + // which should be the same as hour differ. 
+ // add it here for simplicity + if ( field == UCAL_AM_PM ) { + fInfo->getIntervalPattern(*bestSkeleton, UCAL_HOUR, pattern,status); + if ( !pattern.isEmpty() ) { + setIntervalPattern(field, pattern); + } + return false; + } + // else, looking for pattern when 'y' differ for 'dMMMM' skeleton, + // first, get best match pattern "MMMd", + // since there is no pattern for 'y' differs for skeleton 'MMMd', + // need to look for it from skeleton 'yMMMd', + // if found, adjust field width in interval pattern from + // "MMM" to "MMMM". + UChar fieldLetter = fgCalendarFieldToPatternLetter[field]; + if ( extendedSkeleton ) { + *extendedSkeleton = *skeleton; + *extendedBestSkeleton = *bestSkeleton; + extendedSkeleton->insert(0, fieldLetter); + extendedBestSkeleton->insert(0, fieldLetter); + // for example, looking for patterns when 'y' differ for + // skeleton "MMMM". + fInfo->getIntervalPattern(*extendedBestSkeleton,field,pattern,status); + if ( pattern.isEmpty() && differenceInfo == 0 ) { + // if there is no skeleton "yMMMM" defined, + // look for the best match skeleton, for example: "yMMM" + const UnicodeString* tmpBest = fInfo->getBestSkeleton( + *extendedBestSkeleton, differenceInfo); + if ( tmpBest != 0 && differenceInfo != -1 ) { + fInfo->getIntervalPattern(*tmpBest, field, pattern, status); + bestSkeleton = tmpBest; + } + } + } + } + if ( !pattern.isEmpty() ) { + if ( differenceInfo != 0 ) { + UnicodeString adjustIntervalPattern; + adjustFieldWidth(*skeleton, *bestSkeleton, pattern, differenceInfo, + adjustIntervalPattern); + setIntervalPattern(field, adjustIntervalPattern); + } else { + setIntervalPattern(field, pattern); + } + if ( extendedSkeleton && !extendedSkeleton->isEmpty() ) { + return TRUE; + } + } + return FALSE; +} + + + +int32_t U_EXPORT2 +DateIntervalFormat::splitPatternInto2Part(const UnicodeString& intervalPattern) { + UBool inQuote = false; + UChar prevCh = 0; + int32_t count = 0; + + /* repeatedPattern used to record whether a pattern has already 
seen. + It is a pattern applies to first calendar if it is first time seen, + otherwise, it is a pattern applies to the second calendar + */ + UBool patternRepeated[] = + { + // A B C D E F G H I J K L M N O + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // P Q R S T U V W X Y Z + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // a b c d e f g h i j k l m n o + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // p q r s t u v w x y z + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + + int8_t PATTERN_CHAR_BASE = 0x41; + + /* loop through the pattern string character by character looking for + * the first repeated pattern letter, which breaks the interval pattern + * into 2 parts. + */ + int32_t i; + UBool foundRepetition = false; + for (i = 0; i < intervalPattern.length(); ++i) { + UChar ch = intervalPattern.charAt(i); + + if (ch != prevCh && count > 0) { + // check the repeativeness of pattern letter + UBool repeated = patternRepeated[(int)(prevCh - PATTERN_CHAR_BASE)]; + if ( repeated == FALSE ) { + patternRepeated[prevCh - PATTERN_CHAR_BASE] = TRUE; + } else { + foundRepetition = true; + break; + } + count = 0; + } + if (ch == 0x0027 /*'*/) { + // Consecutive single quotes are a single quote literal, + // either outside of quotes or between quotes + if ((i+1) < intervalPattern.length() && + intervalPattern.charAt(i+1) == 0x0027 /*'*/) { + ++i; + } else { + inQuote = ! 
inQuote; + } + } + else if (!inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/) + || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) { + // ch is a date-time pattern character + prevCh = ch; + ++count; + } + } + // check last pattern char, distinguish + // "dd MM" ( no repetition ), + // "d-d"(last char repeated ), and + // "d-d MM" ( repetition found ) + if ( count > 0 && foundRepetition == FALSE ) { + if ( patternRepeated[(int)(prevCh - PATTERN_CHAR_BASE)] == FALSE ) { + count = 0; + } + } + return (i - count); +} + +void DateIntervalFormat::fallbackFormatRange( + Calendar& fromCalendar, + Calendar& toCalendar, + UnicodeString& appendTo, + int8_t& firstIndex, + FieldPositionHandler& fphandler, + UErrorCode& status) const { + UnicodeString fallbackPattern; + fInfo->getFallbackIntervalPattern(fallbackPattern); + SimpleFormatter sf(fallbackPattern, 2, 2, status); + if (U_FAILURE(status)) { + return; + } + int32_t offsets[2]; + UnicodeString patternBody = sf.getTextWithNoArguments(offsets, 2); + + // TODO(ICU-20406): Use SimpleFormatter Iterator interface when available. 
+ if (offsets[0] < offsets[1]) { + firstIndex = 0; + appendTo.append(patternBody.tempSubStringBetween(0, offsets[0])); + fDateFormat->_format(fromCalendar, appendTo, fphandler, status); + appendTo.append(patternBody.tempSubStringBetween(offsets[0], offsets[1])); + fDateFormat->_format(toCalendar, appendTo, fphandler, status); + appendTo.append(patternBody.tempSubStringBetween(offsets[1])); + } else { + firstIndex = 1; + appendTo.append(patternBody.tempSubStringBetween(0, offsets[1])); + fDateFormat->_format(toCalendar, appendTo, fphandler, status); + appendTo.append(patternBody.tempSubStringBetween(offsets[1], offsets[0])); + fDateFormat->_format(fromCalendar, appendTo, fphandler, status); + appendTo.append(patternBody.tempSubStringBetween(offsets[0])); + } +} + +UnicodeString& +DateIntervalFormat::fallbackFormat(Calendar& fromCalendar, + Calendar& toCalendar, + UBool fromToOnSameDay, // new + UnicodeString& appendTo, + int8_t& firstIndex, + FieldPositionHandler& fphandler, + UErrorCode& status) const { + if ( U_FAILURE(status) ) { + return appendTo; + } + + UBool formatDatePlusTimeRange = (fromToOnSameDay && fDatePattern && fTimePattern); + if (formatDatePlusTimeRange) { + SimpleFormatter sf(*fDateTimeFormat, 2, 2, status); + if (U_FAILURE(status)) { + return appendTo; + } + int32_t offsets[2]; + UnicodeString patternBody = sf.getTextWithNoArguments(offsets, 2); + + UnicodeString fullPattern; // for saving the pattern in fDateFormat + fDateFormat->toPattern(fullPattern); // save current pattern, restore later + + // {0} is time range + // {1} is single date portion + // TODO(ICU-20406): Use SimpleFormatter Iterator interface when available. 
+ if (offsets[0] < offsets[1]) { + appendTo.append(patternBody.tempSubStringBetween(0, offsets[0])); + fDateFormat->applyPattern(*fTimePattern); + fallbackFormatRange(fromCalendar, toCalendar, appendTo, firstIndex, fphandler, status); + appendTo.append(patternBody.tempSubStringBetween(offsets[0], offsets[1])); + fDateFormat->applyPattern(*fDatePattern); + fDateFormat->_format(fromCalendar, appendTo, fphandler, status); + appendTo.append(patternBody.tempSubStringBetween(offsets[1])); + } else { + appendTo.append(patternBody.tempSubStringBetween(0, offsets[1])); + fDateFormat->applyPattern(*fDatePattern); + fDateFormat->_format(fromCalendar, appendTo, fphandler, status); + appendTo.append(patternBody.tempSubStringBetween(offsets[1], offsets[0])); + fDateFormat->applyPattern(*fTimePattern); + fallbackFormatRange(fromCalendar, toCalendar, appendTo, firstIndex, fphandler, status); + appendTo.append(patternBody.tempSubStringBetween(offsets[0])); + } + + // restore full pattern + fDateFormat->applyPattern(fullPattern); + } else { + fallbackFormatRange(fromCalendar, toCalendar, appendTo, firstIndex, fphandler, status); + } + return appendTo; +} + + + + +UBool U_EXPORT2 +DateIntervalFormat::fieldExistsInSkeleton(UCalendarDateFields field, + const UnicodeString& skeleton) +{ + const UChar fieldChar = fgCalendarFieldToPatternLetter[field]; + return ( (skeleton.indexOf(fieldChar) == -1)?FALSE:TRUE ) ; +} + + + +void U_EXPORT2 +DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton, + const UnicodeString& bestMatchSkeleton, + const UnicodeString& bestIntervalPattern, + int8_t differenceInfo, + UnicodeString& adjustedPtn) { + adjustedPtn = bestIntervalPattern; + int32_t inputSkeletonFieldWidth[] = + { + // A B C D E F G H I J K L M N O + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // P Q R S T U V W X Y Z + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // a b c d e f g h i j k l m n o + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // p q r s t u v w 
x y z + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + + int32_t bestMatchSkeletonFieldWidth[] = + { + // A B C D E F G H I J K L M N O + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // P Q R S T U V W X Y Z + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // a b c d e f g h i j k l m n o + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // p q r s t u v w x y z + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + + DateIntervalInfo::parseSkeleton(inputSkeleton, inputSkeletonFieldWidth); + DateIntervalInfo::parseSkeleton(bestMatchSkeleton, bestMatchSkeletonFieldWidth); + if ( differenceInfo == 2 ) { + adjustedPtn.findAndReplace(UnicodeString((UChar)0x76 /* v */), + UnicodeString((UChar)0x7a /* z */)); + } + + UBool inQuote = false; + UChar prevCh = 0; + int32_t count = 0; + + const int8_t PATTERN_CHAR_BASE = 0x41; + + // loop through the pattern string character by character + int32_t adjustedPtnLength = adjustedPtn.length(); + int32_t i; + for (i = 0; i < adjustedPtnLength; ++i) { + UChar ch = adjustedPtn.charAt(i); + if (ch != prevCh && count > 0) { + // check the repeativeness of pattern letter + UChar skeletonChar = prevCh; + if ( skeletonChar == CAP_L ) { + // there is no "L" (always be "M") in skeleton, + // but there is "L" in pattern. + // for skeleton "M+", the pattern might be "...L..." 
+ skeletonChar = CAP_M; + } + int32_t fieldCount = bestMatchSkeletonFieldWidth[(int)(skeletonChar - PATTERN_CHAR_BASE)]; + int32_t inputFieldCount = inputSkeletonFieldWidth[(int)(skeletonChar - PATTERN_CHAR_BASE)]; + if ( fieldCount == count && inputFieldCount > fieldCount ) { + count = inputFieldCount - fieldCount; + int32_t j; + for ( j = 0; j < count; ++j ) { + adjustedPtn.insert(i, prevCh); + } + i += count; + adjustedPtnLength += count; + } + count = 0; + } + if (ch == 0x0027 /*'*/) { + // Consecutive single quotes are a single quote literal, + // either outside of quotes or between quotes + if ((i+1) < adjustedPtn.length() && adjustedPtn.charAt(i+1) == 0x0027 /* ' */) { + ++i; + } else { + inQuote = ! inQuote; + } + } + else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/) + || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) { + // ch is a date-time pattern character + prevCh = ch; + ++count; + } + } + if ( count > 0 ) { + // last item + // check the repeativeness of pattern letter + UChar skeletonChar = prevCh; + if ( skeletonChar == CAP_L ) { + // there is no "L" (always be "M") in skeleton, + // but there is "L" in pattern. + // for skeleton "M+", the pattern might be "...L..." 
+ skeletonChar = CAP_M; + } + int32_t fieldCount = bestMatchSkeletonFieldWidth[(int)(skeletonChar - PATTERN_CHAR_BASE)]; + int32_t inputFieldCount = inputSkeletonFieldWidth[(int)(skeletonChar - PATTERN_CHAR_BASE)]; + if ( fieldCount == count && inputFieldCount > fieldCount ) { + count = inputFieldCount - fieldCount; + int32_t j; + for ( j = 0; j < count; ++j ) { + adjustedPtn.append(prevCh); + } + } + } +} + + + +void +DateIntervalFormat::concatSingleDate2TimeInterval(UnicodeString& format, + const UnicodeString& datePattern, + UCalendarDateFields field, + UErrorCode& status) { + // following should not set wrong status + int32_t itvPtnIndex = DateIntervalInfo::calendarFieldToIntervalIndex(field, + status); + if ( U_FAILURE(status) ) { + return; + } + PatternInfo& timeItvPtnInfo = fIntervalPatterns[itvPtnIndex]; + if ( !timeItvPtnInfo.firstPart.isEmpty() ) { + UnicodeString timeIntervalPattern(timeItvPtnInfo.firstPart); + timeIntervalPattern.append(timeItvPtnInfo.secondPart); + UnicodeString combinedPattern; + SimpleFormatter(format, 2, 2, status). 
+ format(timeIntervalPattern, datePattern, combinedPattern, status); + if ( U_FAILURE(status) ) { + return; + } + setIntervalPattern(field, combinedPattern, timeItvPtnInfo.laterDateFirst); + } + // else: fall back + // it should not happen if the interval format defined is valid +} + + + +const UChar +DateIntervalFormat::fgCalendarFieldToPatternLetter[] = +{ + /*GyM*/ CAP_G, LOW_Y, CAP_M, + /*wWd*/ LOW_W, CAP_W, LOW_D, + /*DEF*/ CAP_D, CAP_E, CAP_F, + /*ahH*/ LOW_A, LOW_H, CAP_H, + /*msS*/ LOW_M, LOW_S, CAP_S, // MINUTE, SECOND, MILLISECOND + /*z.Y*/ LOW_Z, SPACE, CAP_Y, // ZONE_OFFSET, DST_OFFSET, YEAR_WOY, + /*eug*/ LOW_E, LOW_U, LOW_G, // DOW_LOCAL, EXTENDED_YEAR, JULIAN_DAY, + /*A..*/ CAP_A, SPACE, SPACE, // MILLISECONDS_IN_DAY, IS_LEAP_MONTH, FIELD_COUNT +}; + + + +U_NAMESPACE_END + +#endif diff --git a/libicui18n/libicui18n/dtitvfmt.cpp.patch b/libicui18n/libicui18n/dtitvfmt.cpp.patch new file mode 100644 index 0000000..f6a0f5c --- /dev/null +++ b/libicui18n/libicui18n/dtitvfmt.cpp.patch @@ -0,0 +1,11 @@ +--- libicui18n/i18n/dtitvfmt.cpp 2019-12-23 14:38:40.214889289 +0300 ++++ libicui18n/dtitvfmt.cpp 2020-07-21 14:35:34.435172232 +0300 +@@ -232,7 +232,7 @@ DateIntervalFormat::operator==(const For + { + Mutex lock(&gFormatterMutex); + if (fDateFormat != fmt->fDateFormat && (fDateFormat == NULL || fmt->fDateFormat == NULL)) {return FALSE;} +- if (fDateFormat && fmt->fDateFormat && (*fDateFormat != *fmt->fDateFormat)) {return FALSE;} ++ if (fDateFormat && fmt->fDateFormat && !(*fDateFormat == *fmt->fDateFormat)) {return FALSE;} + } + // note: fFromCalendar and fToCalendar hold no persistent state, and therefore do not participate in operator ==. + // fDateFormat has the master calendar for the DateIntervalFormat. diff --git a/libicui18n/libicui18n/olsontz.cpp b/libicui18n/libicui18n/olsontz.cpp new file mode 100644 index 0000000..85bb737 --- /dev/null +++ b/libicui18n/libicui18n/olsontz.cpp @@ -0,0 +1,1080 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2003-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: July 21 2003
+* Since: ICU 2.8
+**********************************************************************
+*/
+
+#include "utypeinfo.h" // for 'typeid' to work
+
+#include "olsontz.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/ures.h"
+#include "unicode/simpletz.h"
+#include "unicode/gregocal.h"
+#include "gregoimp.h"
+#include "cmemory.h"
+#include "uassert.h"
+#include "uvector.h"
+#include <float.h> // DBL_MAX
+#include "uresimp.h"
+#include "zonemeta.h"
+#include "umutex.h"
+
+#ifdef U_DEBUG_TZ
+# include <stdarg.h> // va_list, va_start, va_end
+# include <stdio.h> // fprintf, vfprintf, fflush, stderr
+# include "uresimp.h" // for debugging
+
+/**
+ * Debug helper: writes a "file:line: " prefix to stderr.
+ * @param f source file name
+ * @param l source line number
+ */
+static void debug_tz_loc(const char *f, int32_t l)
+{
+    fprintf(stderr, "%s:%d: ", f, l);
+}
+
+/**
+ * Debug helper: printf-style message written to stderr and flushed
+ * immediately.
+ * @param pat printf format string, followed by its arguments
+ */
+static void debug_tz_msg(const char *pat, ...)
+{
+    va_list ap;
+    va_start(ap, pat);
+    vfprintf(stderr, pat, ap);
+    // Every va_start must be paired with va_end before returning.
+    va_end(ap);
+    fflush(stderr);
+}
+// must use double parens, i.e.: U_DEBUG_TZ_MSG(("four is: %d",4));
+#define U_DEBUG_TZ_MSG(x) {debug_tz_loc(__FILE__,__LINE__);debug_tz_msg x;}
+#else
+#define U_DEBUG_TZ_MSG(x)
+#endif
+
+/**
+ * Compares two raw memory regions of the same byte length.
+ * Two NULL pointers compare equal, exactly one NULL pointer compares
+ * unequal, and identical pointers compare equal without reading memory.
+ * @param a1 first region, may be NULL
+ * @param a2 second region, may be NULL
+ * @param size number of bytes to compare
+ * @return TRUE if the regions are considered equal
+ */
+static UBool arrayEqual(const void *a1, const void *a2, int32_t size) {
+    if (a1 == NULL && a2 == NULL) {
+        return TRUE;
+    }
+    if ((a1 != NULL && a2 == NULL) || (a1 == NULL && a2 != NULL)) {
+        return FALSE;
+    }
+    if (a1 == a2) {
+        return TRUE;
+    }
+
+    return (uprv_memcmp(a1, a2, size) == 0);
+}
+
+U_NAMESPACE_BEGIN
+
+// Resource keys read from a zone's resource bundle by the constructor
+#define kTRANS "trans"
+#define kTRANSPRE32 "transPre32"
+#define kTRANSPOST32 "transPost32"
+#define kTYPEOFFSETS "typeOffsets"
+#define kTYPEMAP "typeMap"
+#define kLINKS "links"
+#define kFINALRULE "finalRule"
+#define kFINALRAW "finalRaw"
+#define kFINALYEAR "finalYear"
+
+#define SECONDS_PER_DAY (24*60*60)
+
+// The single offset pair that constructEmpty() installs as typeOffsets
+static const int32_t ZEROS[] = {0,0};
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(OlsonTimeZone)
+
+/**
+ * Default constructor. Creates a time zone with an empty ID and
+ * a fixed GMT offset of zero.
+ */
+/*OlsonTimeZone::OlsonTimeZone() : finalYear(INT32_MAX), finalMillis(DBL_MAX), finalZone(0), transitionRulesInitialized(FALSE) {
+    clearTransitionRules();
+    constructEmpty();
+}*/
+
+/**
+ * Construct a GMT+0 zone with no transitions. This is done when a
+ * constructor fails so the resultant object is well-behaved.
+ *
+ * Note: this only overwrites the member pointers; it does not release
+ * anything they pointed to before the call.
+ */
+void OlsonTimeZone::constructEmpty() {
+    canonicalID = NULL;
+
+    transitionCountPre32 = transitionCount32 = transitionCountPost32 = 0;
+    transitionTimesPre32 = transitionTimes32 = transitionTimesPost32 = NULL;
+
+    typeMapData = NULL;
+
+    typeCount = 1;
+    typeOffsets = ZEROS;
+
+    finalZone = NULL;
+}
+
+/**
+ * Construct from a resource bundle
+ * @param top the top-level zoneinfo resource bundle. This is used
+ * to lookup the rule that `res' may refer to, if there is one.
+ * @param res the resource bundle of the zone to be constructed
+ * @param tzid the zone ID; handed to the BasicTimeZone base class and
+ * used to look up the canonical CLDR ID
+ * @param ec input-output error code
+ *
+ * If anything fails, the object is reset with constructEmpty() so that
+ * it is still usable, and ec reports the failure.
+ */
+OlsonTimeZone::OlsonTimeZone(const UResourceBundle* top,
+                             const UResourceBundle* res,
+                             const UnicodeString& tzid,
+                             UErrorCode& ec) :
+  BasicTimeZone(tzid), finalZone(NULL)
+{
+    clearTransitionRules();
+    U_DEBUG_TZ_MSG(("OlsonTimeZone(%s)\n", ures_getKey((UResourceBundle*)res)));
+    if ((top == NULL || res == NULL) && U_SUCCESS(ec)) {
+        ec = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+    if (U_SUCCESS(ec)) {
+        // TODO -- clean up -- Doesn't work if res points to an alias
+        //        // TODO remove nonconst casts below when ures_* API is fixed
+        //        setID(ures_getKey((UResourceBundle*) res)); // cast away const
+
+        int32_t len;
+        StackUResourceBundle r;
+
+        // Pre-32bit second transitions
+        // (two int32 words per transition, hence the count is len / 2)
+        ures_getByKey(res, kTRANSPRE32, r.getAlias(), &ec);
+        transitionTimesPre32 = ures_getIntVector(r.getAlias(), &len, &ec);
+        transitionCountPre32 = static_cast<int16_t>(len >> 1);
+        if (ec == U_MISSING_RESOURCE_ERROR) {
+            // No pre-32bit transitions
+            transitionTimesPre32 = NULL;
+            transitionCountPre32 = 0;
+            ec = U_ZERO_ERROR;
+        } else if (U_SUCCESS(ec) && (len < 0 || len > 0x7FFF || (len & 1) != 0) /* len must be even */) {
+            ec = U_INVALID_FORMAT_ERROR;
+        }
+
+        // 32bit second transitions
+        ures_getByKey(res, kTRANS, r.getAlias(), &ec);
+        transitionTimes32 = ures_getIntVector(r.getAlias(), &len, &ec);
+        transitionCount32 = static_cast<int16_t>(len);
+        if (ec == U_MISSING_RESOURCE_ERROR) {
+            // No 32bit transitions
+            transitionTimes32 = NULL;
+            transitionCount32 = 0;
+            ec = U_ZERO_ERROR;
+        } else if (U_SUCCESS(ec) && (len < 0 || len > 0x7FFF)) {
+            ec = U_INVALID_FORMAT_ERROR;
+        }
+
+        // Post-32bit second transitions
+        // (two int32 words per transition, hence the count is len / 2)
+        ures_getByKey(res, kTRANSPOST32, r.getAlias(), &ec);
+        transitionTimesPost32 = ures_getIntVector(r.getAlias(), &len, &ec);
+        transitionCountPost32 = static_cast<int16_t>(len >> 1);
+        if (ec == U_MISSING_RESOURCE_ERROR) {
+            // No post-32bit transitions
+            transitionTimesPost32 = NULL;
+            transitionCountPost32 = 0;
+            ec = U_ZERO_ERROR;
+        } else if (U_SUCCESS(ec) && (len < 0 || len > 0x7FFF || (len & 1) != 0) /* len must be even */) {
+            ec = U_INVALID_FORMAT_ERROR;
+        }
+
+        // Type offsets list must be of even size, with size >= 2
+        ures_getByKey(res, kTYPEOFFSETS, r.getAlias(), &ec);
+        typeOffsets = ures_getIntVector(r.getAlias(), &len, &ec);
+        if (U_SUCCESS(ec) && (len < 2 || len > 0x7FFE || (len & 1) != 0)) {
+            ec = U_INVALID_FORMAT_ERROR;
+        }
+        typeCount = (int16_t) len >> 1;
+
+        // Type map data must be of the same size as the transition count
+        typeMapData = NULL;
+        if (transitionCount() > 0) {
+            ures_getByKey(res, kTYPEMAP, r.getAlias(), &ec);
+            typeMapData = ures_getBinary(r.getAlias(), &len, &ec);
+            if (ec == U_MISSING_RESOURCE_ERROR) {
+                // no type mapping data
+                ec = U_INVALID_FORMAT_ERROR;
+            } else if (U_SUCCESS(ec) && len != transitionCount()) {
+                ec = U_INVALID_FORMAT_ERROR;
+            }
+        }
+
+        // Process final rule and data, if any
+        const UChar *ruleIdUStr = ures_getStringByKey(res, kFINALRULE, &len, &ec);
+        ures_getByKey(res, kFINALRAW, r.getAlias(), &ec);
+        int32_t ruleRaw = ures_getInt(r.getAlias(), &ec);
+        ures_getByKey(res, kFINALYEAR, r.getAlias(), &ec);
+        int32_t ruleYear = ures_getInt(r.getAlias(), &ec);
+        if (U_SUCCESS(ec)) {
+            UnicodeString ruleID(TRUE, ruleIdUStr, len);
+            UResourceBundle *rule = TimeZone::loadRule(top, ruleID, NULL, ec);
+            const int32_t *ruleData = ures_getIntVector(rule, &len, &ec);
+            if (U_SUCCESS(ec) && len == 11) {
+                UnicodeString emptyStr;
+                finalZone = new SimpleTimeZone(
+                    ruleRaw * U_MILLIS_PER_SECOND,
+                    emptyStr,
+                    (int8_t)ruleData[0], (int8_t)ruleData[1], (int8_t)ruleData[2],
+                    ruleData[3] * U_MILLIS_PER_SECOND,
+                    (SimpleTimeZone::TimeMode) ruleData[4],
+                    (int8_t)ruleData[5], (int8_t)ruleData[6], (int8_t)ruleData[7],
+                    ruleData[8] * U_MILLIS_PER_SECOND,
+                    (SimpleTimeZone::TimeMode) ruleData[9],
+                    ruleData[10] * U_MILLIS_PER_SECOND, ec);
+                if (finalZone == NULL) {
+                    ec = U_MEMORY_ALLOCATION_ERROR;
+                } else {
+                    finalStartYear = ruleYear;
+
+                    // Note: Setting finalStartYear to the finalZone is problematic. When a date is around
+                    // year boundary, SimpleTimeZone may return false result when DST is observed at the
+                    // beginning of year. We could apply safe margin (day or two), but when one of recurrent
+                    // rules falls around year boundary, it could return false result. Without setting the
+                    // start year, finalZone works fine around the year boundary of the start year.
+
+                    // finalZone->setStartYear(finalStartYear);
+
+
+                    // Compute the millis for Jan 1, 0:00 GMT of the finalYear
+
+                    // Note: finalStartMillis is used for detecting either if
+                    // historic transition data or finalZone to be used. In an
+                    // extreme edge case - for example, two transitions fall into
+                    // small windows of time around the year boundary, this may
+                    // result incorrect offset computation. But I think it will
+                    // never happen practically. Yoshito - Feb 20, 2010
+                    finalStartMillis = Grego::fieldsToDay(finalStartYear, 0, 1) * U_MILLIS_PER_DAY;
+                }
+            } else {
+                ec = U_INVALID_FORMAT_ERROR;
+            }
+            ures_close(rule);
+        } else if (ec == U_MISSING_RESOURCE_ERROR) {
+            // No final zone
+            ec = U_ZERO_ERROR;
+        }
+
+        // initialize canonical ID
+        canonicalID = ZoneMeta::getCanonicalCLDRID(tzid, ec);
+    }
+
+    if (U_FAILURE(ec)) {
+        // A final zone may already have been allocated before the failure
+        // (its own constructor or the canonical ID lookup can set ec).
+        // constructEmpty() only overwrites the pointer, so release it first.
+        delete finalZone;
+        constructEmpty();
+    }
+}
+
+/**
+ * Copy constructor
+ * Starts with a null finalZone so that the assignment operator can
+ * safely delete it, then copies everything via operator=.
+ */
+OlsonTimeZone::OlsonTimeZone(const OlsonTimeZone& other) :
+    BasicTimeZone(other), finalZone(0) {
+    *this = other;
+}
+
+/**
+ * Assignment operator
+ * The transition and type tables are shared by pointer; finalZone is
+ * deep-copied with clone(). The lazily built transition rules are not
+ * copied: the fields are reset with clearTransitionRules().
+ */
+OlsonTimeZone& OlsonTimeZone::operator=(const OlsonTimeZone& other) {
+    if (this == &other) {
+        // Self-assignment: without this guard finalZone would be deleted
+        // and then cloned from the freed object.
+        return *this;
+    }
+
+    canonicalID = other.canonicalID;
+
+    transitionTimesPre32 = other.transitionTimesPre32;
+    transitionTimes32 = other.transitionTimes32;
+    transitionTimesPost32 = other.transitionTimesPost32;
+
+    transitionCountPre32 = other.transitionCountPre32;
+    transitionCount32 = other.transitionCount32;
+    transitionCountPost32 = other.transitionCountPost32;
+
+    typeCount = other.typeCount;
+    typeOffsets = other.typeOffsets;
+    typeMapData = other.typeMapData;
+
+    delete finalZone;
+    finalZone = (other.finalZone != 0) ? other.finalZone->clone() : 0;
+
+    finalStartYear = other.finalStartYear;
+    finalStartMillis = other.finalStartMillis;
+
+    clearTransitionRules();
+
+    return *this;
+}
+
+/**
+ * Destructor
+ * Releases the lazily built transition rules and the owned finalZone.
+ */
+OlsonTimeZone::~OlsonTimeZone() {
+    deleteTransitionRules();
+    delete finalZone;
+}
+
+/**
+ * Returns true if the two TimeZone objects are equal.
+ * Equal means: same object, or same dynamic type, equal per
+ * TimeZone::operator==, and same rules per hasSameRules().
+ */
+UBool OlsonTimeZone::operator==(const TimeZone& other) const {
+    return ((this == &other) ||
+            (typeid(*this) == typeid(other) &&
+            TimeZone::operator==(other) &&
+            hasSameRules(other)));
+}
+
+/**
+ * TimeZone API.
+ * @return a heap-allocated copy made with the copy constructor
+ */
+OlsonTimeZone* OlsonTimeZone::clone() const {
+    return new OlsonTimeZone(*this);
+}
+
+/**
+ * TimeZone API.
+ * Rejects an out-of-range month with U_ILLEGAL_ARGUMENT_ERROR (an
+ * already-set error code is left untouched) and returns 0; otherwise
+ * forwards to the overload that takes the month length, computed with
+ * Grego::monthLength().
+ */
+int32_t OlsonTimeZone::getOffset(uint8_t era, int32_t year, int32_t month,
+                                 int32_t dom, uint8_t dow,
+                                 int32_t millis, UErrorCode& ec) const {
+    if (month < UCAL_JANUARY || month > UCAL_DECEMBER) {
+        if (U_SUCCESS(ec)) {
+            ec = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+        return 0;
+    } else {
+        return getOffset(era, year, month, dom, dow, millis,
+                         Grego::monthLength(year, month),
+                         ec);
+    }
+}
+
+/**
+ * TimeZone API.
+ */
+// Offset for a local wall-clock time given as calendar fields.
+// Returns 0 when ec is already a failure on entry, or when a field is out
+// of range (ec is then set to U_ILLEGAL_ARGUMENT_ERROR).
+int32_t OlsonTimeZone::getOffset(uint8_t era, int32_t year, int32_t month,
+                                 int32_t dom, uint8_t dow,
+                                 int32_t millis, int32_t monthLength,
+                                 UErrorCode& ec) const {
+    if (U_FAILURE(ec)) {
+        return 0;
+    }
+
+    // Range-check each field; all of them must hold.
+    const UBool eraOk = (era == GregorianCalendar::AD || era == GregorianCalendar::BC);
+    const UBool monthOk = (month >= UCAL_JANUARY && month <= UCAL_DECEMBER);
+    const UBool domOk = (dom >= 1 && dom <= monthLength);
+    const UBool dowOk = (dow >= UCAL_SUNDAY && dow <= UCAL_SATURDAY);
+    const UBool millisOk = (millis >= 0 && millis < U_MILLIS_PER_DAY);
+    const UBool lengthOk = (monthLength >= 28 && monthLength <= 31);
+    if (!(eraOk && monthOk && domOk && dowOk && millisOk && lengthOk)) {
+        ec = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    // BC years are negated before any further use.
+    const int32_t signedYear = (era == GregorianCalendar::BC) ? -year : year;
+
+    // From finalStartYear on, the final zone (if any) answers.
+    if (finalZone != NULL && signedYear >= finalStartYear) {
+        return finalZone->getOffset(era, signedYear, month, dom, dow,
+                                    millis, monthLength, ec);
+    }
+
+    // Otherwise turn the fields into local epoch millis and consult the
+    // historical transition data.
+    const UDate localMillis =
+        (UDate)(Grego::fieldsToDay(signedYear, month, dom) * U_MILLIS_PER_DAY + millis);
+    int32_t rawPart, dstPart;
+    getHistoricalOffset(localMillis, TRUE, kDaylight, kStandard, rawPart, dstPart);
+    return rawPart + dstPart;
+}
+
+/**
+ * TimeZone API.
+ * Fills rawoff and dstoff for the given date. Does nothing when ec is
+ * already a failure on entry.
+ */
+void OlsonTimeZone::getOffset(UDate date, UBool local, int32_t& rawoff,
+                              int32_t& dstoff, UErrorCode& ec) const {
+    if (U_FAILURE(ec)) {
+        return;
+    }
+    // Dates at or after finalStartMillis belong to the final zone, if any.
+    const UBool useFinalZone = (finalZone != NULL && date >= finalStartMillis);
+    if (!useFinalZone) {
+        getHistoricalOffset(date, local, kFormer, kLatter, rawoff, dstoff);
+        return;
+    }
+    finalZone->getOffset(date, local, rawoff, dstoff, ec);
+}
+
+// Same as above for a local time, with the caller choosing how
+// non-existing and duplicated local times are resolved.
+void
+OlsonTimeZone::getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt,
+                                  int32_t& rawoff, int32_t& dstoff, UErrorCode& ec) const {
+    if (U_FAILURE(ec)) {
+        return;
+    }
+    const UBool useFinalZone = (finalZone != NULL && date >= finalStartMillis);
+    if (!useFinalZone) {
+        getHistoricalOffset(date, TRUE, nonExistingTimeOpt, duplicatedTimeOpt, rawoff, dstoff);
+        return;
+    }
+    finalZone->getOffsetFromLocal(date, nonExistingTimeOpt, duplicatedTimeOpt, rawoff, dstoff, ec);
+}
+
+
+/**
+ * TimeZone API.
+ */
+void OlsonTimeZone::setRawOffset(int32_t /*offsetMillis*/) {
+    // We don't support this operation, since OlsonTimeZones are
+    // immutable (except for the ID, which is in the base class).
+
+    // Nothing to do!
+}
+
+/**
+ * TimeZone API.
+ * @return the raw offset that getOffset() reports for the current time
+ */
+int32_t OlsonTimeZone::getRawOffset() const {
+    UErrorCode ec = U_ZERO_ERROR;
+    int32_t raw = 0, dst = 0;
+    // getOffset() takes epoch milliseconds (it compares the date against
+    // finalStartMillis). useDaylightTime() in this file already uses
+    // uprv_getUTCtime() directly as milliseconds, so the value must not be
+    // scaled by U_MILLIS_PER_SECOND here; doing so asked for a time about
+    // 1000 times too far in the future.
+    getOffset(uprv_getUTCtime(), FALSE, raw, dst, ec);
+    return raw;
+}
+
+#if defined U_DEBUG_TZ
+// Debug helper: splits a millisecond timestamp into date fields and logs it.
+void printTime(double ms) {
+    int32_t year, month, dom, dow;
+    double millis=0;
+    double days = ClockMath::floorDivide(((double)ms), (double)U_MILLIS_PER_DAY, millis);
+
+    Grego::dayToFields(days, year, month, dom, dow);
+    U_DEBUG_TZ_MSG((" getHistoricalOffset: time %.1f (%04d.%02d.%02d+%.1fh)\n", ms,
+                    year, month+1, dom, (millis/kOneHour)));
+    }
+#endif
+
+/**
+ * Returns the transition time, in seconds, at the given index. The index
+ * runs over the pre-32bit, 32bit and post-32bit tables in that order.
+ * Pre- and post-32bit entries are stored as two consecutive 32-bit words,
+ * high word first; 32bit entries are a single word each.
+ * @param transIdx index in [0, transitionCount())
+ */
+int64_t
+OlsonTimeZone::transitionTimeInSeconds(int16_t transIdx) const {
+    U_ASSERT(transIdx >= 0 && transIdx < transitionCount());
+
+    if (transIdx < transitionCountPre32) {
+        return (((int64_t)((uint32_t)transitionTimesPre32[transIdx << 1])) << 32)
+            | ((int64_t)((uint32_t)transitionTimesPre32[(transIdx << 1) + 1]));
+    }
+
+    // Rebase the index onto the 32bit table.
+    transIdx -= transitionCountPre32;
+    if (transIdx < transitionCount32) {
+        return (int64_t)transitionTimes32[transIdx];
+    }
+
+    // Rebase the index onto the post-32bit table.
+    transIdx -= transitionCount32;
+    return (((int64_t)((uint32_t)transitionTimesPost32[transIdx << 1])) << 32)
+        | ((int64_t)((uint32_t)transitionTimesPost32[(transIdx << 1) + 1]));
+}
+
+// Maximum absolute offset in seconds (86400 seconds = 1 day)
+// getHistoricalOffset uses this constant as safety margin of
+// quick zone transition checking.
+#define MAX_OFFSET_SECONDS 86400 + +void +OlsonTimeZone::getHistoricalOffset(UDate date, UBool local, + int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt, + int32_t& rawoff, int32_t& dstoff) const { + U_DEBUG_TZ_MSG(("getHistoricalOffset(%.1f, %s, %d, %d, raw, dst)\n", + date, local?"T":"F", NonExistingTimeOpt, DuplicatedTimeOpt)); +#if defined U_DEBUG_TZ + printTime(date*1000.0); +#endif + int16_t transCount = transitionCount(); + + if (transCount > 0) { + double sec = uprv_floor(date / U_MILLIS_PER_SECOND); + if (!local && sec < transitionTimeInSeconds(0)) { + // Before the first transition time + rawoff = initialRawOffset() * U_MILLIS_PER_SECOND; + dstoff = initialDstOffset() * U_MILLIS_PER_SECOND; + } else { + // Linear search from the end is the fastest approach, since + // most lookups will happen at/near the end. + int16_t transIdx; + for (transIdx = transCount - 1; transIdx >= 0; transIdx--) { + int64_t transition = transitionTimeInSeconds(transIdx); + + if (local && (sec >= (transition - MAX_OFFSET_SECONDS))) { + int32_t offsetBefore = zoneOffsetAt(transIdx - 1); + UBool dstBefore = dstOffsetAt(transIdx - 1) != 0; + + int32_t offsetAfter = zoneOffsetAt(transIdx); + UBool dstAfter = dstOffsetAt(transIdx) != 0; + + UBool dstToStd = dstBefore && !dstAfter; + UBool stdToDst = !dstBefore && dstAfter; + + if (offsetAfter - offsetBefore >= 0) { + // Positive transition, which makes a non-existing local time range + if (((NonExistingTimeOpt & kStdDstMask) == kStandard && dstToStd) + || ((NonExistingTimeOpt & kStdDstMask) == kDaylight && stdToDst)) { + transition += offsetBefore; + } else if (((NonExistingTimeOpt & kStdDstMask) == kStandard && stdToDst) + || ((NonExistingTimeOpt & kStdDstMask) == kDaylight && dstToStd)) { + transition += offsetAfter; + } else if ((NonExistingTimeOpt & kFormerLatterMask) == kLatter) { + transition += offsetBefore; + } else { + // Interprets the time with rule before the transition, + // default for non-existing time range + 
transition += offsetAfter; + } + } else { + // Negative transition, which makes a duplicated local time range + if (((DuplicatedTimeOpt & kStdDstMask) == kStandard && dstToStd) + || ((DuplicatedTimeOpt & kStdDstMask) == kDaylight && stdToDst)) { + transition += offsetAfter; + } else if (((DuplicatedTimeOpt & kStdDstMask) == kStandard && stdToDst) + || ((DuplicatedTimeOpt & kStdDstMask) == kDaylight && dstToStd)) { + transition += offsetBefore; + } else if ((DuplicatedTimeOpt & kFormerLatterMask) == kFormer) { + transition += offsetBefore; + } else { + // Interprets the time with rule after the transition, + // default for duplicated local time range + transition += offsetAfter; + } + } + } + if (sec >= transition) { + break; + } + } + // transIdx could be -1 when local=true + rawoff = rawOffsetAt(transIdx) * U_MILLIS_PER_SECOND; + dstoff = dstOffsetAt(transIdx) * U_MILLIS_PER_SECOND; + } + } else { + // No transitions, single pair of offsets only + rawoff = initialRawOffset() * U_MILLIS_PER_SECOND; + dstoff = initialDstOffset() * U_MILLIS_PER_SECOND; + } + U_DEBUG_TZ_MSG(("getHistoricalOffset(%.1f, %s, %d, %d, raw, dst) - raw=%d, dst=%d\n", + date, local?"T":"F", NonExistingTimeOpt, DuplicatedTimeOpt, rawoff, dstoff)); +} + +/** + * TimeZone API. + */ +UBool OlsonTimeZone::useDaylightTime() const { + // If DST was observed in 1942 (for example) but has never been + // observed from 1943 to the present, most clients will expect + // this method to return FALSE. This method determines whether + // DST is in use in the current year (at any point in the year) + // and returns TRUE if so. 
+ + UDate current = uprv_getUTCtime(); + if (finalZone != NULL && current >= finalStartMillis) { + return finalZone->useDaylightTime(); + } + + int32_t year, month, dom, dow, doy, mid; + Grego::timeToFields(current, year, month, dom, dow, doy, mid); + + // Find start of this year, and start of next year + double start = Grego::fieldsToDay(year, 0, 1) * SECONDS_PER_DAY; + double limit = Grego::fieldsToDay(year+1, 0, 1) * SECONDS_PER_DAY; + + // Return TRUE if DST is observed at any time during the current + // year. + for (int16_t i = 0; i < transitionCount(); ++i) { + double transition = (double)transitionTimeInSeconds(i); + if (transition >= limit) { + break; + } + if ((transition >= start && dstOffsetAt(i) != 0) + || (transition > start && dstOffsetAt(i - 1) != 0)) { + return TRUE; + } + } + return FALSE; +} +int32_t +OlsonTimeZone::getDSTSavings() const{ + if (finalZone != NULL){ + return finalZone->getDSTSavings(); + } + return TimeZone::getDSTSavings(); +} +/** + * TimeZone API. + */ +UBool OlsonTimeZone::inDaylightTime(UDate date, UErrorCode& ec) const { + int32_t raw, dst; + getOffset(date, FALSE, raw, dst, ec); + return dst != 0; +} + +UBool +OlsonTimeZone::hasSameRules(const TimeZone &other) const { + if (this == &other) { + return TRUE; + } + const OlsonTimeZone* z = dynamic_cast(&other); + if (z == NULL) { + return FALSE; + } + + // [sic] pointer comparison: typeMapData points into + // memory-mapped or DLL space, so if two zones have the same + // pointer, they are equal. 
+ if (typeMapData == z->typeMapData) { + return TRUE; + } + + // If the pointers are not equal, the zones may still + // be equal if their rules and transitions are equal + if ((finalZone == NULL && z->finalZone != NULL) + || (finalZone != NULL && z->finalZone == NULL) + || (finalZone != NULL && z->finalZone != NULL && !(*finalZone == *z->finalZone))) { + return FALSE; + } + + if (finalZone != NULL) { + if (finalStartYear != z->finalStartYear || finalStartMillis != z->finalStartMillis) { + return FALSE; + } + } + if (typeCount != z->typeCount + || transitionCountPre32 != z->transitionCountPre32 + || transitionCount32 != z->transitionCount32 + || transitionCountPost32 != z->transitionCountPost32) { + return FALSE; + } + + return + arrayEqual(transitionTimesPre32, z->transitionTimesPre32, sizeof(transitionTimesPre32[0]) * transitionCountPre32 << 1) + && arrayEqual(transitionTimes32, z->transitionTimes32, sizeof(transitionTimes32[0]) * transitionCount32) + && arrayEqual(transitionTimesPost32, z->transitionTimesPost32, sizeof(transitionTimesPost32[0]) * transitionCountPost32 << 1) + && arrayEqual(typeOffsets, z->typeOffsets, sizeof(typeOffsets[0]) * typeCount << 1) + && arrayEqual(typeMapData, z->typeMapData, sizeof(typeMapData[0]) * transitionCount()); +} + +void +OlsonTimeZone::clearTransitionRules(void) { + initialRule = NULL; + firstTZTransition = NULL; + firstFinalTZTransition = NULL; + historicRules = NULL; + historicRuleCount = 0; + finalZoneWithStartYear = NULL; + firstTZTransitionIdx = 0; + transitionRulesInitOnce.reset(); +} + +void +OlsonTimeZone::deleteTransitionRules(void) { + if (initialRule != NULL) { + delete initialRule; + } + if (firstTZTransition != NULL) { + delete firstTZTransition; + } + if (firstFinalTZTransition != NULL) { + delete firstFinalTZTransition; + } + if (finalZoneWithStartYear != NULL) { + delete finalZoneWithStartYear; + } + if (historicRules != NULL) { + for (int i = 0; i < historicRuleCount; i++) { + if (historicRules[i] != NULL) { 
+ delete historicRules[i]; + } + } + uprv_free(historicRules); + } + clearTransitionRules(); +} + +/* + * Lazy transition rules initializer + */ + +static void U_CALLCONV initRules(OlsonTimeZone *This, UErrorCode &status) { + This->initTransitionRules(status); +} + +void +OlsonTimeZone::checkTransitionRules(UErrorCode& status) const { + OlsonTimeZone *ncThis = const_cast(this); + umtx_initOnce(ncThis->transitionRulesInitOnce, &initRules, ncThis, status); +} + +void +OlsonTimeZone::initTransitionRules(UErrorCode& status) { + if(U_FAILURE(status)) { + return; + } + deleteTransitionRules(); + UnicodeString tzid; + getID(tzid); + + UnicodeString stdName = tzid + UNICODE_STRING_SIMPLE("(STD)"); + UnicodeString dstName = tzid + UNICODE_STRING_SIMPLE("(DST)"); + + int32_t raw, dst; + + // Create initial rule + raw = initialRawOffset() * U_MILLIS_PER_SECOND; + dst = initialDstOffset() * U_MILLIS_PER_SECOND; + initialRule = new InitialTimeZoneRule((dst == 0 ? stdName : dstName), raw, dst); + // Check to make sure initialRule was created + if (initialRule == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + deleteTransitionRules(); + return; + } + + int32_t transCount = transitionCount(); + if (transCount > 0) { + int16_t transitionIdx, typeIdx; + + // We probably no longer need to check the first "real" transition + // here, because the new tzcode remove such transitions already. + // For now, keeping this code for just in case. Feb 19, 2010 Yoshito + firstTZTransitionIdx = 0; + for (transitionIdx = 0; transitionIdx < transCount; transitionIdx++) { + if (typeMapData[transitionIdx] != 0) { // type 0 is the initial type + break; + } + firstTZTransitionIdx++; + } + if (transitionIdx == transCount) { + // Actually no transitions... 
+ } else { + // Build historic rule array + UDate* times = (UDate*)uprv_malloc(sizeof(UDate)*transCount); /* large enough to store all transition times */ + if (times == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + deleteTransitionRules(); + return; + } + for (typeIdx = 0; typeIdx < typeCount; typeIdx++) { + // Gather all start times for each pair of offsets + int32_t nTimes = 0; + for (transitionIdx = firstTZTransitionIdx; transitionIdx < transCount; transitionIdx++) { + if (typeIdx == (int16_t)typeMapData[transitionIdx]) { + UDate tt = (UDate)transitionTime(transitionIdx); + if (finalZone == NULL || tt <= finalStartMillis) { + // Exclude transitions after finalMillis + times[nTimes++] = tt; + } + } + } + if (nTimes > 0) { + // Create a TimeArrayTimeZoneRule + raw = typeOffsets[typeIdx << 1] * U_MILLIS_PER_SECOND; + dst = typeOffsets[(typeIdx << 1) + 1] * U_MILLIS_PER_SECOND; + if (historicRules == NULL) { + historicRuleCount = typeCount; + historicRules = (TimeArrayTimeZoneRule**)uprv_malloc(sizeof(TimeArrayTimeZoneRule*)*historicRuleCount); + if (historicRules == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + deleteTransitionRules(); + uprv_free(times); + return; + } + for (int i = 0; i < historicRuleCount; i++) { + // Initialize TimeArrayTimeZoneRule pointers as NULL + historicRules[i] = NULL; + } + } + historicRules[typeIdx] = new TimeArrayTimeZoneRule((dst == 0 ? stdName : dstName), + raw, dst, times, nTimes, DateTimeRule::UTC_TIME); + // Check for memory allocation error + if (historicRules[typeIdx] == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + deleteTransitionRules(); + return; + } + } + } + uprv_free(times); + + // Create initial transition + typeIdx = (int16_t)typeMapData[firstTZTransitionIdx]; + firstTZTransition = new TimeZoneTransition((UDate)transitionTime(firstTZTransitionIdx), + *initialRule, *historicRules[typeIdx]); + // Check to make sure firstTZTransition was created. 
+ if (firstTZTransition == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + deleteTransitionRules(); + return; + } + } + } + if (finalZone != NULL) { + // Get the first occurence of final rule starts + UDate startTime = (UDate)finalStartMillis; + TimeZoneRule *firstFinalRule = NULL; + + if (finalZone->useDaylightTime()) { + /* + * Note: When an OlsonTimeZone is constructed, we should set the final year + * as the start year of finalZone. However, the bounday condition used for + * getting offset from finalZone has some problems. + * For now, we do not set the valid start year when the construction time + * and create a clone and set the start year when extracting rules. + */ + finalZoneWithStartYear = finalZone->clone(); + // Check to make sure finalZone was actually cloned. + if (finalZoneWithStartYear == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + deleteTransitionRules(); + return; + } + finalZoneWithStartYear->setStartYear(finalStartYear); + + TimeZoneTransition tzt; + finalZoneWithStartYear->getNextTransition(startTime, false, tzt); + firstFinalRule = tzt.getTo()->clone(); + // Check to make sure firstFinalRule received proper clone. + if (firstFinalRule == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + deleteTransitionRules(); + return; + } + startTime = tzt.getTime(); + } else { + // final rule with no transitions + finalZoneWithStartYear = finalZone->clone(); + // Check to make sure finalZone was actually cloned. + if (finalZoneWithStartYear == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + deleteTransitionRules(); + return; + } + finalZone->getID(tzid); + firstFinalRule = new TimeArrayTimeZoneRule(tzid, + finalZone->getRawOffset(), 0, &startTime, 1, DateTimeRule::UTC_TIME); + // Check firstFinalRule was properly created. 
+ if (firstFinalRule == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + deleteTransitionRules(); + return; + } + } + TimeZoneRule *prevRule = NULL; + if (transCount > 0) { + prevRule = historicRules[typeMapData[transCount - 1]]; + } + if (prevRule == NULL) { + // No historic transitions, but only finalZone available + prevRule = initialRule; + } + firstFinalTZTransition = new TimeZoneTransition(); + // Check to make sure firstFinalTZTransition was created before dereferencing + if (firstFinalTZTransition == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + deleteTransitionRules(); + return; + } + firstFinalTZTransition->setTime(startTime); + firstFinalTZTransition->adoptFrom(prevRule->clone()); + firstFinalTZTransition->adoptTo(firstFinalRule); + } +} + +UBool +OlsonTimeZone::getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const { + UErrorCode status = U_ZERO_ERROR; + checkTransitionRules(status); + if (U_FAILURE(status)) { + return FALSE; + } + + if (finalZone != NULL) { + if (inclusive && base == firstFinalTZTransition->getTime()) { + result = *firstFinalTZTransition; + return TRUE; + } else if (base >= firstFinalTZTransition->getTime()) { + if (finalZone->useDaylightTime()) { + //return finalZone->getNextTransition(base, inclusive, result); + return finalZoneWithStartYear->getNextTransition(base, inclusive, result); + } else { + // No more transitions + return FALSE; + } + } + } + if (historicRules != NULL) { + // Find a historical transition + int16_t transCount = transitionCount(); + int16_t ttidx = transCount - 1; + for (; ttidx >= firstTZTransitionIdx; ttidx--) { + UDate t = (UDate)transitionTime(ttidx); + if (base > t || (!inclusive && base == t)) { + break; + } + } + if (ttidx == transCount - 1) { + if (firstFinalTZTransition != NULL) { + result = *firstFinalTZTransition; + return TRUE; + } else { + return FALSE; + } + } else if (ttidx < firstTZTransitionIdx) { + result = *firstTZTransition; + return TRUE; + } else { + // Create a 
TimeZoneTransition + TimeZoneRule *to = historicRules[typeMapData[ttidx + 1]]; + TimeZoneRule *from = historicRules[typeMapData[ttidx]]; + UDate startTime = (UDate)transitionTime(ttidx+1); + + // The transitions loaded from zoneinfo.res may contain non-transition data + UnicodeString fromName, toName; + from->getName(fromName); + to->getName(toName); + if (fromName == toName && from->getRawOffset() == to->getRawOffset() + && from->getDSTSavings() == to->getDSTSavings()) { + return getNextTransition(startTime, false, result); + } + result.setTime(startTime); + result.adoptFrom(from->clone()); + result.adoptTo(to->clone()); + return TRUE; + } + } + return FALSE; +} + +UBool +OlsonTimeZone::getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const { + UErrorCode status = U_ZERO_ERROR; + checkTransitionRules(status); + if (U_FAILURE(status)) { + return FALSE; + } + + if (finalZone != NULL) { + if (inclusive && base == firstFinalTZTransition->getTime()) { + result = *firstFinalTZTransition; + return TRUE; + } else if (base > firstFinalTZTransition->getTime()) { + if (finalZone->useDaylightTime()) { + //return finalZone->getPreviousTransition(base, inclusive, result); + return finalZoneWithStartYear->getPreviousTransition(base, inclusive, result); + } else { + result = *firstFinalTZTransition; + return TRUE; + } + } + } + + if (historicRules != NULL) { + // Find a historical transition + int16_t ttidx = transitionCount() - 1; + for (; ttidx >= firstTZTransitionIdx; ttidx--) { + UDate t = (UDate)transitionTime(ttidx); + if (base > t || (inclusive && base == t)) { + break; + } + } + if (ttidx < firstTZTransitionIdx) { + // No more transitions + return FALSE; + } else if (ttidx == firstTZTransitionIdx) { + result = *firstTZTransition; + return TRUE; + } else { + // Create a TimeZoneTransition + TimeZoneRule *to = historicRules[typeMapData[ttidx]]; + TimeZoneRule *from = historicRules[typeMapData[ttidx-1]]; + UDate startTime = 
(UDate)transitionTime(ttidx); + + // The transitions loaded from zoneinfo.res may contain non-transition data + UnicodeString fromName, toName; + from->getName(fromName); + to->getName(toName); + if (fromName == toName && from->getRawOffset() == to->getRawOffset() + && from->getDSTSavings() == to->getDSTSavings()) { + return getPreviousTransition(startTime, false, result); + } + result.setTime(startTime); + result.adoptFrom(from->clone()); + result.adoptTo(to->clone()); + return TRUE; + } + } + return FALSE; +} + +int32_t +OlsonTimeZone::countTransitionRules(UErrorCode& status) const { + if (U_FAILURE(status)) { + return 0; + } + checkTransitionRules(status); + if (U_FAILURE(status)) { + return 0; + } + + int32_t count = 0; + if (historicRules != NULL) { + // historicRules may contain null entries when original zoneinfo data + // includes non transition data. + for (int32_t i = 0; i < historicRuleCount; i++) { + if (historicRules[i] != NULL) { + count++; + } + } + } + if (finalZone != NULL) { + if (finalZone->useDaylightTime()) { + count += 2; + } else { + count++; + } + } + return count; +} + +void +OlsonTimeZone::getTimeZoneRules(const InitialTimeZoneRule*& initial, + const TimeZoneRule* trsrules[], + int32_t& trscount, + UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + checkTransitionRules(status); + if (U_FAILURE(status)) { + return; + } + + // Initial rule + initial = initialRule; + + // Transition rules + int32_t cnt = 0; + if (historicRules != NULL && trscount > cnt) { + // historicRules may contain null entries when original zoneinfo data + // includes non transition data. 
+ for (int32_t i = 0; i < historicRuleCount; i++) { + if (historicRules[i] != NULL) { + trsrules[cnt++] = historicRules[i]; + if (cnt >= trscount) { + break; + } + } + } + } + if (finalZoneWithStartYear != NULL && trscount > cnt) { + const InitialTimeZoneRule *tmpini; + int32_t tmpcnt = trscount - cnt; + finalZoneWithStartYear->getTimeZoneRules(tmpini, &trsrules[cnt], tmpcnt, status); + if (U_FAILURE(status)) { + return; + } + cnt += tmpcnt; + } + // Set the result length + trscount = cnt; +} + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_FORMATTING + +//eof diff --git a/libicui18n/libicui18n/olsontz.cpp.patch b/libicui18n/libicui18n/olsontz.cpp.patch new file mode 100644 index 0000000..b8213ac --- /dev/null +++ b/libicui18n/libicui18n/olsontz.cpp.patch @@ -0,0 +1,11 @@ +--- libicui18n/i18n/olsontz.cpp 2019-12-23 14:38:40.225889259 +0300 ++++ libicui18n/olsontz.cpp 2020-07-21 14:14:11.005172691 +0300 +@@ -624,7 +624,7 @@ OlsonTimeZone::hasSameRules(const TimeZo + // be equal if their rules and transitions are equal + if ((finalZone == NULL && z->finalZone != NULL) + || (finalZone != NULL && z->finalZone == NULL) +- || (finalZone != NULL && z->finalZone != NULL && *finalZone != *z->finalZone)) { ++ || (finalZone != NULL && z->finalZone != NULL && !(*finalZone == *z->finalZone))) { + return FALSE; + } + diff --git a/libicui18n/libicui18n/rbtz.cpp b/libicui18n/libicui18n/rbtz.cpp new file mode 100644 index 0000000..6725d36 --- /dev/null +++ b/libicui18n/libicui18n/rbtz.cpp @@ -0,0 +1,959 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2007-2013, International Business Machines Corporation and +* others. All Rights Reserved. 
+******************************************************************************* +*/ + +#include "utypeinfo.h" // for 'typeid' to work + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/rbtz.h" +#include "unicode/gregocal.h" +#include "uvector.h" +#include "gregoimp.h" +#include "cmemory.h" +#include "umutex.h" + +U_NAMESPACE_BEGIN + +/** + * A struct representing a time zone transition + */ +struct Transition { + UDate time; + TimeZoneRule* from; + TimeZoneRule* to; +}; + +static UBool compareRules(UVector* rules1, UVector* rules2) { + if (rules1 == NULL && rules2 == NULL) { + return TRUE; + } else if (rules1 == NULL || rules2 == NULL) { + return FALSE; + } + int32_t size = rules1->size(); + if (size != rules2->size()) { + return FALSE; + } + for (int32_t i = 0; i < size; i++) { + TimeZoneRule *r1 = (TimeZoneRule*)rules1->elementAt(i); + TimeZoneRule *r2 = (TimeZoneRule*)rules2->elementAt(i); + if (*r1 != *r2) { + return FALSE; + } + } + return TRUE; +} + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTimeZone) + +RuleBasedTimeZone::RuleBasedTimeZone(const UnicodeString& id, InitialTimeZoneRule* initialRule) +: BasicTimeZone(id), fInitialRule(initialRule), fHistoricRules(NULL), fFinalRules(NULL), + fHistoricTransitions(NULL), fUpToDate(FALSE) { +} + +RuleBasedTimeZone::RuleBasedTimeZone(const RuleBasedTimeZone& source) +: BasicTimeZone(source), fInitialRule(source.fInitialRule->clone()), + fHistoricTransitions(NULL), fUpToDate(FALSE) { + fHistoricRules = copyRules(source.fHistoricRules); + fFinalRules = copyRules(source.fFinalRules); + if (source.fUpToDate) { + UErrorCode status = U_ZERO_ERROR; + complete(status); + } +} + +RuleBasedTimeZone::~RuleBasedTimeZone() { + deleteTransitions(); + deleteRules(); +} + +RuleBasedTimeZone& +RuleBasedTimeZone::operator=(const RuleBasedTimeZone& right) { + if (!(*this == right)) { + BasicTimeZone::operator=(right); + deleteRules(); + fInitialRule = right.fInitialRule->clone(); + fHistoricRules = 
copyRules(right.fHistoricRules); + fFinalRules = copyRules(right.fFinalRules); + deleteTransitions(); + fUpToDate = FALSE; + } + return *this; +} + +UBool +RuleBasedTimeZone::operator==(const TimeZone& that) const { + if (this == &that) { + return TRUE; + } + if (typeid(*this) != typeid(that) + || BasicTimeZone::operator==(that) == FALSE) { + return FALSE; + } + RuleBasedTimeZone *rbtz = (RuleBasedTimeZone*)&that; + if (!(*fInitialRule == *(rbtz->fInitialRule))) { + return FALSE; + } + if (compareRules(fHistoricRules, rbtz->fHistoricRules) + && compareRules(fFinalRules, rbtz->fFinalRules)) { + return TRUE; + } + return FALSE; +} + +UBool +RuleBasedTimeZone::operator!=(const TimeZone& that) const { + return !operator==(that); +} + +void +RuleBasedTimeZone::addTransitionRule(TimeZoneRule* rule, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + AnnualTimeZoneRule* atzrule = dynamic_cast(rule); + if (atzrule != NULL && atzrule->getEndYear() == AnnualTimeZoneRule::MAX_YEAR) { + // A final rule + if (fFinalRules == NULL) { + fFinalRules = new UVector(status); + if (U_FAILURE(status)) { + return; + } + } else if (fFinalRules->size() >= 2) { + // Cannot handle more than two final rules + status = U_INVALID_STATE_ERROR; + return; + } + fFinalRules->addElement((void*)rule, status); + } else { + // Non-final rule + if (fHistoricRules == NULL) { + fHistoricRules = new UVector(status); + if (U_FAILURE(status)) { + return; + } + } + fHistoricRules->addElement((void*)rule, status); + } + // Mark dirty, so transitions are recalculated at next complete() call + fUpToDate = FALSE; +} + + +void +RuleBasedTimeZone::completeConst(UErrorCode& status) const { + static UMutex gLock; + if (U_FAILURE(status)) { + return; + } + umtx_lock(&gLock); + if (!fUpToDate) { + RuleBasedTimeZone *ncThis = const_cast(this); + ncThis->complete(status); + } + umtx_unlock(&gLock); +} + +void +RuleBasedTimeZone::complete(UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + if 
(fUpToDate) { + return; + } + // Make sure either no final rules or a pair of AnnualTimeZoneRules + // are available. + if (fFinalRules != NULL && fFinalRules->size() != 2) { + status = U_INVALID_STATE_ERROR; + return; + } + + UBool *done = NULL; + // Create a TimezoneTransition and add to the list + if (fHistoricRules != NULL || fFinalRules != NULL) { + TimeZoneRule *curRule = fInitialRule; + UDate lastTransitionTime = MIN_MILLIS; + + // Build the transition array which represents historical time zone + // transitions. + if (fHistoricRules != NULL && fHistoricRules->size() > 0) { + int32_t i; + int32_t historicCount = fHistoricRules->size(); + done = (UBool*)uprv_malloc(sizeof(UBool) * historicCount); + if (done == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + goto cleanup; + } + for (i = 0; i < historicCount; i++) { + done[i] = FALSE; + } + while (TRUE) { + int32_t curStdOffset = curRule->getRawOffset(); + int32_t curDstSavings = curRule->getDSTSavings(); + UDate nextTransitionTime = MAX_MILLIS; + TimeZoneRule *nextRule = NULL; + TimeZoneRule *r = NULL; + UBool avail; + UDate tt; + UnicodeString curName, name; + curRule->getName(curName); + + for (i = 0; i < historicCount; i++) { + if (done[i]) { + continue; + } + r = (TimeZoneRule*)fHistoricRules->elementAt(i); + avail = r->getNextStart(lastTransitionTime, curStdOffset, curDstSavings, false, tt); + if (!avail) { + // No more transitions from this rule - skip this rule next time + done[i] = TRUE; + } else { + r->getName(name); + if (*r == *curRule || + (name == curName && r->getRawOffset() == curRule->getRawOffset() + && r->getDSTSavings() == curRule->getDSTSavings())) { + continue; + } + if (tt < nextTransitionTime) { + nextTransitionTime = tt; + nextRule = r; + } + } + } + + if (nextRule == NULL) { + // Check if all historic rules are done + UBool bDoneAll = TRUE; + for (int32_t j = 0; j < historicCount; j++) { + if (!done[j]) { + bDoneAll = FALSE; + break; + } + } + if (bDoneAll) { + break; + } + } + + if 
(fFinalRules != NULL) { + // Check if one of final rules has earlier transition date + for (i = 0; i < 2 /* fFinalRules->size() */; i++) { + TimeZoneRule *fr = (TimeZoneRule*)fFinalRules->elementAt(i); + if (*fr == *curRule) { + continue; + } + r = (TimeZoneRule*)fFinalRules->elementAt(i); + avail = r->getNextStart(lastTransitionTime, curStdOffset, curDstSavings, false, tt); + if (avail) { + if (tt < nextTransitionTime) { + nextTransitionTime = tt; + nextRule = r; + } + } + } + } + + if (nextRule == NULL) { + // Nothing more + break; + } + + if (fHistoricTransitions == NULL) { + fHistoricTransitions = new UVector(status); + if (U_FAILURE(status)) { + goto cleanup; + } + } + Transition *trst = (Transition*)uprv_malloc(sizeof(Transition)); + if (trst == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + goto cleanup; + } + trst->time = nextTransitionTime; + trst->from = curRule; + trst->to = nextRule; + fHistoricTransitions->addElement(trst, status); + if (U_FAILURE(status)) { + goto cleanup; + } + lastTransitionTime = nextTransitionTime; + curRule = nextRule; + } + } + if (fFinalRules != NULL) { + if (fHistoricTransitions == NULL) { + fHistoricTransitions = new UVector(status); + if (U_FAILURE(status)) { + goto cleanup; + } + } + // Append the first transition for each + TimeZoneRule *rule0 = (TimeZoneRule*)fFinalRules->elementAt(0); + TimeZoneRule *rule1 = (TimeZoneRule*)fFinalRules->elementAt(1); + UDate tt0, tt1; + UBool avail0 = rule0->getNextStart(lastTransitionTime, curRule->getRawOffset(), curRule->getDSTSavings(), false, tt0); + UBool avail1 = rule1->getNextStart(lastTransitionTime, curRule->getRawOffset(), curRule->getDSTSavings(), false, tt1); + if (!avail0 || !avail1) { + // Should not happen, because both rules are permanent + status = U_INVALID_STATE_ERROR; + goto cleanup; + } + Transition *final0 = (Transition*)uprv_malloc(sizeof(Transition)); + if (final0 == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + goto cleanup; + } + Transition *final1 = 
(Transition*)uprv_malloc(sizeof(Transition)); + if (final1 == NULL) { + uprv_free(final0); + status = U_MEMORY_ALLOCATION_ERROR; + goto cleanup; + } + if (tt0 < tt1) { + final0->time = tt0; + final0->from = curRule; + final0->to = rule0; + rule1->getNextStart(tt0, rule0->getRawOffset(), rule0->getDSTSavings(), false, final1->time); + final1->from = rule0; + final1->to = rule1; + } else { + final0->time = tt1; + final0->from = curRule; + final0->to = rule1; + rule0->getNextStart(tt1, rule1->getRawOffset(), rule1->getDSTSavings(), false, final1->time); + final1->from = rule1; + final1->to = rule0; + } + fHistoricTransitions->addElement(final0, status); + if (U_FAILURE(status)) { + goto cleanup; + } + fHistoricTransitions->addElement(final1, status); + if (U_FAILURE(status)) { + goto cleanup; + } + } + } + fUpToDate = TRUE; + if (done != NULL) { + uprv_free(done); + } + return; + +cleanup: + deleteTransitions(); + if (done != NULL) { + uprv_free(done); + } + fUpToDate = FALSE; +} + +RuleBasedTimeZone* +RuleBasedTimeZone::clone() const { + return new RuleBasedTimeZone(*this); +} + +int32_t +RuleBasedTimeZone::getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, + uint8_t dayOfWeek, int32_t millis, UErrorCode& status) const { + if (U_FAILURE(status)) { + return 0; + } + if (month < UCAL_JANUARY || month > UCAL_DECEMBER) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } else { + return getOffset(era, year, month, day, dayOfWeek, millis, + Grego::monthLength(year, month), status); + } +} + +int32_t +RuleBasedTimeZone::getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, + uint8_t /*dayOfWeek*/, int32_t millis, + int32_t /*monthLength*/, UErrorCode& status) const { + // dayOfWeek and monthLength are unused + if (U_FAILURE(status)) { + return 0; + } + if (era == GregorianCalendar::BC) { + // Convert to extended year + year = 1 - year; + } + int32_t rawOffset, dstOffset; + UDate time = (UDate)Grego::fieldsToDay(year, month, day) * 
U_MILLIS_PER_DAY + millis; + getOffsetInternal(time, TRUE, kDaylight, kStandard, rawOffset, dstOffset, status); + if (U_FAILURE(status)) { + return 0; + } + return (rawOffset + dstOffset); +} + +void +RuleBasedTimeZone::getOffset(UDate date, UBool local, int32_t& rawOffset, + int32_t& dstOffset, UErrorCode& status) const { + getOffsetInternal(date, local, kFormer, kLatter, rawOffset, dstOffset, status); +} + +void +RuleBasedTimeZone::getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const { + getOffsetInternal(date, TRUE, nonExistingTimeOpt, duplicatedTimeOpt, rawOffset, dstOffset, status); +} + + +/* + * The internal getOffset implementation + */ +void +RuleBasedTimeZone::getOffsetInternal(UDate date, UBool local, + int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, + UErrorCode& status) const { + rawOffset = 0; + dstOffset = 0; + + if (U_FAILURE(status)) { + return; + } + if (!fUpToDate) { + // Transitions are not yet resolved. We cannot do it here + // because this method is const. Thus, do nothing and return + // error status. 
+ status = U_INVALID_STATE_ERROR; + return; + } + const TimeZoneRule *rule = NULL; + if (fHistoricTransitions == NULL) { + rule = fInitialRule; + } else { + UDate tstart = getTransitionTime((Transition*)fHistoricTransitions->elementAt(0), + local, NonExistingTimeOpt, DuplicatedTimeOpt); + if (date < tstart) { + rule = fInitialRule; + } else { + int32_t idx = fHistoricTransitions->size() - 1; + UDate tend = getTransitionTime((Transition*)fHistoricTransitions->elementAt(idx), + local, NonExistingTimeOpt, DuplicatedTimeOpt); + if (date > tend) { + if (fFinalRules != NULL) { + rule = findRuleInFinal(date, local, NonExistingTimeOpt, DuplicatedTimeOpt); + } + if (rule == NULL) { + // no final rules or the given time is before the first transition + // specified by the final rules -> use the last rule + rule = ((Transition*)fHistoricTransitions->elementAt(idx))->to; + } + } else { + // Find a historical transition + while (idx >= 0) { + if (date >= getTransitionTime((Transition*)fHistoricTransitions->elementAt(idx), + local, NonExistingTimeOpt, DuplicatedTimeOpt)) { + break; + } + idx--; + } + rule = ((Transition*)fHistoricTransitions->elementAt(idx))->to; + } + } + } + if (rule != NULL) { + rawOffset = rule->getRawOffset(); + dstOffset = rule->getDSTSavings(); + } +} + +void +RuleBasedTimeZone::setRawOffset(int32_t /*offsetMillis*/) { + // We don't support this operation at this moment. + // Nothing to do! +} + +int32_t +RuleBasedTimeZone::getRawOffset(void) const { + // Note: This implementation returns standard GMT offset + // as of current time. + UErrorCode status = U_ZERO_ERROR; + int32_t raw, dst; + getOffset(uprv_getUTCtime() * U_MILLIS_PER_SECOND, + FALSE, raw, dst, status); + return raw; +} + +UBool +RuleBasedTimeZone::useDaylightTime(void) const { + // Note: This implementation returns true when + // daylight saving time is used as of now or + // after the next transition. 
+ UErrorCode status = U_ZERO_ERROR; + UDate now = uprv_getUTCtime() * U_MILLIS_PER_SECOND; + int32_t raw, dst; + getOffset(now, FALSE, raw, dst, status); + if (dst != 0) { + return TRUE; + } + // If DST is not used now, check if DST is used after the next transition + UDate time; + TimeZoneRule *from, *to; + UBool avail = findNext(now, FALSE, time, from, to); + if (avail && to->getDSTSavings() != 0) { + return TRUE; + } + return FALSE; +} + +UBool +RuleBasedTimeZone::inDaylightTime(UDate date, UErrorCode& status) const { + if (U_FAILURE(status)) { + return FALSE; + } + int32_t raw, dst; + getOffset(date, FALSE, raw, dst, status); + if (dst != 0) { + return TRUE; + } + return FALSE; +} + +UBool +RuleBasedTimeZone::hasSameRules(const TimeZone& other) const { + if (this == &other) { + return TRUE; + } + if (typeid(*this) != typeid(other)) { + return FALSE; + } + const RuleBasedTimeZone& that = (const RuleBasedTimeZone&)other; + if (!(*fInitialRule == *(that.fInitialRule))) { + return FALSE; + } + if (compareRules(fHistoricRules, that.fHistoricRules) + && compareRules(fFinalRules, that.fFinalRules)) { + return TRUE; + } + return FALSE; +} + +UBool +RuleBasedTimeZone::getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const { + UErrorCode status = U_ZERO_ERROR; + completeConst(status); + if (U_FAILURE(status)) { + return FALSE; + } + UDate transitionTime; + TimeZoneRule *fromRule, *toRule; + UBool found = findNext(base, inclusive, transitionTime, fromRule, toRule); + if (found) { + result.setTime(transitionTime); + result.setFrom((const TimeZoneRule&)*fromRule); + result.setTo((const TimeZoneRule&)*toRule); + return TRUE; + } + return FALSE; +} + +UBool +RuleBasedTimeZone::getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const { + UErrorCode status = U_ZERO_ERROR; + completeConst(status); + if (U_FAILURE(status)) { + return FALSE; + } + UDate transitionTime; + TimeZoneRule *fromRule, *toRule; + UBool found = 
findPrev(base, inclusive, transitionTime, fromRule, toRule); + if (found) { + result.setTime(transitionTime); + result.setFrom((const TimeZoneRule&)*fromRule); + result.setTo((const TimeZoneRule&)*toRule); + return TRUE; + } + return FALSE; +} + +int32_t +RuleBasedTimeZone::countTransitionRules(UErrorCode& /*status*/) const { + int32_t count = 0; + if (fHistoricRules != NULL) { + count += fHistoricRules->size(); + } + if (fFinalRules != NULL) { + count += fFinalRules->size(); + } + return count; +} + +void +RuleBasedTimeZone::getTimeZoneRules(const InitialTimeZoneRule*& initial, + const TimeZoneRule* trsrules[], + int32_t& trscount, + UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + // Initial rule + initial = fInitialRule; + + // Transition rules + int32_t cnt = 0; + int32_t idx; + if (fHistoricRules != NULL && cnt < trscount) { + int32_t historicCount = fHistoricRules->size(); + idx = 0; + while (cnt < trscount && idx < historicCount) { + trsrules[cnt++] = (const TimeZoneRule*)fHistoricRules->elementAt(idx++); + } + } + if (fFinalRules != NULL && cnt < trscount) { + int32_t finalCount = fFinalRules->size(); + idx = 0; + while (cnt < trscount && idx < finalCount) { + trsrules[cnt++] = (const TimeZoneRule*)fFinalRules->elementAt(idx++); + } + } + // Set the result length + trscount = cnt; +} + +void +RuleBasedTimeZone::deleteRules(void) { + delete fInitialRule; + fInitialRule = NULL; + if (fHistoricRules != NULL) { + while (!fHistoricRules->isEmpty()) { + delete (TimeZoneRule*)(fHistoricRules->orphanElementAt(0)); + } + delete fHistoricRules; + fHistoricRules = NULL; + } + if (fFinalRules != NULL) { + while (!fFinalRules->isEmpty()) { + delete (AnnualTimeZoneRule*)(fFinalRules->orphanElementAt(0)); + } + delete fFinalRules; + fFinalRules = NULL; + } +} + +void +RuleBasedTimeZone::deleteTransitions(void) { + if (fHistoricTransitions != NULL) { + while (!fHistoricTransitions->isEmpty()) { + Transition *trs = 
(Transition*)fHistoricTransitions->orphanElementAt(0); + uprv_free(trs); + } + delete fHistoricTransitions; + } + fHistoricTransitions = NULL; +} + +UVector* +RuleBasedTimeZone::copyRules(UVector* source) { + if (source == NULL) { + return NULL; + } + UErrorCode ec = U_ZERO_ERROR; + int32_t size = source->size(); + UVector *rules = new UVector(size, ec); + if (U_FAILURE(ec)) { + return NULL; + } + int32_t i; + for (i = 0; i < size; i++) { + rules->addElement(((TimeZoneRule*)source->elementAt(i))->clone(), ec); + if (U_FAILURE(ec)) { + break; + } + } + if (U_FAILURE(ec)) { + // In case of error, clean up + for (i = 0; i < rules->size(); i++) { + TimeZoneRule *rule = (TimeZoneRule*)rules->orphanElementAt(i); + delete rule; + } + delete rules; + return NULL; + } + return rules; +} + +TimeZoneRule* +RuleBasedTimeZone::findRuleInFinal(UDate date, UBool local, + int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt) const { + if (fFinalRules == NULL) { + return NULL; + } + + AnnualTimeZoneRule* fr0 = (AnnualTimeZoneRule*)fFinalRules->elementAt(0); + AnnualTimeZoneRule* fr1 = (AnnualTimeZoneRule*)fFinalRules->elementAt(1); + if (fr0 == NULL || fr1 == NULL) { + return NULL; + } + + UDate start0, start1; + UDate base; + int32_t localDelta; + + base = date; + if (local) { + localDelta = getLocalDelta(fr1->getRawOffset(), fr1->getDSTSavings(), + fr0->getRawOffset(), fr0->getDSTSavings(), + NonExistingTimeOpt, DuplicatedTimeOpt); + base -= localDelta; + } + UBool avail0 = fr0->getPreviousStart(base, fr1->getRawOffset(), fr1->getDSTSavings(), TRUE, start0); + + base = date; + if (local) { + localDelta = getLocalDelta(fr0->getRawOffset(), fr0->getDSTSavings(), + fr1->getRawOffset(), fr1->getDSTSavings(), + NonExistingTimeOpt, DuplicatedTimeOpt); + base -= localDelta; + } + UBool avail1 = fr1->getPreviousStart(base, fr0->getRawOffset(), fr0->getDSTSavings(), TRUE, start1); + + if (!avail0 || !avail1) { + if (avail0) { + return fr0; + } else if (avail1) { + return fr1; + } + // 
Both rules take effect after the given time + return NULL; + } + + return (start0 > start1) ? fr0 : fr1; +} + +UBool +RuleBasedTimeZone::findNext(UDate base, UBool inclusive, UDate& transitionTime, + TimeZoneRule*& fromRule, TimeZoneRule*& toRule) const { + if (fHistoricTransitions == NULL) { + return FALSE; + } + UBool isFinal = FALSE; + UBool found = FALSE; + Transition result; + Transition *tzt = (Transition*)fHistoricTransitions->elementAt(0); + UDate tt = tzt->time; + if (tt > base || (inclusive && tt == base)) { + result = *tzt; + found = TRUE; + } else { + int32_t idx = fHistoricTransitions->size() - 1; + tzt = (Transition*)fHistoricTransitions->elementAt(idx); + tt = tzt->time; + if (inclusive && tt == base) { + result = *tzt; + found = TRUE; + } else if (tt <= base) { + if (fFinalRules != NULL) { + // Find a transion time with finalRules + TimeZoneRule *r0 = (TimeZoneRule*)fFinalRules->elementAt(0); + TimeZoneRule *r1 = (TimeZoneRule*)fFinalRules->elementAt(1); + UDate start0, start1; + UBool avail0 = r0->getNextStart(base, r1->getRawOffset(), r1->getDSTSavings(), inclusive, start0); + UBool avail1 = r1->getNextStart(base, r0->getRawOffset(), r0->getDSTSavings(), inclusive, start1); + // avail0/avail1 should be always TRUE + if (!avail0 && !avail1) { + return FALSE; + } + if (!avail1 || start0 < start1) { + result.time = start0; + result.from = r1; + result.to = r0; + } else { + result.time = start1; + result.from = r0; + result.to = r1; + } + isFinal = TRUE; + found = TRUE; + } + } else { + // Find a transition within the historic transitions + idx--; + Transition *prev = tzt; + while (idx > 0) { + tzt = (Transition*)fHistoricTransitions->elementAt(idx); + tt = tzt->time; + if (tt < base || (!inclusive && tt == base)) { + break; + } + idx--; + prev = tzt; + } + result.time = prev->time; + result.from = prev->from; + result.to = prev->to; + found = TRUE; + } + } + if (found) { + // For now, this implementation ignore transitions with only zone name 
changes. + if (result.from->getRawOffset() == result.to->getRawOffset() + && result.from->getDSTSavings() == result.to->getDSTSavings()) { + if (isFinal) { + return FALSE; + } else { + // No offset changes. Try next one if not final + return findNext(result.time, FALSE /* always exclusive */, + transitionTime, fromRule, toRule); + } + } + transitionTime = result.time; + fromRule = result.from; + toRule = result.to; + return TRUE; + } + return FALSE; +} + +UBool +RuleBasedTimeZone::findPrev(UDate base, UBool inclusive, UDate& transitionTime, + TimeZoneRule*& fromRule, TimeZoneRule*& toRule) const { + if (fHistoricTransitions == NULL) { + return FALSE; + } + UBool found = FALSE; + Transition result; + Transition *tzt = (Transition*)fHistoricTransitions->elementAt(0); + UDate tt = tzt->time; + if (inclusive && tt == base) { + result = *tzt; + found = TRUE; + } else if (tt < base) { + int32_t idx = fHistoricTransitions->size() - 1; + tzt = (Transition*)fHistoricTransitions->elementAt(idx); + tt = tzt->time; + if (inclusive && tt == base) { + result = *tzt; + found = TRUE; + } else if (tt < base) { + if (fFinalRules != NULL) { + // Find a transion time with finalRules + TimeZoneRule *r0 = (TimeZoneRule*)fFinalRules->elementAt(0); + TimeZoneRule *r1 = (TimeZoneRule*)fFinalRules->elementAt(1); + UDate start0, start1; + UBool avail0 = r0->getPreviousStart(base, r1->getRawOffset(), r1->getDSTSavings(), inclusive, start0); + UBool avail1 = r1->getPreviousStart(base, r0->getRawOffset(), r0->getDSTSavings(), inclusive, start1); + // avail0/avail1 should be always TRUE + if (!avail0 && !avail1) { + return FALSE; + } + if (!avail1 || start0 > start1) { + result.time = start0; + result.from = r1; + result.to = r0; + } else { + result.time = start1; + result.from = r0; + result.to = r1; + } + } else { + result = *tzt; + } + found = TRUE; + } else { + // Find a transition within the historic transitions + idx--; + while (idx >= 0) { + tzt = 
(Transition*)fHistoricTransitions->elementAt(idx); + tt = tzt->time; + if (tt < base || (inclusive && tt == base)) { + break; + } + idx--; + } + result = *tzt; + found = TRUE; + } + } + if (found) { + // For now, this implementation ignore transitions with only zone name changes. + if (result.from->getRawOffset() == result.to->getRawOffset() + && result.from->getDSTSavings() == result.to->getDSTSavings()) { + // No offset changes. Try next one if not final + return findPrev(result.time, FALSE /* always exclusive */, + transitionTime, fromRule, toRule); + } + transitionTime = result.time; + fromRule = result.from; + toRule = result.to; + return TRUE; + } + return FALSE; +} + +UDate +RuleBasedTimeZone::getTransitionTime(Transition* transition, UBool local, + int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt) const { + UDate time = transition->time; + if (local) { + time += getLocalDelta(transition->from->getRawOffset(), transition->from->getDSTSavings(), + transition->to->getRawOffset(), transition->to->getDSTSavings(), + NonExistingTimeOpt, DuplicatedTimeOpt); + } + return time; +} + +int32_t +RuleBasedTimeZone::getLocalDelta(int32_t rawBefore, int32_t dstBefore, int32_t rawAfter, int32_t dstAfter, + int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt) const { + int32_t delta = 0; + + int32_t offsetBefore = rawBefore + dstBefore; + int32_t offsetAfter = rawAfter + dstAfter; + + UBool dstToStd = (dstBefore != 0) && (dstAfter == 0); + UBool stdToDst = (dstBefore == 0) && (dstAfter != 0); + + if (offsetAfter - offsetBefore >= 0) { + // Positive transition, which makes a non-existing local time range + if (((NonExistingTimeOpt & kStdDstMask) == kStandard && dstToStd) + || ((NonExistingTimeOpt & kStdDstMask) == kDaylight && stdToDst)) { + delta = offsetBefore; + } else if (((NonExistingTimeOpt & kStdDstMask) == kStandard && stdToDst) + || ((NonExistingTimeOpt & kStdDstMask) == kDaylight && dstToStd)) { + delta = offsetAfter; + } else if ((NonExistingTimeOpt & 
kFormerLatterMask) == kLatter) { + delta = offsetBefore; + } else { + // Interprets the time with rule before the transition, + // default for non-existing time range + delta = offsetAfter; + } + } else { + // Negative transition, which makes a duplicated local time range + if (((DuplicatedTimeOpt & kStdDstMask) == kStandard && dstToStd) + || ((DuplicatedTimeOpt & kStdDstMask) == kDaylight && stdToDst)) { + delta = offsetAfter; + } else if (((DuplicatedTimeOpt & kStdDstMask) == kStandard && stdToDst) + || ((DuplicatedTimeOpt & kStdDstMask) == kDaylight && dstToStd)) { + delta = offsetBefore; + } else if ((DuplicatedTimeOpt & kFormerLatterMask) == kFormer) { + delta = offsetBefore; + } else { + // Interprets the time with rule after the transition, + // default for duplicated local time range + delta = offsetAfter; + } + } + return delta; +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +//eof + diff --git a/libicui18n/libicui18n/rbtz.cpp.patch b/libicui18n/libicui18n/rbtz.cpp.patch new file mode 100644 index 0000000..551cebe --- /dev/null +++ b/libicui18n/libicui18n/rbtz.cpp.patch @@ -0,0 +1,29 @@ +--- libicui18n/i18n/rbtz.cpp 2019-12-23 14:38:40.227889254 +0300 ++++ libicui18n/rbtz.cpp 2020-07-21 14:30:49.408841121 +0300 +@@ -76,7 +76,7 @@ RuleBasedTimeZone::~RuleBasedTimeZone() + + RuleBasedTimeZone& + RuleBasedTimeZone::operator=(const RuleBasedTimeZone& right) { +- if (*this != right) { ++ if (!(*this == right)) { + BasicTimeZone::operator=(right); + deleteRules(); + fInitialRule = right.fInitialRule->clone(); +@@ -98,7 +98,7 @@ RuleBasedTimeZone::operator==(const Time + return FALSE; + } + RuleBasedTimeZone *rbtz = (RuleBasedTimeZone*)&that; +- if (*fInitialRule != *(rbtz->fInitialRule)) { ++ if (!(*fInitialRule == *(rbtz->fInitialRule))) { + return FALSE; + } + if (compareRules(fHistoricRules, rbtz->fHistoricRules) +@@ -532,7 +532,7 @@ RuleBasedTimeZone::hasSameRules(const Ti + return FALSE; + } + const RuleBasedTimeZone& that = (const 
RuleBasedTimeZone&)other; +- if (*fInitialRule != *(that.fInitialRule)) { ++ if (!(*fInitialRule == *(that.fInitialRule))) { + return FALSE; + } + if (compareRules(fHistoricRules, that.fHistoricRules) diff --git a/libicui18n/libicui18n/stsearch.cpp b/libicui18n/libicui18n/stsearch.cpp new file mode 100644 index 0000000..a107194 --- /dev/null +++ b/libicui18n/libicui18n/stsearch.cpp @@ -0,0 +1,483 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2001-2014 IBM and others. All rights reserved. +********************************************************************** +* Date Name Description +* 03/22/2000 helena Creation. +********************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/stsearch.h" +#include "usrchimp.h" +#include "cmemory.h" + +U_NAMESPACE_BEGIN + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch) + +// public constructors and destructors ----------------------------------- + +StringSearch::StringSearch(const UnicodeString &pattern, + const UnicodeString &text, + const Locale &locale, + BreakIterator *breakiter, + UErrorCode &status) : + SearchIterator(text, breakiter), + m_pattern_(pattern) +{ + if (U_FAILURE(status)) { + m_strsrch_ = NULL; + return; + } + + m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), + m_text_.getBuffer(), m_text_.length(), + locale.getName(), (UBreakIterator *)breakiter, + &status); + uprv_free(m_search_); + m_search_ = NULL; + + if (U_SUCCESS(status)) { + // m_search_ has been created by the base SearchIterator class + m_search_ = m_strsrch_->search; + } +} + +StringSearch::StringSearch(const UnicodeString &pattern, + const UnicodeString &text, + RuleBasedCollator *coll, + BreakIterator *breakiter, + UErrorCode &status) : + 
SearchIterator(text, breakiter), + m_pattern_(pattern) +{ + if (U_FAILURE(status)) { + m_strsrch_ = NULL; + return; + } + if (coll == NULL) { + status = U_ILLEGAL_ARGUMENT_ERROR; + m_strsrch_ = NULL; + return; + } + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), + m_pattern_.length(), + m_text_.getBuffer(), + m_text_.length(), coll->toUCollator(), + (UBreakIterator *)breakiter, + &status); + uprv_free(m_search_); + m_search_ = NULL; + + if (U_SUCCESS(status)) { + // m_search_ has been created by the base SearchIterator class + m_search_ = m_strsrch_->search; + } +} + +StringSearch::StringSearch(const UnicodeString &pattern, + CharacterIterator &text, + const Locale &locale, + BreakIterator *breakiter, + UErrorCode &status) : + SearchIterator(text, breakiter), + m_pattern_(pattern) +{ + if (U_FAILURE(status)) { + m_strsrch_ = NULL; + return; + } + m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), + m_text_.getBuffer(), m_text_.length(), + locale.getName(), (UBreakIterator *)breakiter, + &status); + uprv_free(m_search_); + m_search_ = NULL; + + if (U_SUCCESS(status)) { + // m_search_ has been created by the base SearchIterator class + m_search_ = m_strsrch_->search; + } +} + +StringSearch::StringSearch(const UnicodeString &pattern, + CharacterIterator &text, + RuleBasedCollator *coll, + BreakIterator *breakiter, + UErrorCode &status) : + SearchIterator(text, breakiter), + m_pattern_(pattern) +{ + if (U_FAILURE(status)) { + m_strsrch_ = NULL; + return; + } + if (coll == NULL) { + status = U_ILLEGAL_ARGUMENT_ERROR; + m_strsrch_ = NULL; + return; + } + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), + m_pattern_.length(), + m_text_.getBuffer(), + m_text_.length(), coll->toUCollator(), + (UBreakIterator *)breakiter, + &status); + uprv_free(m_search_); + m_search_ = NULL; + + if (U_SUCCESS(status)) { + // m_search_ has been created by the base SearchIterator class + m_search_ = m_strsrch_->search; + } +} + 
+StringSearch::StringSearch(const StringSearch &that) : + SearchIterator(that.m_text_, that.m_breakiterator_), + m_pattern_(that.m_pattern_) +{ + UErrorCode status = U_ZERO_ERROR; + + // Free m_search_ from the superclass + uprv_free(m_search_); + m_search_ = NULL; + + if (that.m_strsrch_ == NULL) { + // This was not a good copy + m_strsrch_ = NULL; + } + else { + // Make a deep copy + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), + m_pattern_.length(), + m_text_.getBuffer(), + m_text_.length(), + that.m_strsrch_->collator, + (UBreakIterator *)that.m_breakiterator_, + &status); + if (U_SUCCESS(status)) { + // m_search_ has been created by the base SearchIterator class + m_search_ = m_strsrch_->search; + } + } +} + +StringSearch::~StringSearch() +{ + if (m_strsrch_ != NULL) { + usearch_close(m_strsrch_); + m_search_ = NULL; + } +} + +StringSearch * +StringSearch::clone() const { + return new StringSearch(*this); +} + +// operator overloading --------------------------------------------- +StringSearch & StringSearch::operator=(const StringSearch &that) +{ + if (!((*this) == that)) { + UErrorCode status = U_ZERO_ERROR; + m_text_ = that.m_text_; + m_breakiterator_ = that.m_breakiterator_; + m_pattern_ = that.m_pattern_; + // all m_search_ in the parent class is linked up with m_strsrch_ + usearch_close(m_strsrch_); + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), + m_pattern_.length(), + m_text_.getBuffer(), + m_text_.length(), + that.m_strsrch_->collator, + NULL, &status); + // Check null pointer + if (m_strsrch_ != NULL) { + m_search_ = m_strsrch_->search; + } + } + return *this; +} + +UBool StringSearch::operator==(const SearchIterator &that) const +{ + if (this == &that) { + return TRUE; + } + if (SearchIterator::operator ==(that)) { + StringSearch &thatsrch = (StringSearch &)that; + return (this->m_pattern_ == thatsrch.m_pattern_ && + this->m_strsrch_->collator == thatsrch.m_strsrch_->collator); + } + return FALSE; +} + +// public 
get and set methods ---------------------------------------- + +void StringSearch::setOffset(int32_t position, UErrorCode &status) +{ + // status checked in usearch_setOffset + usearch_setOffset(m_strsrch_, position, &status); +} + +int32_t StringSearch::getOffset(void) const +{ + return usearch_getOffset(m_strsrch_); +} + +void StringSearch::setText(const UnicodeString &text, UErrorCode &status) +{ + if (U_SUCCESS(status)) { + m_text_ = text; + usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status); + } +} + +void StringSearch::setText(CharacterIterator &text, UErrorCode &status) +{ + if (U_SUCCESS(status)) { + text.getText(m_text_); + usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status); + } +} + +RuleBasedCollator * StringSearch::getCollator() const +{ + // Note the const_cast. It would be cleaner if this const method returned a const collator. + return RuleBasedCollator::rbcFromUCollator(const_cast(m_strsrch_->collator)); +} + +void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status) +{ + if (U_SUCCESS(status)) { + usearch_setCollator(m_strsrch_, coll->toUCollator(), &status); + } +} + +void StringSearch::setPattern(const UnicodeString &pattern, + UErrorCode &status) +{ + if (U_SUCCESS(status)) { + m_pattern_ = pattern; + usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(), + &status); + } +} + +const UnicodeString & StringSearch::getPattern() const +{ + return m_pattern_; +} + +// public methods ---------------------------------------------------- + +void StringSearch::reset() +{ + usearch_reset(m_strsrch_); +} + +StringSearch * StringSearch::safeClone() const +{ + UErrorCode status = U_ZERO_ERROR; + StringSearch *result = new StringSearch(m_pattern_, m_text_, + getCollator(), + m_breakiterator_, + status); + /* test for NULL */ + if (result == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + result->setOffset(getOffset(), status); + 
result->setMatchStart(m_strsrch_->search->matchedIndex); + result->setMatchLength(m_strsrch_->search->matchedLength); + if (U_FAILURE(status)) { + return NULL; + } + return result; +} + +// protected method ------------------------------------------------- + +int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) +{ + // values passed here are already in the pre-shift position + if (U_SUCCESS(status)) { + if (m_strsrch_->pattern.cesLength == 0) { + m_search_->matchedIndex = + m_search_->matchedIndex == USEARCH_DONE ? + getOffset() : m_search_->matchedIndex + 1; + m_search_->matchedLength = 0; + ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, + &status); + if (m_search_->matchedIndex == m_search_->textLength) { + m_search_->matchedIndex = USEARCH_DONE; + } + } + else { + // looking at usearch.cpp, this part is shifted out to + // StringSearch instead of SearchIterator because m_strsrch_ is + // not accessible in SearchIterator +#if 0 + if (position + m_strsrch_->pattern.defaultShiftSize + > m_search_->textLength) { + setMatchNotFound(); + return USEARCH_DONE; + } +#endif + if (m_search_->matchedLength <= 0) { + // the flipping direction issue has already been handled + // in next() + // for boundary check purposes. this will ensure that the + // next match will not preceed the current offset + // note search->matchedIndex will always be set to something + // in the code + m_search_->matchedIndex = position - 1; + } + + ucol_setOffset(m_strsrch_->textIter, position, &status); + +#if 0 + for (;;) { + if (m_search_->isCanonicalMatch) { + // can't use exact here since extra accents are allowed. 
+ usearch_handleNextCanonical(m_strsrch_, &status); + } + else { + usearch_handleNextExact(m_strsrch_, &status); + } + if (U_FAILURE(status)) { + return USEARCH_DONE; + } + if (m_breakiterator_ == NULL +#if !UCONFIG_NO_BREAK_ITERATION + || + m_search_->matchedIndex == USEARCH_DONE || + (m_breakiterator_->isBoundary(m_search_->matchedIndex) && + m_breakiterator_->isBoundary(m_search_->matchedIndex + + m_search_->matchedLength)) +#endif + ) { + if (m_search_->matchedIndex == USEARCH_DONE) { + ucol_setOffset(m_strsrch_->textIter, + m_search_->textLength, &status); + } + else { + ucol_setOffset(m_strsrch_->textIter, + m_search_->matchedIndex, &status); + } + return m_search_->matchedIndex; + } + } +#else + // if m_strsrch_->breakIter is always the same as m_breakiterator_ + // then we don't need to check the match boundaries here because + // usearch_handleNextXXX will already have done it. + if (m_search_->isCanonicalMatch) { + // *could* actually use exact here 'cause no extra accents allowed... + usearch_handleNextCanonical(m_strsrch_, &status); + } else { + usearch_handleNextExact(m_strsrch_, &status); + } + + if (U_FAILURE(status)) { + return USEARCH_DONE; + } + + if (m_search_->matchedIndex == USEARCH_DONE) { + ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status); + } else { + ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status); + } + + return m_search_->matchedIndex; +#endif + } + } + return USEARCH_DONE; +} + +int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status) +{ + // values passed here are already in the pre-shift position + if (U_SUCCESS(status)) { + if (m_strsrch_->pattern.cesLength == 0) { + m_search_->matchedIndex = + (m_search_->matchedIndex == USEARCH_DONE ? 
getOffset() : + m_search_->matchedIndex); + if (m_search_->matchedIndex == 0) { + setMatchNotFound(); + } + else { + m_search_->matchedIndex --; + ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, + &status); + m_search_->matchedLength = 0; + } + } + else { + // looking at usearch.cpp, this part is shifted out to + // StringSearch instead of SearchIterator because m_strsrch_ is + // not accessible in SearchIterator +#if 0 + if (!m_search_->isOverlap && + position - m_strsrch_->pattern.defaultShiftSize < 0) { + setMatchNotFound(); + return USEARCH_DONE; + } + + for (;;) { + if (m_search_->isCanonicalMatch) { + // can't use exact here since extra accents are allowed. + usearch_handlePreviousCanonical(m_strsrch_, &status); + } + else { + usearch_handlePreviousExact(m_strsrch_, &status); + } + if (U_FAILURE(status)) { + return USEARCH_DONE; + } + if (m_breakiterator_ == NULL +#if !UCONFIG_NO_BREAK_ITERATION + || + m_search_->matchedIndex == USEARCH_DONE || + (m_breakiterator_->isBoundary(m_search_->matchedIndex) && + m_breakiterator_->isBoundary(m_search_->matchedIndex + + m_search_->matchedLength)) +#endif + ) { + return m_search_->matchedIndex; + } + } +#else + ucol_setOffset(m_strsrch_->textIter, position, &status); + + if (m_search_->isCanonicalMatch) { + // *could* use exact match here since extra accents *not* allowed! 
+ usearch_handlePreviousCanonical(m_strsrch_, &status); + } else { + usearch_handlePreviousExact(m_strsrch_, &status); + } + + if (U_FAILURE(status)) { + return USEARCH_DONE; + } + + return m_search_->matchedIndex; +#endif + } + + return m_search_->matchedIndex; + } + return USEARCH_DONE; +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_COLLATION */ diff --git a/libicui18n/libicui18n/stsearch.cpp.patch b/libicui18n/libicui18n/stsearch.cpp.patch new file mode 100644 index 0000000..151b414 --- /dev/null +++ b/libicui18n/libicui18n/stsearch.cpp.patch @@ -0,0 +1,11 @@ +--- libicui18n/i18n/stsearch.cpp 2019-12-23 14:38:40.234889235 +0300 ++++ libicui18n/stsearch.cpp 2020-07-21 13:21:49.621459123 +0300 +@@ -184,7 +184,7 @@ StringSearch::clone() const { + // operator overloading --------------------------------------------- + StringSearch & StringSearch::operator=(const StringSearch &that) + { +- if ((*this) != that) { ++ if (!((*this) == that)) { + UErrorCode status = U_ZERO_ERROR; + m_text_ = that.m_text_; + m_breakiterator_ = that.m_breakiterator_; diff --git a/libicui18n/libicui18n/vtzone.cpp b/libicui18n/libicui18n/vtzone.cpp new file mode 100644 index 0000000..ab6825e --- /dev/null +++ b/libicui18n/libicui18n/vtzone.cpp @@ -0,0 +1,2633 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2007-2016, International Business Machines Corporation and +* others. All Rights Reserved. 
+******************************************************************************* +*/ + +#include "utypeinfo.h" // for 'typeid' to work + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/vtzone.h" +#include "unicode/rbtz.h" +#include "unicode/ucal.h" +#include "unicode/ures.h" +#include "cmemory.h" +#include "uvector.h" +#include "gregoimp.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN + +// This is the deleter that will be use to remove TimeZoneRule +U_CDECL_BEGIN +static void U_CALLCONV +deleteTimeZoneRule(void* obj) { + delete (TimeZoneRule*) obj; +} +U_CDECL_END + +// Smybol characters used by RFC2445 VTIMEZONE +static const UChar COLON = 0x3A; /* : */ +static const UChar SEMICOLON = 0x3B; /* ; */ +static const UChar EQUALS_SIGN = 0x3D; /* = */ +static const UChar COMMA = 0x2C; /* , */ +static const UChar PLUS = 0x2B; /* + */ +static const UChar MINUS = 0x2D; /* - */ + +// RFC2445 VTIMEZONE tokens +static const UChar ICAL_BEGIN_VTIMEZONE[] = {0x42, 0x45, 0x47, 0x49, 0x4E, 0x3A, 0x56, 0x54, 0x49, 0x4D, 0x45, 0x5A, 0x4F, 0x4E, 0x45, 0}; /* "BEGIN:VTIMEZONE" */ +static const UChar ICAL_END_VTIMEZONE[] = {0x45, 0x4E, 0x44, 0x3A, 0x56, 0x54, 0x49, 0x4D, 0x45, 0x5A, 0x4F, 0x4E, 0x45, 0}; /* "END:VTIMEZONE" */ +static const UChar ICAL_BEGIN[] = {0x42, 0x45, 0x47, 0x49, 0x4E, 0}; /* "BEGIN" */ +static const UChar ICAL_END[] = {0x45, 0x4E, 0x44, 0}; /* "END" */ +static const UChar ICAL_VTIMEZONE[] = {0x56, 0x54, 0x49, 0x4D, 0x45, 0x5A, 0x4F, 0x4E, 0x45, 0}; /* "VTIMEZONE" */ +static const UChar ICAL_TZID[] = {0x54, 0x5A, 0x49, 0x44, 0}; /* "TZID" */ +static const UChar ICAL_STANDARD[] = {0x53, 0x54, 0x41, 0x4E, 0x44, 0x41, 0x52, 0x44, 0}; /* "STANDARD" */ +static const UChar ICAL_DAYLIGHT[] = {0x44, 0x41, 0x59, 0x4C, 0x49, 0x47, 0x48, 0x54, 0}; /* "DAYLIGHT" */ +static const UChar ICAL_DTSTART[] = {0x44, 0x54, 0x53, 0x54, 0x41, 0x52, 0x54, 0}; /* "DTSTART" */ +static const UChar ICAL_TZOFFSETFROM[] = {0x54, 0x5A, 0x4F, 0x46, 0x46, 0x53, 
0x45, 0x54, 0x46, 0x52, 0x4F, 0x4D, 0}; /* "TZOFFSETFROM" */ +static const UChar ICAL_TZOFFSETTO[] = {0x54, 0x5A, 0x4F, 0x46, 0x46, 0x53, 0x45, 0x54, 0x54, 0x4F, 0}; /* "TZOFFSETTO" */ +static const UChar ICAL_RDATE[] = {0x52, 0x44, 0x41, 0x54, 0x45, 0}; /* "RDATE" */ +static const UChar ICAL_RRULE[] = {0x52, 0x52, 0x55, 0x4C, 0x45, 0}; /* "RRULE" */ +static const UChar ICAL_TZNAME[] = {0x54, 0x5A, 0x4E, 0x41, 0x4D, 0x45, 0}; /* "TZNAME" */ +static const UChar ICAL_TZURL[] = {0x54, 0x5A, 0x55, 0x52, 0x4C, 0}; /* "TZURL" */ +static const UChar ICAL_LASTMOD[] = {0x4C, 0x41, 0x53, 0x54, 0x2D, 0x4D, 0x4F, 0x44, 0x49, 0x46, 0x49, 0x45, 0x44, 0}; /* "LAST-MODIFIED" */ + +static const UChar ICAL_FREQ[] = {0x46, 0x52, 0x45, 0x51, 0}; /* "FREQ" */ +static const UChar ICAL_UNTIL[] = {0x55, 0x4E, 0x54, 0x49, 0x4C, 0}; /* "UNTIL" */ +static const UChar ICAL_YEARLY[] = {0x59, 0x45, 0x41, 0x52, 0x4C, 0x59, 0}; /* "YEARLY" */ +static const UChar ICAL_BYMONTH[] = {0x42, 0x59, 0x4D, 0x4F, 0x4E, 0x54, 0x48, 0}; /* "BYMONTH" */ +static const UChar ICAL_BYDAY[] = {0x42, 0x59, 0x44, 0x41, 0x59, 0}; /* "BYDAY" */ +static const UChar ICAL_BYMONTHDAY[] = {0x42, 0x59, 0x4D, 0x4F, 0x4E, 0x54, 0x48, 0x44, 0x41, 0x59, 0}; /* "BYMONTHDAY" */ + +static const UChar ICAL_NEWLINE[] = {0x0D, 0x0A, 0}; /* CRLF */ + +static const UChar ICAL_DOW_NAMES[7][3] = { + {0x53, 0x55, 0}, /* "SU" */ + {0x4D, 0x4F, 0}, /* "MO" */ + {0x54, 0x55, 0}, /* "TU" */ + {0x57, 0x45, 0}, /* "WE" */ + {0x54, 0x48, 0}, /* "TH" */ + {0x46, 0x52, 0}, /* "FR" */ + {0x53, 0x41, 0} /* "SA" */}; + +// Month length for non-leap year +static const int32_t MONTHLENGTH[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + +// ICU custom property +static const UChar ICU_TZINFO_PROP[] = {0x58, 0x2D, 0x54, 0x5A, 0x49, 0x4E, 0x46, 0x4F, 0x3A, 0}; /* "X-TZINFO:" */ +static const UChar ICU_TZINFO_PARTIAL[] = {0x2F, 0x50, 0x61, 0x72, 0x74, 0x69, 0x61, 0x6C, 0x40, 0}; /* "/Partial@" */ +static const UChar ICU_TZINFO_SIMPLE[] = {0x2F, 
0x53, 0x69, 0x6D, 0x70, 0x6C, 0x65, 0x40, 0}; /* "/Simple@" */ + + +/* + * Simple fixed digit ASCII number to integer converter + */ +static int32_t parseAsciiDigits(const UnicodeString& str, int32_t start, int32_t length, UErrorCode& status) { + if (U_FAILURE(status)) { + return 0; + } + if (length <= 0 || str.length() < start || (start + length) > str.length()) { + status = U_INVALID_FORMAT_ERROR; + return 0; + } + int32_t sign = 1; + if (str.charAt(start) == PLUS) { + start++; + length--; + } else if (str.charAt(start) == MINUS) { + sign = -1; + start++; + length--; + } + int32_t num = 0; + for (int32_t i = 0; i < length; i++) { + int32_t digit = str.charAt(start + i) - 0x0030; + if (digit < 0 || digit > 9) { + status = U_INVALID_FORMAT_ERROR; + return 0; + } + num = 10 * num + digit; + } + return sign * num; +} + +static UnicodeString& appendAsciiDigits(int32_t number, uint8_t length, UnicodeString& str) { + UBool negative = FALSE; + int32_t digits[10]; // max int32_t is 10 decimal digits + int32_t i; + + if (number < 0) { + negative = TRUE; + number *= -1; + } + + length = length > 10 ? 
10 : length; + if (length == 0) { + // variable length + i = 0; + do { + digits[i++] = number % 10; + number /= 10; + } while (number != 0); + length = static_cast(i); + } else { + // fixed digits + for (i = 0; i < length; i++) { + digits[i] = number % 10; + number /= 10; + } + } + if (negative) { + str.append(MINUS); + } + for (i = length - 1; i >= 0; i--) { + str.append((UChar)(digits[i] + 0x0030)); + } + return str; +} + +static UnicodeString& appendMillis(UDate date, UnicodeString& str) { + UBool negative = FALSE; + int32_t digits[20]; // max int64_t is 20 decimal digits + int32_t i; + int64_t number; + + if (date < MIN_MILLIS) { + number = (int64_t)MIN_MILLIS; + } else if (date > MAX_MILLIS) { + number = (int64_t)MAX_MILLIS; + } else { + number = (int64_t)date; + } + if (number < 0) { + negative = TRUE; + number *= -1; + } + i = 0; + do { + digits[i++] = (int32_t)(number % 10); + number /= 10; + } while (number != 0); + + if (negative) { + str.append(MINUS); + } + i--; + while (i >= 0) { + str.append((UChar)(digits[i--] + 0x0030)); + } + return str; +} + +/* + * Convert date/time to RFC2445 Date-Time form #1 DATE WITH LOCAL TIME + */ +static UnicodeString& getDateTimeString(UDate time, UnicodeString& str) { + int32_t year, month, dom, dow, doy, mid; + Grego::timeToFields(time, year, month, dom, dow, doy, mid); + + str.remove(); + appendAsciiDigits(year, 4, str); + appendAsciiDigits(month + 1, 2, str); + appendAsciiDigits(dom, 2, str); + str.append((UChar)0x0054 /*'T'*/); + + int32_t t = mid; + int32_t hour = t / U_MILLIS_PER_HOUR; + t %= U_MILLIS_PER_HOUR; + int32_t min = t / U_MILLIS_PER_MINUTE; + t %= U_MILLIS_PER_MINUTE; + int32_t sec = t / U_MILLIS_PER_SECOND; + + appendAsciiDigits(hour, 2, str); + appendAsciiDigits(min, 2, str); + appendAsciiDigits(sec, 2, str); + return str; +} + +/* + * Convert date/time to RFC2445 Date-Time form #2 DATE WITH UTC TIME + */ +static UnicodeString& getUTCDateTimeString(UDate time, UnicodeString& str) { + 
getDateTimeString(time, str); + str.append((UChar)0x005A /*'Z'*/); + return str; +} + +/* + * Parse RFC2445 Date-Time form #1 DATE WITH LOCAL TIME and + * #2 DATE WITH UTC TIME + */ +static UDate parseDateTimeString(const UnicodeString& str, int32_t offset, UErrorCode& status) { + if (U_FAILURE(status)) { + return 0.0; + } + + int32_t year = 0, month = 0, day = 0, hour = 0, min = 0, sec = 0; + UBool isUTC = FALSE; + UBool isValid = FALSE; + do { + int length = str.length(); + if (length != 15 && length != 16) { + // FORM#1 15 characters, such as "20060317T142115" + // FORM#2 16 characters, such as "20060317T142115Z" + break; + } + if (str.charAt(8) != 0x0054) { + // charcter "T" must be used for separating date and time + break; + } + if (length == 16) { + if (str.charAt(15) != 0x005A) { + // invalid format + break; + } + isUTC = TRUE; + } + + year = parseAsciiDigits(str, 0, 4, status); + month = parseAsciiDigits(str, 4, 2, status) - 1; // 0-based + day = parseAsciiDigits(str, 6, 2, status); + hour = parseAsciiDigits(str, 9, 2, status); + min = parseAsciiDigits(str, 11, 2, status); + sec = parseAsciiDigits(str, 13, 2, status); + + if (U_FAILURE(status)) { + break; + } + + // check valid range + int32_t maxDayOfMonth = Grego::monthLength(year, month); + if (year < 0 || month < 0 || month > 11 || day < 1 || day > maxDayOfMonth || + hour < 0 || hour >= 24 || min < 0 || min >= 60 || sec < 0 || sec >= 60) { + break; + } + + isValid = TRUE; + } while(false); + + if (!isValid) { + status = U_INVALID_FORMAT_ERROR; + return 0.0; + } + // Calculate the time + UDate time = Grego::fieldsToDay(year, month, day) * U_MILLIS_PER_DAY; + time += (hour * U_MILLIS_PER_HOUR + min * U_MILLIS_PER_MINUTE + sec * U_MILLIS_PER_SECOND); + if (!isUTC) { + time -= offset; + } + return time; +} + +/* + * Convert RFC2445 utc-offset string to milliseconds + */ +static int32_t offsetStrToMillis(const UnicodeString& str, UErrorCode& status) { + if (U_FAILURE(status)) { + return 0; + } + + UBool 
/*
 * Parse individual RRULE
 *
 * The rule text is a list of ATTR=VALUE properties separated by ';'.
 * FREQ, UNTIL, BYMONTH, BYDAY and BYMONTHDAY are interpreted; any other
 * attribute is skipped. FREQ=YEARLY is mandatory.
 *
 * On return -
 *
 * month    calculated by BYMONTH-1, or -1 when not found
 * dow      day of week in BYDAY, or 0 when not found
 * wim      day of week ordinal number in BYDAY, or 0 when not found
 * dom      an array of day of month
 * domCount number of available days in dom (domCount is specifying the size of dom on input)
 * until    time defined by UNTIL attribute or MIN_MILLIS if not available
 *
 * On a parse error status is set to a failure code (U_INVALID_FORMAT_ERROR
 * unless a more specific code was already set, e.g. U_BUFFER_OVERFLOW_ERROR
 * when BYMONTHDAY lists more values than dom can hold). In that case the
 * output arguments may be partially updated and domCount is left unchanged.
 */
static void parseRRULE(const UnicodeString& rrule, int32_t& month, int32_t& dow, int32_t& wim,
                       int32_t* dom, int32_t& domCount, UDate& until, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }
    int32_t numDom = 0;

    // Defaults reported when the corresponding attribute is absent
    month = -1;
    dow = 0;
    wim = 0;
    until = MIN_MILLIS;

    UBool yearly = FALSE;
    //UBool parseError = FALSE;

    int32_t prop_start = 0;
    int32_t prop_end;
    UnicodeString prop, attr, value;
    UBool nextProp = TRUE;

    while (nextProp) {
        // Cut out the next ';'-delimited property; the last one runs to the
        // end of the string.
        prop_end = rrule.indexOf(SEMICOLON, prop_start);
        if (prop_end == -1) {
            prop.setTo(rrule, prop_start);
            nextProp = FALSE;
        } else {
            prop.setTo(rrule, prop_start, prop_end - prop_start);
            prop_start = prop_end + 1;
        }
        // Split the property into attribute name and value at the first '='
        int32_t eql = prop.indexOf(EQUALS_SIGN);
        if (eql != -1) {
            attr.setTo(prop, 0, eql);
            value.setTo(prop, eql + 1);
        } else {
            goto rruleParseError;
        }

        if (attr.compare(ICAL_FREQ, -1) == 0) {
            // only support YEARLY frequency type
            if (value.compare(ICAL_YEARLY, -1) == 0) {
                yearly = TRUE;
            } else {
                goto rruleParseError;
            }
        } else if (attr.compare(ICAL_UNTIL, -1) == 0) {
            // ISO8601 UTC format, for example, "20060315T020000Z"
            until = parseDateTimeString(value, 0, status);
            if (U_FAILURE(status)) {
                goto rruleParseError;
            }
        } else if (attr.compare(ICAL_BYMONTH, -1) == 0) {
            // Note: BYMONTH may contain multiple months, but only a single month makes sense for
            // VTIMEZONE property.
            if (value.length() > 2) {
                goto rruleParseError;
            }
            month = parseAsciiDigits(value, 0, value.length(), status) - 1;
            if (U_FAILURE(status) || month < 0 || month >= 12) {
                goto rruleParseError;
            }
        } else if (attr.compare(ICAL_BYDAY, -1) == 0) {
            // Note: BYDAY may contain multiple day of week separated by comma.  It is unlikely used for
            // VTIMEZONE property.  We do not support the case.

            // 2-letter format is used just for representing a day of week, for example, "SU" for Sunday
            // 3 or 4-letter format is used for representing Nth day of week, for example, "-1SA" for last Saturday
            int32_t length = value.length();
            if (length < 2 || length > 4) {
                goto rruleParseError;
            }
            if (length > 2) {
                // Nth day of week
                int32_t sign = 1;
                if (value.charAt(0) == PLUS) {
                    sign = 1;
                } else if (value.charAt(0) == MINUS) {
                    sign = -1;
                } else if (length == 4) {
                    // A 4-character value must start with an explicit sign
                    goto rruleParseError;
                }
                // The ordinal is the single character just before the
                // 2-letter day name
                int32_t n = parseAsciiDigits(value, length - 3, 1, status);
                if (U_FAILURE(status) || n == 0 || n > 4) {
                    goto rruleParseError;
                }
                wim = n * sign;
                // Strip sign and ordinal so only the day name remains
                value.remove(0, length - 2);
            }
            int32_t wday;
            for (wday = 0; wday < 7; wday++) {
                if (value.compare(ICAL_DOW_NAMES[wday], 2) == 0) {
                    break;
                }
            }
            if (wday < 7) {
                // Sunday(1) - Saturday(7)
                dow = wday + 1;
            } else {
                goto rruleParseError;
            }
        } else if (attr.compare(ICAL_BYMONTHDAY, -1) == 0) {
            // Note: BYMONTHDAY may contain multiple days delimited by comma
            //
            // A value of BYMONTHDAY could be negative, for example, -1 means
            // the last day in a month
            int32_t dom_idx = 0;
            int32_t dom_start = 0;
            int32_t dom_end;
            UBool nextDOM = TRUE;
            while (nextDOM) {
                dom_end = value.indexOf(COMMA, dom_start);
                if (dom_end == -1) {
                    dom_end = value.length();
                    nextDOM = FALSE;
                }
                if (dom_idx < domCount) {
                    dom[dom_idx] = parseAsciiDigits(value, dom_start, dom_end - dom_start, status);
                    if (U_FAILURE(status)) {
                        goto rruleParseError;
                    }
                    dom_idx++;
                } else {
                    // More values than the caller's dom array can hold
                    status = U_BUFFER_OVERFLOW_ERROR;
                    goto rruleParseError;
                }
                dom_start = dom_end + 1;
            }
            numDom = dom_idx;
        }
    }
    if (!yearly) {
        // FREQ=YEARLY must be set
        goto rruleParseError;
    }
    // Set actual number of parsed DOM (ICAL_BYMONTHDAY)
    domCount = numDom;
    return;

rruleParseError:
    if (U_SUCCESS(status)) {
        // Set error status
        status = U_INVALID_FORMAT_ERROR;
    }
}
createRuleByRRULE(const UnicodeString& zonename, int rawOffset, int dstSavings, UDate start, + UVector* dates, int fromOffset, UErrorCode& status) { + if (U_FAILURE(status)) { + return NULL; + } + if (dates == NULL || dates->size() == 0) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + int32_t i, j; + DateTimeRule *adtr = NULL; + + // Parse the first rule + UnicodeString rrule = *((UnicodeString*)dates->elementAt(0)); + int32_t month, dayOfWeek, nthDayOfWeek, dayOfMonth = 0; + int32_t days[7]; + int32_t daysCount = UPRV_LENGTHOF(days); + UDate until; + + parseRRULE(rrule, month, dayOfWeek, nthDayOfWeek, days, daysCount, until, status); + if (U_FAILURE(status)) { + return NULL; + } + + if (dates->size() == 1) { + // No more rules + if (daysCount > 1) { + // Multiple BYMONTHDAY values + if (daysCount != 7 || month == -1 || dayOfWeek == 0) { + // Only support the rule using 7 continuous days + // BYMONTH and BYDAY must be set at the same time + goto unsupportedRRule; + } + int32_t firstDay = 31; // max possible number of dates in a month + for (i = 0; i < 7; i++) { + // Resolve negative day numbers. A negative day number should + // not be used in February, but if we see such case, we use 28 + // as the base. + if (days[i] < 0) { + days[i] = MONTHLENGTH[month] + days[i] + 1; + } + if (days[i] < firstDay) { + firstDay = days[i]; + } + } + // Make sure days are continuous + for (i = 1; i < 7; i++) { + UBool found = FALSE; + for (j = 0; j < 7; j++) { + if (days[j] == firstDay + i) { + found = TRUE; + break; + } + } + if (!found) { + // days are not continuous + goto unsupportedRRule; + } + } + // Use DOW_GEQ_DOM rule with firstDay as the start date + dayOfMonth = firstDay; + } + } else { + // Check if BYMONTH + BYMONTHDAY + BYDAY rule with multiple RRULE lines. + // Otherwise, not supported. 
+ if (month == -1 || dayOfWeek == 0 || daysCount == 0) { + // This is not the case + goto unsupportedRRule; + } + // Parse the rest of rules if number of rules is not exceeding 7. + // We can only support 7 continuous days starting from a day of month. + if (dates->size() > 7) { + goto unsupportedRRule; + } + + // Note: To check valid date range across multiple rule is a little + // bit complicated. For now, this code is not doing strict range + // checking across month boundary + + int32_t earliestMonth = month; + int32_t earliestDay = 31; + for (i = 0; i < daysCount; i++) { + int32_t dom = days[i]; + dom = dom > 0 ? dom : MONTHLENGTH[month] + dom + 1; + earliestDay = dom < earliestDay ? dom : earliestDay; + } + + int32_t anotherMonth = -1; + for (i = 1; i < dates->size(); i++) { + rrule = *((UnicodeString*)dates->elementAt(i)); + UDate tmp_until; + int32_t tmp_month, tmp_dayOfWeek, tmp_nthDayOfWeek; + int32_t tmp_days[7]; + int32_t tmp_daysCount = UPRV_LENGTHOF(tmp_days); + parseRRULE(rrule, tmp_month, tmp_dayOfWeek, tmp_nthDayOfWeek, tmp_days, tmp_daysCount, tmp_until, status); + if (U_FAILURE(status)) { + return NULL; + } + // If UNTIL is newer than previous one, use the one + if (tmp_until > until) { + until = tmp_until; + } + + // Check if BYMONTH + BYMONTHDAY + BYDAY rule + if (tmp_month == -1 || tmp_dayOfWeek == 0 || tmp_daysCount == 0) { + goto unsupportedRRule; + } + // Count number of BYMONTHDAY + if (daysCount + tmp_daysCount > 7) { + // We cannot support BYMONTHDAY more than 7 + goto unsupportedRRule; + } + // Check if the same BYDAY is used. 
Otherwise, we cannot + // support the rule + if (tmp_dayOfWeek != dayOfWeek) { + goto unsupportedRRule; + } + // Check if the month is same or right next to the primary month + if (tmp_month != month) { + if (anotherMonth == -1) { + int32_t diff = tmp_month - month; + if (diff == -11 || diff == -1) { + // Previous month + anotherMonth = tmp_month; + earliestMonth = anotherMonth; + // Reset earliest day + earliestDay = 31; + } else if (diff == 11 || diff == 1) { + // Next month + anotherMonth = tmp_month; + } else { + // The day range cannot exceed more than 2 months + goto unsupportedRRule; + } + } else if (tmp_month != month && tmp_month != anotherMonth) { + // The day range cannot exceed more than 2 months + goto unsupportedRRule; + } + } + // If earlier month, go through days to find the earliest day + if (tmp_month == earliestMonth) { + for (j = 0; j < tmp_daysCount; j++) { + tmp_days[j] = tmp_days[j] > 0 ? tmp_days[j] : MONTHLENGTH[tmp_month] + tmp_days[j] + 1; + earliestDay = tmp_days[j] < earliestDay ? 
tmp_days[j] : earliestDay; + } + } + daysCount += tmp_daysCount; + } + if (daysCount != 7) { + // Number of BYMONTHDAY entries must be 7 + goto unsupportedRRule; + } + month = earliestMonth; + dayOfMonth = earliestDay; + } + + // Calculate start/end year and missing fields + int32_t startYear, startMonth, startDOM, startDOW, startDOY, startMID; + Grego::timeToFields(start + fromOffset, startYear, startMonth, startDOM, + startDOW, startDOY, startMID); + if (month == -1) { + // If BYMONTH is not set, use the month of DTSTART + month = startMonth; + } + if (dayOfWeek == 0 && nthDayOfWeek == 0 && dayOfMonth == 0) { + // If only YEARLY is set, use the day of DTSTART as BYMONTHDAY + dayOfMonth = startDOM; + } + + int32_t endYear; + if (until != MIN_MILLIS) { + int32_t endMonth, endDOM, endDOW, endDOY, endMID; + Grego::timeToFields(until, endYear, endMonth, endDOM, endDOW, endDOY, endMID); + } else { + endYear = AnnualTimeZoneRule::MAX_YEAR; + } + + // Create the AnnualDateTimeRule + if (dayOfWeek == 0 && nthDayOfWeek == 0 && dayOfMonth != 0) { + // Day in month rule, for example, 15th day in the month + adtr = new DateTimeRule(month, dayOfMonth, startMID, DateTimeRule::WALL_TIME); + } else if (dayOfWeek != 0 && nthDayOfWeek != 0 && dayOfMonth == 0) { + // Nth day of week rule, for example, last Sunday + adtr = new DateTimeRule(month, nthDayOfWeek, dayOfWeek, startMID, DateTimeRule::WALL_TIME); + } else if (dayOfWeek != 0 && nthDayOfWeek == 0 && dayOfMonth != 0) { + // First day of week after day of month rule, for example, + // first Sunday after 15th day in the month + adtr = new DateTimeRule(month, dayOfMonth, dayOfWeek, TRUE, startMID, DateTimeRule::WALL_TIME); + } + if (adtr == NULL) { + goto unsupportedRRule; + } + return new AnnualTimeZoneRule(zonename, rawOffset, dstSavings, adtr, startYear, endYear); + +unsupportedRRule: + status = U_INVALID_STATE_ERROR; + return NULL; +} + +/* + * Create a TimeZoneRule by the RDATE definition + */ +static TimeZoneRule* 
createRuleByRDATE(const UnicodeString& zonename, int32_t rawOffset, int32_t dstSavings, + UDate start, UVector* dates, int32_t fromOffset, UErrorCode& status) { + if (U_FAILURE(status)) { + return NULL; + } + TimeArrayTimeZoneRule *retVal = NULL; + if (dates == NULL || dates->size() == 0) { + // When no RDATE line is provided, use start (DTSTART) + // as the transition time + retVal = new TimeArrayTimeZoneRule(zonename, rawOffset, dstSavings, + &start, 1, DateTimeRule::UTC_TIME); + } else { + // Create an array of transition times + int32_t size = dates->size(); + UDate* times = (UDate*)uprv_malloc(sizeof(UDate) * size); + if (times == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + for (int32_t i = 0; i < size; i++) { + UnicodeString *datestr = (UnicodeString*)dates->elementAt(i); + times[i] = parseDateTimeString(*datestr, fromOffset, status); + if (U_FAILURE(status)) { + uprv_free(times); + return NULL; + } + } + retVal = new TimeArrayTimeZoneRule(zonename, rawOffset, dstSavings, + times, size, DateTimeRule::UTC_TIME); + uprv_free(times); + } + return retVal; +} + +/* + * Check if the DOW rule specified by month, weekInMonth and dayOfWeek is equivalent + * to the DateTimerule. + */ +static UBool isEquivalentDateRule(int32_t month, int32_t weekInMonth, int32_t dayOfWeek, const DateTimeRule *dtrule) { + if (month != dtrule->getRuleMonth() || dayOfWeek != dtrule->getRuleDayOfWeek()) { + return FALSE; + } + if (dtrule->getTimeRuleType() != DateTimeRule::WALL_TIME) { + // Do not try to do more intelligent comparison for now. 
+ return FALSE; + } + if (dtrule->getDateRuleType() == DateTimeRule::DOW + && dtrule->getRuleWeekInMonth() == weekInMonth) { + return TRUE; + } + int32_t ruleDOM = dtrule->getRuleDayOfMonth(); + if (dtrule->getDateRuleType() == DateTimeRule::DOW_GEQ_DOM) { + if (ruleDOM%7 == 1 && (ruleDOM + 6)/7 == weekInMonth) { + return TRUE; + } + if (month != UCAL_FEBRUARY && (MONTHLENGTH[month] - ruleDOM)%7 == 6 + && weekInMonth == -1*((MONTHLENGTH[month]-ruleDOM+1)/7)) { + return TRUE; + } + } + if (dtrule->getDateRuleType() == DateTimeRule::DOW_LEQ_DOM) { + if (ruleDOM%7 == 0 && ruleDOM/7 == weekInMonth) { + return TRUE; + } + if (month != UCAL_FEBRUARY && (MONTHLENGTH[month] - ruleDOM)%7 == 0 + && weekInMonth == -1*((MONTHLENGTH[month] - ruleDOM)/7 + 1)) { + return TRUE; + } + } + return FALSE; +} + +/* + * Convert the rule to its equivalent rule using WALL_TIME mode. + * This function returns NULL when the specified DateTimeRule is already + * using WALL_TIME mode. + */ +static DateTimeRule* toWallTimeRule(const DateTimeRule* rule, int32_t rawOffset, int32_t dstSavings) { + if (rule->getTimeRuleType() == DateTimeRule::WALL_TIME) { + return NULL; + } + int32_t wallt = rule->getRuleMillisInDay(); + if (rule->getTimeRuleType() == DateTimeRule::UTC_TIME) { + wallt += (rawOffset + dstSavings); + } else if (rule->getTimeRuleType() == DateTimeRule::STANDARD_TIME) { + wallt += dstSavings; + } + + int32_t month = -1, dom = 0, dow = 0; + DateTimeRule::DateRuleType dtype; + int32_t dshift = 0; + if (wallt < 0) { + dshift = -1; + wallt += U_MILLIS_PER_DAY; + } else if (wallt >= U_MILLIS_PER_DAY) { + dshift = 1; + wallt -= U_MILLIS_PER_DAY; + } + + month = rule->getRuleMonth(); + dom = rule->getRuleDayOfMonth(); + dow = rule->getRuleDayOfWeek(); + dtype = rule->getDateRuleType(); + + if (dshift != 0) { + if (dtype == DateTimeRule::DOW) { + // Convert to DOW_GEQ_DOM or DOW_LEQ_DOM rule first + int32_t wim = rule->getRuleWeekInMonth(); + if (wim > 0) { + dtype = 
DateTimeRule::DOW_GEQ_DOM; + dom = 7 * (wim - 1) + 1; + } else { + dtype = DateTimeRule::DOW_LEQ_DOM; + dom = MONTHLENGTH[month] + 7 * (wim + 1); + } + } + // Shift one day before or after + dom += dshift; + if (dom == 0) { + month--; + month = month < UCAL_JANUARY ? UCAL_DECEMBER : month; + dom = MONTHLENGTH[month]; + } else if (dom > MONTHLENGTH[month]) { + month++; + month = month > UCAL_DECEMBER ? UCAL_JANUARY : month; + dom = 1; + } + if (dtype != DateTimeRule::DOM) { + // Adjust day of week + dow += dshift; + if (dow < UCAL_SUNDAY) { + dow = UCAL_SATURDAY; + } else if (dow > UCAL_SATURDAY) { + dow = UCAL_SUNDAY; + } + } + } + // Create a new rule + DateTimeRule *modifiedRule; + if (dtype == DateTimeRule::DOM) { + modifiedRule = new DateTimeRule(month, dom, wallt, DateTimeRule::WALL_TIME); + } else { + modifiedRule = new DateTimeRule(month, dom, dow, + (dtype == DateTimeRule::DOW_GEQ_DOM), wallt, DateTimeRule::WALL_TIME); + } + return modifiedRule; +} + +/* + * Minimum implementations of stream writer/reader, writing/reading + * UnicodeString. For now, we do not want to introduce the dependency + * on the ICU I/O stream in this module. But we want to keep the code + * equivalent to the ICU4J implementation, which utilizes java.io.Writer/ + * Reader. 
+ */ +class VTZWriter { +public: + VTZWriter(UnicodeString& out); + ~VTZWriter(); + + void write(const UnicodeString& str); + void write(UChar ch); + void write(const UChar* str); + //void write(const UChar* str, int32_t length); +private: + UnicodeString* out; +}; + +VTZWriter::VTZWriter(UnicodeString& output) { + out = &output; +} + +VTZWriter::~VTZWriter() { +} + +void +VTZWriter::write(const UnicodeString& str) { + out->append(str); +} + +void +VTZWriter::write(UChar ch) { + out->append(ch); +} + +void +VTZWriter::write(const UChar* str) { + out->append(str, -1); +} + +/* +void +VTZWriter::write(const UChar* str, int32_t length) { + out->append(str, length); +} +*/ + +class VTZReader { +public: + VTZReader(const UnicodeString& input); + ~VTZReader(); + + UChar read(void); +private: + const UnicodeString* in; + int32_t index; +}; + +VTZReader::VTZReader(const UnicodeString& input) { + in = &input; + index = 0; +} + +VTZReader::~VTZReader() { +} + +UChar +VTZReader::read(void) { + UChar ch = 0xFFFF; + if (index < in->length()) { + ch = in->charAt(index); + } + index++; + return ch; +} + + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(VTimeZone) + +VTimeZone::VTimeZone() +: BasicTimeZone(), tz(NULL), vtzlines(NULL), + lastmod(MAX_MILLIS) { +} + +VTimeZone::VTimeZone(const VTimeZone& source) +: BasicTimeZone(source), tz(NULL), vtzlines(NULL), + tzurl(source.tzurl), lastmod(source.lastmod), + olsonzid(source.olsonzid), icutzver(source.icutzver) { + if (source.tz != NULL) { + tz = source.tz->clone(); + } + if (source.vtzlines != NULL) { + UErrorCode status = U_ZERO_ERROR; + int32_t size = source.vtzlines->size(); + vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, size, status); + if (U_SUCCESS(status)) { + for (int32_t i = 0; i < size; i++) { + UnicodeString *line = (UnicodeString*)source.vtzlines->elementAt(i); + vtzlines->addElement(line->clone(), status); + if (U_FAILURE(status)) { + break; + } + } + } + if (U_FAILURE(status) && vtzlines != NULL) { + 
delete vtzlines; + } + } +} + +VTimeZone::~VTimeZone() { + if (tz != NULL) { + delete tz; + } + if (vtzlines != NULL) { + delete vtzlines; + } +} + +VTimeZone& +VTimeZone::operator=(const VTimeZone& right) { + if (this == &right) { + return *this; + } + if (!(*this == right)) { + BasicTimeZone::operator=(right); + if (tz != NULL) { + delete tz; + tz = NULL; + } + if (right.tz != NULL) { + tz = right.tz->clone(); + } + if (vtzlines != NULL) { + delete vtzlines; + } + if (right.vtzlines != NULL) { + UErrorCode status = U_ZERO_ERROR; + int32_t size = right.vtzlines->size(); + vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, size, status); + if (U_SUCCESS(status)) { + for (int32_t i = 0; i < size; i++) { + UnicodeString *line = (UnicodeString*)right.vtzlines->elementAt(i); + vtzlines->addElement(line->clone(), status); + if (U_FAILURE(status)) { + break; + } + } + } + if (U_FAILURE(status) && vtzlines != NULL) { + delete vtzlines; + vtzlines = NULL; + } + } + tzurl = right.tzurl; + lastmod = right.lastmod; + olsonzid = right.olsonzid; + icutzver = right.icutzver; + } + return *this; +} + +UBool +VTimeZone::operator==(const TimeZone& that) const { + if (this == &that) { + return TRUE; + } + if (typeid(*this) != typeid(that) || !BasicTimeZone::operator==(that)) { + return FALSE; + } + VTimeZone *vtz = (VTimeZone*)&that; + if (*tz == *(vtz->tz) + && tzurl == vtz->tzurl + && lastmod == vtz->lastmod + /* && olsonzid = that.olsonzid */ + /* && icutzver = that.icutzver */) { + return TRUE; + } + return FALSE; +} + +UBool +VTimeZone::operator!=(const TimeZone& that) const { + return !operator==(that); +} + +VTimeZone* +VTimeZone::createVTimeZoneByID(const UnicodeString& ID) { + VTimeZone *vtz = new VTimeZone(); + vtz->tz = (BasicTimeZone*)TimeZone::createTimeZone(ID); + vtz->tz->getID(vtz->olsonzid); + + // Set ICU tzdata version + UErrorCode status = U_ZERO_ERROR; + UResourceBundle *bundle = NULL; + const UChar* versionStr = NULL; + int32_t len = 0; + 
bundle = ures_openDirect(NULL, "zoneinfo64", &status); + versionStr = ures_getStringByKey(bundle, "TZVersion", &len, &status); + if (U_SUCCESS(status)) { + vtz->icutzver.setTo(versionStr, len); + } + ures_close(bundle); + return vtz; +} + +VTimeZone* +VTimeZone::createVTimeZoneFromBasicTimeZone(const BasicTimeZone& basic_time_zone, UErrorCode &status) { + if (U_FAILURE(status)) { + return NULL; + } + VTimeZone *vtz = new VTimeZone(); + if (vtz == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + vtz->tz = basic_time_zone.clone(); + if (vtz->tz == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + delete vtz; + return NULL; + } + vtz->tz->getID(vtz->olsonzid); + + // Set ICU tzdata version + UResourceBundle *bundle = NULL; + const UChar* versionStr = NULL; + int32_t len = 0; + bundle = ures_openDirect(NULL, "zoneinfo64", &status); + versionStr = ures_getStringByKey(bundle, "TZVersion", &len, &status); + if (U_SUCCESS(status)) { + vtz->icutzver.setTo(versionStr, len); + } + ures_close(bundle); + return vtz; +} + +VTimeZone* +VTimeZone::createVTimeZone(const UnicodeString& vtzdata, UErrorCode& status) { + if (U_FAILURE(status)) { + return NULL; + } + VTZReader reader(vtzdata); + VTimeZone *vtz = new VTimeZone(); + vtz->load(reader, status); + if (U_FAILURE(status)) { + delete vtz; + return NULL; + } + return vtz; +} + +UBool +VTimeZone::getTZURL(UnicodeString& url) const { + if (tzurl.length() > 0) { + url = tzurl; + return TRUE; + } + return FALSE; +} + +void +VTimeZone::setTZURL(const UnicodeString& url) { + tzurl = url; +} + +UBool +VTimeZone::getLastModified(UDate& lastModified) const { + if (lastmod != MAX_MILLIS) { + lastModified = lastmod; + return TRUE; + } + return FALSE; +} + +void +VTimeZone::setLastModified(UDate lastModified) { + lastmod = lastModified; +} + +void +VTimeZone::write(UnicodeString& result, UErrorCode& status) const { + result.remove(); + VTZWriter writer(result); + write(writer, status); +} + +void +VTimeZone::write(UDate 
start, UnicodeString& result, UErrorCode& status) const { + result.remove(); + VTZWriter writer(result); + write(start, writer, status); +} + +void +VTimeZone::writeSimple(UDate time, UnicodeString& result, UErrorCode& status) const { + result.remove(); + VTZWriter writer(result); + writeSimple(time, writer, status); +} + +VTimeZone* +VTimeZone::clone() const { + return new VTimeZone(*this); +} + +int32_t +VTimeZone::getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, + uint8_t dayOfWeek, int32_t millis, UErrorCode& status) const { + return tz->getOffset(era, year, month, day, dayOfWeek, millis, status); +} + +int32_t +VTimeZone::getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, + uint8_t dayOfWeek, int32_t millis, + int32_t monthLength, UErrorCode& status) const { + return tz->getOffset(era, year, month, day, dayOfWeek, millis, monthLength, status); +} + +void +VTimeZone::getOffset(UDate date, UBool local, int32_t& rawOffset, + int32_t& dstOffset, UErrorCode& status) const { + return tz->getOffset(date, local, rawOffset, dstOffset, status); +} + +void +VTimeZone::setRawOffset(int32_t offsetMillis) { + tz->setRawOffset(offsetMillis); +} + +int32_t +VTimeZone::getRawOffset(void) const { + return tz->getRawOffset(); +} + +UBool +VTimeZone::useDaylightTime(void) const { + return tz->useDaylightTime(); +} + +UBool +VTimeZone::inDaylightTime(UDate date, UErrorCode& status) const { + return tz->inDaylightTime(date, status); +} + +UBool +VTimeZone::hasSameRules(const TimeZone& other) const { + return tz->hasSameRules(other); +} + +UBool +VTimeZone::getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const { + return tz->getNextTransition(base, inclusive, result); +} + +UBool +VTimeZone::getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const { + return tz->getPreviousTransition(base, inclusive, result); +} + +int32_t +VTimeZone::countTransitionRules(UErrorCode& status) const { + return 
tz->countTransitionRules(status); +} + +void +VTimeZone::getTimeZoneRules(const InitialTimeZoneRule*& initial, + const TimeZoneRule* trsrules[], int32_t& trscount, + UErrorCode& status) const { + tz->getTimeZoneRules(initial, trsrules, trscount, status); +} + +void +VTimeZone::load(VTZReader& reader, UErrorCode& status) { + vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, DEFAULT_VTIMEZONE_LINES, status); + if (U_FAILURE(status)) { + return; + } + UBool eol = FALSE; + UBool start = FALSE; + UBool success = FALSE; + UnicodeString line; + + while (TRUE) { + UChar ch = reader.read(); + if (ch == 0xFFFF) { + // end of file + if (start && line.startsWith(ICAL_END_VTIMEZONE, -1)) { + vtzlines->addElement(new UnicodeString(line), status); + if (U_FAILURE(status)) { + goto cleanupVtzlines; + } + success = TRUE; + } + break; + } + if (ch == 0x000D) { + // CR, must be followed by LF according to the definition in RFC2445 + continue; + } + if (eol) { + if (ch != 0x0009 && ch != 0x0020) { + // NOT followed by TAB/SP -> new line + if (start) { + if (line.length() > 0) { + vtzlines->addElement(new UnicodeString(line), status); + if (U_FAILURE(status)) { + goto cleanupVtzlines; + } + } + } + line.remove(); + if (ch != 0x000A) { + line.append(ch); + } + } + eol = FALSE; + } else { + if (ch == 0x000A) { + // LF + eol = TRUE; + if (start) { + if (line.startsWith(ICAL_END_VTIMEZONE, -1)) { + vtzlines->addElement(new UnicodeString(line), status); + if (U_FAILURE(status)) { + goto cleanupVtzlines; + } + success = TRUE; + break; + } + } else { + if (line.startsWith(ICAL_BEGIN_VTIMEZONE, -1)) { + vtzlines->addElement(new UnicodeString(line), status); + if (U_FAILURE(status)) { + goto cleanupVtzlines; + } + line.remove(); + start = TRUE; + eol = FALSE; + } + } + } else { + line.append(ch); + } + } + } + if (!success) { + if (U_SUCCESS(status)) { + status = U_INVALID_STATE_ERROR; + } + goto cleanupVtzlines; + } + parse(status); + return; + +cleanupVtzlines: + delete 
vtzlines; + vtzlines = NULL; +} + +// parser state +#define INI 0 // Initial state +#define VTZ 1 // In VTIMEZONE +#define TZI 2 // In STANDARD or DAYLIGHT + +#define DEF_DSTSAVINGS (60*60*1000) +#define DEF_TZSTARTTIME (0.0) + +void +VTimeZone::parse(UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + if (vtzlines == NULL || vtzlines->size() == 0) { + status = U_INVALID_STATE_ERROR; + return; + } + InitialTimeZoneRule *initialRule = NULL; + RuleBasedTimeZone *rbtz = NULL; + + // timezone ID + UnicodeString tzid; + + int32_t state = INI; + int32_t n = 0; + UBool dst = FALSE; // current zone type + UnicodeString from; // current zone from offset + UnicodeString to; // current zone offset + UnicodeString zonename; // current zone name + UnicodeString dtstart; // current zone starts + UBool isRRULE = FALSE; // true if the rule is described by RRULE + int32_t initialRawOffset = 0; // initial offset + int32_t initialDSTSavings = 0; // initial offset + UDate firstStart = MAX_MILLIS; // the earliest rule start time + UnicodeString name; // RFC2445 prop name + UnicodeString value; // RFC2445 prop value + + UVector *dates = NULL; // list of RDATE or RRULE strings + UVector *rules = NULL; // list of TimeZoneRule instances + + int32_t finalRuleIdx = -1; + int32_t finalRuleCount = 0; + + rules = new UVector(status); + if (U_FAILURE(status)) { + goto cleanupParse; + } + // Set the deleter to remove TimeZoneRule vectors to avoid memory leaks due to unowned TimeZoneRules. 
+ rules->setDeleter(deleteTimeZoneRule); + + dates = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); + if (U_FAILURE(status)) { + goto cleanupParse; + } + if (rules == NULL || dates == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + goto cleanupParse; + } + + for (n = 0; n < vtzlines->size(); n++) { + UnicodeString *line = (UnicodeString*)vtzlines->elementAt(n); + int32_t valueSep = line->indexOf(COLON); + if (valueSep < 0) { + continue; + } + name.setTo(*line, 0, valueSep); + value.setTo(*line, valueSep + 1); + + switch (state) { + case INI: + if (name.compare(ICAL_BEGIN, -1) == 0 + && value.compare(ICAL_VTIMEZONE, -1) == 0) { + state = VTZ; + } + break; + + case VTZ: + if (name.compare(ICAL_TZID, -1) == 0) { + tzid = value; + } else if (name.compare(ICAL_TZURL, -1) == 0) { + tzurl = value; + } else if (name.compare(ICAL_LASTMOD, -1) == 0) { + // Always in 'Z' format, so the offset argument for the parse method + // can be any value. + lastmod = parseDateTimeString(value, 0, status); + if (U_FAILURE(status)) { + goto cleanupParse; + } + } else if (name.compare(ICAL_BEGIN, -1) == 0) { + UBool isDST = (value.compare(ICAL_DAYLIGHT, -1) == 0); + if (value.compare(ICAL_STANDARD, -1) == 0 || isDST) { + // tzid must be ready at this point + if (tzid.length() == 0) { + goto cleanupParse; + } + // initialize current zone properties + if (dates->size() != 0) { + dates->removeAllElements(); + } + isRRULE = FALSE; + from.remove(); + to.remove(); + zonename.remove(); + dst = isDST; + state = TZI; + } else { + // BEGIN property other than STANDARD/DAYLIGHT + // must not be there. 
+ goto cleanupParse; + } + } else if (name.compare(ICAL_END, -1) == 0) { + break; + } + break; + case TZI: + if (name.compare(ICAL_DTSTART, -1) == 0) { + dtstart = value; + } else if (name.compare(ICAL_TZNAME, -1) == 0) { + zonename = value; + } else if (name.compare(ICAL_TZOFFSETFROM, -1) == 0) { + from = value; + } else if (name.compare(ICAL_TZOFFSETTO, -1) == 0) { + to = value; + } else if (name.compare(ICAL_RDATE, -1) == 0) { + // RDATE mixed with RRULE is not supported + if (isRRULE) { + goto cleanupParse; + } + // RDATE value may contain multiple date delimited + // by comma + UBool nextDate = TRUE; + int32_t dstart = 0; + UnicodeString *dstr; + while (nextDate) { + int32_t dend = value.indexOf(COMMA, dstart); + if (dend == -1) { + dstr = new UnicodeString(value, dstart); + nextDate = FALSE; + } else { + dstr = new UnicodeString(value, dstart, dend - dstart); + } + dates->addElement(dstr, status); + if (U_FAILURE(status)) { + goto cleanupParse; + } + dstart = dend + 1; + } + } else if (name.compare(ICAL_RRULE, -1) == 0) { + // RRULE mixed with RDATE is not supported + if (!isRRULE && dates->size() != 0) { + goto cleanupParse; + } + isRRULE = true; + dates->addElement(new UnicodeString(value), status); + if (U_FAILURE(status)) { + goto cleanupParse; + } + } else if (name.compare(ICAL_END, -1) == 0) { + // Mandatory properties + if (dtstart.length() == 0 || from.length() == 0 || to.length() == 0) { + goto cleanupParse; + } + // if zonename is not available, create one from tzid + if (zonename.length() == 0) { + getDefaultTZName(tzid, dst, zonename); + } + + // create a time zone rule + TimeZoneRule *rule = NULL; + int32_t fromOffset = 0; + int32_t toOffset = 0; + int32_t rawOffset = 0; + int32_t dstSavings = 0; + UDate start = 0; + + // Parse TZOFFSETFROM/TZOFFSETTO + fromOffset = offsetStrToMillis(from, status); + toOffset = offsetStrToMillis(to, status); + if (U_FAILURE(status)) { + goto cleanupParse; + } + + if (dst) { + // If daylight, use the previous 
offset as rawoffset if positive + if (toOffset - fromOffset > 0) { + rawOffset = fromOffset; + dstSavings = toOffset - fromOffset; + } else { + // This is rare case.. just use 1 hour DST savings + rawOffset = toOffset - DEF_DSTSAVINGS; + dstSavings = DEF_DSTSAVINGS; + } + } else { + rawOffset = toOffset; + dstSavings = 0; + } + + // start time + start = parseDateTimeString(dtstart, fromOffset, status); + if (U_FAILURE(status)) { + goto cleanupParse; + } + + // Create the rule + UDate actualStart = MAX_MILLIS; + if (isRRULE) { + rule = createRuleByRRULE(zonename, rawOffset, dstSavings, start, dates, fromOffset, status); + } else { + rule = createRuleByRDATE(zonename, rawOffset, dstSavings, start, dates, fromOffset, status); + } + if (U_FAILURE(status) || rule == NULL) { + goto cleanupParse; + } else { + UBool startAvail = rule->getFirstStart(fromOffset, 0, actualStart); + if (startAvail && actualStart < firstStart) { + // save from offset information for the earliest rule + firstStart = actualStart; + // If this is STD, assume the time before this transition + // is DST when the difference is 1 hour. This might not be + // accurate, but VTIMEZONE data does not have such info. 
+ if (dstSavings > 0) { + initialRawOffset = fromOffset; + initialDSTSavings = 0; + } else { + if (fromOffset - toOffset == DEF_DSTSAVINGS) { + initialRawOffset = fromOffset - DEF_DSTSAVINGS; + initialDSTSavings = DEF_DSTSAVINGS; + } else { + initialRawOffset = fromOffset; + initialDSTSavings = 0; + } + } + } + } + rules->addElement(rule, status); + if (U_FAILURE(status)) { + goto cleanupParse; + } + state = VTZ; + } + break; + } + } + // Must have at least one rule + if (rules->size() == 0) { + goto cleanupParse; + } + + // Create a initial rule + getDefaultTZName(tzid, FALSE, zonename); + initialRule = new InitialTimeZoneRule(zonename, + initialRawOffset, initialDSTSavings); + if (initialRule == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + goto cleanupParse; + } + + // Finally, create the RuleBasedTimeZone + rbtz = new RuleBasedTimeZone(tzid, initialRule); + if (rbtz == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + goto cleanupParse; + } + initialRule = NULL; // already adopted by RBTZ, no need to delete + + for (n = 0; n < rules->size(); n++) { + TimeZoneRule *r = (TimeZoneRule*)rules->elementAt(n); + AnnualTimeZoneRule *atzrule = dynamic_cast<AnnualTimeZoneRule *>(r); + if (atzrule != NULL) { + if (atzrule->getEndYear() == AnnualTimeZoneRule::MAX_YEAR) { + finalRuleCount++; + finalRuleIdx = n; + } + } + } + if (finalRuleCount > 2) { + // Too many final rules + status = U_ILLEGAL_ARGUMENT_ERROR; + goto cleanupParse; + } + + if (finalRuleCount == 1) { + if (rules->size() == 1) { + // Only one final rule, only governs the initial rule, + // which is already initialized, thus, we do not need to + // add this transition rule + rules->removeAllElements(); + } else { + // Normalize the final rule + AnnualTimeZoneRule *finalRule = (AnnualTimeZoneRule*)rules->elementAt(finalRuleIdx); + int32_t tmpRaw = finalRule->getRawOffset(); + int32_t tmpDST = finalRule->getDSTSavings(); + + // Find the last non-final rule + UDate finalStart, start; + finalRule->getFirstStart(initialRawOffset, 
initialDSTSavings, finalStart); + start = finalStart; + for (n = 0; n < rules->size(); n++) { + if (finalRuleIdx == n) { + continue; + } + TimeZoneRule *r = (TimeZoneRule*)rules->elementAt(n); + UDate lastStart; + r->getFinalStart(tmpRaw, tmpDST, lastStart); + if (lastStart > start) { + finalRule->getNextStart(lastStart, + r->getRawOffset(), + r->getDSTSavings(), + FALSE, + start); + } + } + + TimeZoneRule *newRule; + UnicodeString tznam; + if (start == finalStart) { + // Transform this into a single transition + newRule = new TimeArrayTimeZoneRule( + finalRule->getName(tznam), + finalRule->getRawOffset(), + finalRule->getDSTSavings(), + &finalStart, + 1, + DateTimeRule::UTC_TIME); + } else { + // Update the end year + int32_t y, m, d, dow, doy, mid; + Grego::timeToFields(start, y, m, d, dow, doy, mid); + newRule = new AnnualTimeZoneRule( + finalRule->getName(tznam), + finalRule->getRawOffset(), + finalRule->getDSTSavings(), + *(finalRule->getRule()), + finalRule->getStartYear(), + y); + } + if (newRule == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + goto cleanupParse; + } + rules->removeElementAt(finalRuleIdx); + rules->addElement(newRule, status); + if (U_FAILURE(status)) { + delete newRule; + goto cleanupParse; + } + } + } + + while (!rules->isEmpty()) { + TimeZoneRule *tzr = (TimeZoneRule*)rules->orphanElementAt(0); + rbtz->addTransitionRule(tzr, status); + if (U_FAILURE(status)) { + goto cleanupParse; + } + } + rbtz->complete(status); + if (U_FAILURE(status)) { + goto cleanupParse; + } + delete rules; + delete dates; + + tz = rbtz; + setID(tzid); + return; + +cleanupParse: + if (rules != NULL) { + while (!rules->isEmpty()) { + TimeZoneRule *r = (TimeZoneRule*)rules->orphanElementAt(0); + delete r; + } + delete rules; + } + if (dates != NULL) { + delete dates; + } + if (initialRule != NULL) { + delete initialRule; + } + if (rbtz != NULL) { + delete rbtz; + } + return; +} + +void +VTimeZone::write(VTZWriter& writer, UErrorCode& status) const { + if (vtzlines 
!= NULL) { + for (int32_t i = 0; i < vtzlines->size(); i++) { + UnicodeString *line = (UnicodeString*)vtzlines->elementAt(i); + if (line->startsWith(ICAL_TZURL, -1) + && line->charAt(u_strlen(ICAL_TZURL)) == COLON) { + writer.write(ICAL_TZURL); + writer.write(COLON); + writer.write(tzurl); + writer.write(ICAL_NEWLINE); + } else if (line->startsWith(ICAL_LASTMOD, -1) + && line->charAt(u_strlen(ICAL_LASTMOD)) == COLON) { + UnicodeString utcString; + writer.write(ICAL_LASTMOD); + writer.write(COLON); + writer.write(getUTCDateTimeString(lastmod, utcString)); + writer.write(ICAL_NEWLINE); + } else { + writer.write(*line); + writer.write(ICAL_NEWLINE); + } + } + } else { + UnicodeString icutzprop; + UVector customProps(nullptr, uhash_compareUnicodeString, status); + if (olsonzid.length() > 0 && icutzver.length() > 0) { + icutzprop.append(olsonzid); + icutzprop.append(u'['); + icutzprop.append(icutzver); + icutzprop.append(u']'); + customProps.addElement(&icutzprop, status); + } + writeZone(writer, *tz, &customProps, status); + } +} + +void +VTimeZone::write(UDate start, VTZWriter& writer, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + InitialTimeZoneRule *initial = NULL; + UVector *transitionRules = NULL; + UVector customProps(uprv_deleteUObject, uhash_compareUnicodeString, status); + UnicodeString tzid; + + // Extract rules applicable to dates after the start time + getTimeZoneRulesAfter(start, initial, transitionRules, status); + if (U_FAILURE(status)) { + return; + } + + // Create a RuleBasedTimeZone with the subset rule + getID(tzid); + RuleBasedTimeZone rbtz(tzid, initial); + if (transitionRules != NULL) { + while (!transitionRules->isEmpty()) { + TimeZoneRule *tr = (TimeZoneRule*)transitionRules->orphanElementAt(0); + rbtz.addTransitionRule(tr, status); + if (U_FAILURE(status)) { + goto cleanupWritePartial; + } + } + delete transitionRules; + transitionRules = NULL; + } + rbtz.complete(status); + if (U_FAILURE(status)) { + goto 
cleanupWritePartial; + } + + if (olsonzid.length() > 0 && icutzver.length() > 0) { + UnicodeString *icutzprop = new UnicodeString(ICU_TZINFO_PROP); + icutzprop->append(olsonzid); + icutzprop->append((UChar)0x005B/*'['*/); + icutzprop->append(icutzver); + icutzprop->append(ICU_TZINFO_PARTIAL, -1); + appendMillis(start, *icutzprop); + icutzprop->append((UChar)0x005D/*']'*/); + customProps.addElement(icutzprop, status); + if (U_FAILURE(status)) { + delete icutzprop; + goto cleanupWritePartial; + } + } + writeZone(writer, rbtz, &customProps, status); + return; + +cleanupWritePartial: + if (initial != NULL) { + delete initial; + } + if (transitionRules != NULL) { + while (!transitionRules->isEmpty()) { + TimeZoneRule *tr = (TimeZoneRule*)transitionRules->orphanElementAt(0); + delete tr; + } + delete transitionRules; + } +} + +void +VTimeZone::writeSimple(UDate time, VTZWriter& writer, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + + UVector customProps(uprv_deleteUObject, uhash_compareUnicodeString, status); + UnicodeString tzid; + + // Extract simple rules + InitialTimeZoneRule *initial = NULL; + AnnualTimeZoneRule *std = NULL, *dst = NULL; + getSimpleRulesNear(time, initial, std, dst, status); + if (U_SUCCESS(status)) { + // Create a RuleBasedTimeZone with the subset rule + getID(tzid); + RuleBasedTimeZone rbtz(tzid, initial); + if (std != NULL && dst != NULL) { + rbtz.addTransitionRule(std, status); + rbtz.addTransitionRule(dst, status); + } + if (U_FAILURE(status)) { + goto cleanupWriteSimple; + } + + if (olsonzid.length() > 0 && icutzver.length() > 0) { + UnicodeString *icutzprop = new UnicodeString(ICU_TZINFO_PROP); + icutzprop->append(olsonzid); + icutzprop->append((UChar)0x005B/*'['*/); + icutzprop->append(icutzver); + icutzprop->append(ICU_TZINFO_SIMPLE, -1); + appendMillis(time, *icutzprop); + icutzprop->append((UChar)0x005D/*']'*/); + customProps.addElement(icutzprop, status); + if (U_FAILURE(status)) { + delete icutzprop; + goto 
cleanupWriteSimple; + } + } + writeZone(writer, rbtz, &customProps, status); + } + return; + +cleanupWriteSimple: + if (initial != NULL) { + delete initial; + } + if (std != NULL) { + delete std; + } + if (dst != NULL) { + delete dst; + } +} + +void +VTimeZone::writeZone(VTZWriter& w, BasicTimeZone& basictz, + UVector* customProps, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + writeHeaders(w, status); + if (U_FAILURE(status)) { + return; + } + + if (customProps != NULL) { + for (int32_t i = 0; i < customProps->size(); i++) { + UnicodeString *custprop = (UnicodeString*)customProps->elementAt(i); + w.write(*custprop); + w.write(ICAL_NEWLINE); + } + } + + UDate t = MIN_MILLIS; + UnicodeString dstName; + int32_t dstFromOffset = 0; + int32_t dstFromDSTSavings = 0; + int32_t dstToOffset = 0; + int32_t dstStartYear = 0; + int32_t dstMonth = 0; + int32_t dstDayOfWeek = 0; + int32_t dstWeekInMonth = 0; + int32_t dstMillisInDay = 0; + UDate dstStartTime = 0.0; + UDate dstUntilTime = 0.0; + int32_t dstCount = 0; + AnnualTimeZoneRule *finalDstRule = NULL; + + UnicodeString stdName; + int32_t stdFromOffset = 0; + int32_t stdFromDSTSavings = 0; + int32_t stdToOffset = 0; + int32_t stdStartYear = 0; + int32_t stdMonth = 0; + int32_t stdDayOfWeek = 0; + int32_t stdWeekInMonth = 0; + int32_t stdMillisInDay = 0; + UDate stdStartTime = 0.0; + UDate stdUntilTime = 0.0; + int32_t stdCount = 0; + AnnualTimeZoneRule *finalStdRule = NULL; + + int32_t year, month, dom, dow, doy, mid; + UBool hasTransitions = FALSE; + TimeZoneTransition tzt; + UBool tztAvail; + UnicodeString name; + UBool isDst; + + // Going through all transitions + while (TRUE) { + tztAvail = basictz.getNextTransition(t, FALSE, tzt); + if (!tztAvail) { + break; + } + hasTransitions = TRUE; + t = tzt.getTime(); + tzt.getTo()->getName(name); + isDst = (tzt.getTo()->getDSTSavings() != 0); + int32_t fromOffset = tzt.getFrom()->getRawOffset() + tzt.getFrom()->getDSTSavings(); + int32_t fromDSTSavings = 
tzt.getFrom()->getDSTSavings(); + int32_t toOffset = tzt.getTo()->getRawOffset() + tzt.getTo()->getDSTSavings(); + Grego::timeToFields(tzt.getTime() + fromOffset, year, month, dom, dow, doy, mid); + int32_t weekInMonth = Grego::dayOfWeekInMonth(year, month, dom); + UBool sameRule = FALSE; + const AnnualTimeZoneRule *atzrule; + if (isDst) { + if (finalDstRule == NULL + && (atzrule = dynamic_cast(tzt.getTo())) != NULL + && atzrule->getEndYear() == AnnualTimeZoneRule::MAX_YEAR + ) { + finalDstRule = atzrule->clone(); + } + if (dstCount > 0) { + if (year == dstStartYear + dstCount + && name.compare(dstName) == 0 + && dstFromOffset == fromOffset + && dstToOffset == toOffset + && dstMonth == month + && dstDayOfWeek == dow + && dstWeekInMonth == weekInMonth + && dstMillisInDay == mid) { + // Update until time + dstUntilTime = t; + dstCount++; + sameRule = TRUE; + } + if (!sameRule) { + if (dstCount == 1) { + writeZonePropsByTime(w, TRUE, dstName, dstFromOffset, dstToOffset, dstStartTime, + TRUE, status); + } else { + writeZonePropsByDOW(w, TRUE, dstName, dstFromOffset, dstToOffset, + dstMonth, dstWeekInMonth, dstDayOfWeek, dstStartTime, dstUntilTime, status); + } + if (U_FAILURE(status)) { + goto cleanupWriteZone; + } + } + } + if (!sameRule) { + // Reset this DST information + dstName = name; + dstFromOffset = fromOffset; + dstFromDSTSavings = fromDSTSavings; + dstToOffset = toOffset; + dstStartYear = year; + dstMonth = month; + dstDayOfWeek = dow; + dstWeekInMonth = weekInMonth; + dstMillisInDay = mid; + dstStartTime = dstUntilTime = t; + dstCount = 1; + } + if (finalStdRule != NULL && finalDstRule != NULL) { + break; + } + } else { + if (finalStdRule == NULL + && (atzrule = dynamic_cast(tzt.getTo())) != NULL + && atzrule->getEndYear() == AnnualTimeZoneRule::MAX_YEAR + ) { + finalStdRule = atzrule->clone(); + } + if (stdCount > 0) { + if (year == stdStartYear + stdCount + && name.compare(stdName) == 0 + && stdFromOffset == fromOffset + && stdToOffset == toOffset + && 
stdMonth == month + && stdDayOfWeek == dow + && stdWeekInMonth == weekInMonth + && stdMillisInDay == mid) { + // Update until time + stdUntilTime = t; + stdCount++; + sameRule = TRUE; + } + if (!sameRule) { + if (stdCount == 1) { + writeZonePropsByTime(w, FALSE, stdName, stdFromOffset, stdToOffset, stdStartTime, + TRUE, status); + } else { + writeZonePropsByDOW(w, FALSE, stdName, stdFromOffset, stdToOffset, + stdMonth, stdWeekInMonth, stdDayOfWeek, stdStartTime, stdUntilTime, status); + } + if (U_FAILURE(status)) { + goto cleanupWriteZone; + } + } + } + if (!sameRule) { + // Reset this STD information + stdName = name; + stdFromOffset = fromOffset; + stdFromDSTSavings = fromDSTSavings; + stdToOffset = toOffset; + stdStartYear = year; + stdMonth = month; + stdDayOfWeek = dow; + stdWeekInMonth = weekInMonth; + stdMillisInDay = mid; + stdStartTime = stdUntilTime = t; + stdCount = 1; + } + if (finalStdRule != NULL && finalDstRule != NULL) { + break; + } + } + } + if (!hasTransitions) { + // No transition - put a single non transition RDATE + int32_t raw, dst, offset; + basictz.getOffset(0.0/*any time*/, FALSE, raw, dst, status); + if (U_FAILURE(status)) { + goto cleanupWriteZone; + } + offset = raw + dst; + isDst = (dst != 0); + UnicodeString tzid; + basictz.getID(tzid); + getDefaultTZName(tzid, isDst, name); + writeZonePropsByTime(w, isDst, name, + offset, offset, DEF_TZSTARTTIME - offset, FALSE, status); + if (U_FAILURE(status)) { + goto cleanupWriteZone; + } + } else { + if (dstCount > 0) { + if (finalDstRule == NULL) { + if (dstCount == 1) { + writeZonePropsByTime(w, TRUE, dstName, dstFromOffset, dstToOffset, dstStartTime, + TRUE, status); + } else { + writeZonePropsByDOW(w, TRUE, dstName, dstFromOffset, dstToOffset, + dstMonth, dstWeekInMonth, dstDayOfWeek, dstStartTime, dstUntilTime, status); + } + if (U_FAILURE(status)) { + goto cleanupWriteZone; + } + } else { + if (dstCount == 1) { + writeFinalRule(w, TRUE, finalDstRule, + dstFromOffset - dstFromDSTSavings, 
dstFromDSTSavings, dstStartTime, status); + } else { + // Use a single rule if possible + if (isEquivalentDateRule(dstMonth, dstWeekInMonth, dstDayOfWeek, finalDstRule->getRule())) { + writeZonePropsByDOW(w, TRUE, dstName, dstFromOffset, dstToOffset, + dstMonth, dstWeekInMonth, dstDayOfWeek, dstStartTime, MAX_MILLIS, status); + } else { + // Not equivalent rule - write out two different rules + writeZonePropsByDOW(w, TRUE, dstName, dstFromOffset, dstToOffset, + dstMonth, dstWeekInMonth, dstDayOfWeek, dstStartTime, dstUntilTime, status); + if (U_FAILURE(status)) { + goto cleanupWriteZone; + } + UDate nextStart; + UBool nextStartAvail = finalDstRule->getNextStart(dstUntilTime, dstFromOffset - dstFromDSTSavings, dstFromDSTSavings, false, nextStart); + U_ASSERT(nextStartAvail); + if (nextStartAvail) { + writeFinalRule(w, TRUE, finalDstRule, + dstFromOffset - dstFromDSTSavings, dstFromDSTSavings, nextStart, status); + } + } + } + if (U_FAILURE(status)) { + goto cleanupWriteZone; + } + } + } + if (stdCount > 0) { + if (finalStdRule == NULL) { + if (stdCount == 1) { + writeZonePropsByTime(w, FALSE, stdName, stdFromOffset, stdToOffset, stdStartTime, + TRUE, status); + } else { + writeZonePropsByDOW(w, FALSE, stdName, stdFromOffset, stdToOffset, + stdMonth, stdWeekInMonth, stdDayOfWeek, stdStartTime, stdUntilTime, status); + } + if (U_FAILURE(status)) { + goto cleanupWriteZone; + } + } else { + if (stdCount == 1) { + writeFinalRule(w, FALSE, finalStdRule, + stdFromOffset - stdFromDSTSavings, stdFromDSTSavings, stdStartTime, status); + } else { + // Use a single rule if possible + if (isEquivalentDateRule(stdMonth, stdWeekInMonth, stdDayOfWeek, finalStdRule->getRule())) { + writeZonePropsByDOW(w, FALSE, stdName, stdFromOffset, stdToOffset, + stdMonth, stdWeekInMonth, stdDayOfWeek, stdStartTime, MAX_MILLIS, status); + } else { + // Not equivalent rule - write out two different rules + writeZonePropsByDOW(w, FALSE, stdName, stdFromOffset, stdToOffset, + stdMonth, 
stdWeekInMonth, stdDayOfWeek, stdStartTime, stdUntilTime, status); + if (U_FAILURE(status)) { + goto cleanupWriteZone; + } + UDate nextStart; + UBool nextStartAvail = finalStdRule->getNextStart(stdUntilTime, stdFromOffset - stdFromDSTSavings, stdFromDSTSavings, false, nextStart); + U_ASSERT(nextStartAvail); + if (nextStartAvail) { + writeFinalRule(w, FALSE, finalStdRule, + stdFromOffset - stdFromDSTSavings, stdFromDSTSavings, nextStart, status); + } + } + } + if (U_FAILURE(status)) { + goto cleanupWriteZone; + } + } + } + } + writeFooter(w, status); + +cleanupWriteZone: + + if (finalStdRule != NULL) { + delete finalStdRule; + } + if (finalDstRule != NULL) { + delete finalDstRule; + } +} + +void +VTimeZone::writeHeaders(VTZWriter& writer, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + UnicodeString tzid; + tz->getID(tzid); + + writer.write(ICAL_BEGIN); + writer.write(COLON); + writer.write(ICAL_VTIMEZONE); + writer.write(ICAL_NEWLINE); + writer.write(ICAL_TZID); + writer.write(COLON); + writer.write(tzid); + writer.write(ICAL_NEWLINE); + if (tzurl.length() != 0) { + writer.write(ICAL_TZURL); + writer.write(COLON); + writer.write(tzurl); + writer.write(ICAL_NEWLINE); + } + if (lastmod != MAX_MILLIS) { + UnicodeString lastmodStr; + writer.write(ICAL_LASTMOD); + writer.write(COLON); + writer.write(getUTCDateTimeString(lastmod, lastmodStr)); + writer.write(ICAL_NEWLINE); + } +} + +/* + * Write the closing section of the VTIMEZONE definition block + */ +void +VTimeZone::writeFooter(VTZWriter& writer, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + writer.write(ICAL_END); + writer.write(COLON); + writer.write(ICAL_VTIMEZONE); + writer.write(ICAL_NEWLINE); +} + +/* + * Write a single start time + */ +void +VTimeZone::writeZonePropsByTime(VTZWriter& writer, UBool isDst, const UnicodeString& zonename, + int32_t fromOffset, int32_t toOffset, UDate time, UBool withRDATE, + UErrorCode& status) const { + if (U_FAILURE(status)) { + 
return; + } + beginZoneProps(writer, isDst, zonename, fromOffset, toOffset, time, status); + if (U_FAILURE(status)) { + return; + } + if (withRDATE) { + writer.write(ICAL_RDATE); + writer.write(COLON); + UnicodeString timestr; + writer.write(getDateTimeString(time + fromOffset, timestr)); + writer.write(ICAL_NEWLINE); + } + endZoneProps(writer, isDst, status); + if (U_FAILURE(status)) { + return; + } +} + +/* + * Write start times defined by a DOM rule using VTIMEZONE RRULE + */ +void +VTimeZone::writeZonePropsByDOM(VTZWriter& writer, UBool isDst, const UnicodeString& zonename, + int32_t fromOffset, int32_t toOffset, + int32_t month, int32_t dayOfMonth, UDate startTime, UDate untilTime, + UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + beginZoneProps(writer, isDst, zonename, fromOffset, toOffset, startTime, status); + if (U_FAILURE(status)) { + return; + } + beginRRULE(writer, month, status); + if (U_FAILURE(status)) { + return; + } + writer.write(ICAL_BYMONTHDAY); + writer.write(EQUALS_SIGN); + UnicodeString dstr; + appendAsciiDigits(dayOfMonth, 0, dstr); + writer.write(dstr); + if (untilTime != MAX_MILLIS) { + appendUNTIL(writer, getDateTimeString(untilTime + fromOffset, dstr), status); + if (U_FAILURE(status)) { + return; + } + } + writer.write(ICAL_NEWLINE); + endZoneProps(writer, isDst, status); +} + +/* + * Write start times defined by a DOW rule using VTIMEZONE RRULE + */ +void +VTimeZone::writeZonePropsByDOW(VTZWriter& writer, UBool isDst, const UnicodeString& zonename, + int32_t fromOffset, int32_t toOffset, + int32_t month, int32_t weekInMonth, int32_t dayOfWeek, + UDate startTime, UDate untilTime, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + beginZoneProps(writer, isDst, zonename, fromOffset, toOffset, startTime, status); + if (U_FAILURE(status)) { + return; + } + beginRRULE(writer, month, status); + if (U_FAILURE(status)) { + return; + } + writer.write(ICAL_BYDAY); + writer.write(EQUALS_SIGN); + 
UnicodeString dstr; + appendAsciiDigits(weekInMonth, 0, dstr); + writer.write(dstr); // -4, -3, -2, -1, 1, 2, 3, 4 + writer.write(ICAL_DOW_NAMES[dayOfWeek - 1]); // SU, MO, TU... + + if (untilTime != MAX_MILLIS) { + appendUNTIL(writer, getDateTimeString(untilTime + fromOffset, dstr), status); + if (U_FAILURE(status)) { + return; + } + } + writer.write(ICAL_NEWLINE); + endZoneProps(writer, isDst, status); +} + +/* + * Write start times defined by a DOW_GEQ_DOM rule using VTIMEZONE RRULE + */ +void +VTimeZone::writeZonePropsByDOW_GEQ_DOM(VTZWriter& writer, UBool isDst, const UnicodeString& zonename, + int32_t fromOffset, int32_t toOffset, + int32_t month, int32_t dayOfMonth, int32_t dayOfWeek, + UDate startTime, UDate untilTime, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + // Check if this rule can be converted to DOW rule + if (dayOfMonth%7 == 1) { + // Can be represented by DOW rule + writeZonePropsByDOW(writer, isDst, zonename, fromOffset, toOffset, + month, (dayOfMonth + 6)/7, dayOfWeek, startTime, untilTime, status); + if (U_FAILURE(status)) { + return; + } + } else if (month != UCAL_FEBRUARY && (MONTHLENGTH[month] - dayOfMonth)%7 == 6) { + // Can be represented by DOW rule with negative week number + writeZonePropsByDOW(writer, isDst, zonename, fromOffset, toOffset, + month, -1*((MONTHLENGTH[month] - dayOfMonth + 1)/7), dayOfWeek, startTime, untilTime, status); + if (U_FAILURE(status)) { + return; + } + } else { + // Otherwise, use BYMONTHDAY to include all possible dates + beginZoneProps(writer, isDst, zonename, fromOffset, toOffset, startTime, status); + if (U_FAILURE(status)) { + return; + } + // Check if all days are in the same month + int32_t startDay = dayOfMonth; + int32_t currentMonthDays = 7; + + if (dayOfMonth <= 0) { + // The start day is in previous month + int32_t prevMonthDays = 1 - dayOfMonth; + currentMonthDays -= prevMonthDays; + + int32_t prevMonth = (month - 1) < 0 ? 
11 : month - 1; + + // Note: When a rule is separated into two, UNTIL attribute needs to be + // calculated for each of them. For now, we skip this, because we basically use this method + // only for final rules, which does not have the UNTIL attribute + writeZonePropsByDOW_GEQ_DOM_sub(writer, prevMonth, -prevMonthDays, dayOfWeek, prevMonthDays, + MAX_MILLIS /* Do not use UNTIL */, fromOffset, status); + if (U_FAILURE(status)) { + return; + } + + // Start from 1 for the rest + startDay = 1; + } else if (dayOfMonth + 6 > MONTHLENGTH[month]) { + // Note: This code does not actually work well in February. For now, days in month in + // non-leap year. + int32_t nextMonthDays = dayOfMonth + 6 - MONTHLENGTH[month]; + currentMonthDays -= nextMonthDays; + + int32_t nextMonth = (month + 1) > 11 ? 0 : month + 1; + + writeZonePropsByDOW_GEQ_DOM_sub(writer, nextMonth, 1, dayOfWeek, nextMonthDays, + MAX_MILLIS /* Do not use UNTIL */, fromOffset, status); + if (U_FAILURE(status)) { + return; + } + } + writeZonePropsByDOW_GEQ_DOM_sub(writer, month, startDay, dayOfWeek, currentMonthDays, + untilTime, fromOffset, status); + if (U_FAILURE(status)) { + return; + } + endZoneProps(writer, isDst, status); + } +} + +/* + * Called from writeZonePropsByDOW_GEQ_DOM + */ +void +VTimeZone::writeZonePropsByDOW_GEQ_DOM_sub(VTZWriter& writer, int32_t month, int32_t dayOfMonth, + int32_t dayOfWeek, int32_t numDays, + UDate untilTime, int32_t fromOffset, UErrorCode& status) const { + + if (U_FAILURE(status)) { + return; + } + int32_t startDayNum = dayOfMonth; + UBool isFeb = (month == UCAL_FEBRUARY); + if (dayOfMonth < 0 && !isFeb) { + // Use positive number if possible + startDayNum = MONTHLENGTH[month] + dayOfMonth + 1; + } + beginRRULE(writer, month, status); + if (U_FAILURE(status)) { + return; + } + writer.write(ICAL_BYDAY); + writer.write(EQUALS_SIGN); + writer.write(ICAL_DOW_NAMES[dayOfWeek - 1]); // SU, MO, TU... 
+ writer.write(SEMICOLON); + writer.write(ICAL_BYMONTHDAY); + writer.write(EQUALS_SIGN); + + UnicodeString dstr; + appendAsciiDigits(startDayNum, 0, dstr); + writer.write(dstr); + for (int32_t i = 1; i < numDays; i++) { + writer.write(COMMA); + dstr.remove(); + appendAsciiDigits(startDayNum + i, 0, dstr); + writer.write(dstr); + } + + if (untilTime != MAX_MILLIS) { + appendUNTIL(writer, getDateTimeString(untilTime + fromOffset, dstr), status); + if (U_FAILURE(status)) { + return; + } + } + writer.write(ICAL_NEWLINE); +} + +/* + * Write start times defined by a DOW_LEQ_DOM rule using VTIMEZONE RRULE + */ +void +VTimeZone::writeZonePropsByDOW_LEQ_DOM(VTZWriter& writer, UBool isDst, const UnicodeString& zonename, + int32_t fromOffset, int32_t toOffset, + int32_t month, int32_t dayOfMonth, int32_t dayOfWeek, + UDate startTime, UDate untilTime, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + // Check if this rule can be converted to DOW rule + if (dayOfMonth%7 == 0) { + // Can be represented by DOW rule + writeZonePropsByDOW(writer, isDst, zonename, fromOffset, toOffset, + month, dayOfMonth/7, dayOfWeek, startTime, untilTime, status); + } else if (month != UCAL_FEBRUARY && (MONTHLENGTH[month] - dayOfMonth)%7 == 0){ + // Can be represented by DOW rule with negative week number + writeZonePropsByDOW(writer, isDst, zonename, fromOffset, toOffset, + month, -1*((MONTHLENGTH[month] - dayOfMonth)/7 + 1), dayOfWeek, startTime, untilTime, status); + } else if (month == UCAL_FEBRUARY && dayOfMonth == 29) { + // Specical case for February + writeZonePropsByDOW(writer, isDst, zonename, fromOffset, toOffset, + UCAL_FEBRUARY, -1, dayOfWeek, startTime, untilTime, status); + } else { + // Otherwise, convert this to DOW_GEQ_DOM rule + writeZonePropsByDOW_GEQ_DOM(writer, isDst, zonename, fromOffset, toOffset, + month, dayOfMonth - 6, dayOfWeek, startTime, untilTime, status); + } +} + +/* + * Write the final time zone rule using RRULE, with no UNTIL attribute + */ 
+void +VTimeZone::writeFinalRule(VTZWriter& writer, UBool isDst, const AnnualTimeZoneRule* rule, + int32_t fromRawOffset, int32_t fromDSTSavings, + UDate startTime, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + UBool modifiedRule = TRUE; + const DateTimeRule *dtrule = toWallTimeRule(rule->getRule(), fromRawOffset, fromDSTSavings); + if (dtrule == NULL) { + modifiedRule = FALSE; + dtrule = rule->getRule(); + } + + // If the rule's mills in a day is out of range, adjust start time. + // Olson tzdata supports 24:00 of a day, but VTIMEZONE does not. + // See ticket#7008/#7518 + + int32_t timeInDay = dtrule->getRuleMillisInDay(); + if (timeInDay < 0) { + startTime = startTime + (0 - timeInDay); + } else if (timeInDay >= U_MILLIS_PER_DAY) { + startTime = startTime - (timeInDay - (U_MILLIS_PER_DAY - 1)); + } + + int32_t toOffset = rule->getRawOffset() + rule->getDSTSavings(); + UnicodeString name; + rule->getName(name); + switch (dtrule->getDateRuleType()) { + case DateTimeRule::DOM: + writeZonePropsByDOM(writer, isDst, name, fromRawOffset + fromDSTSavings, toOffset, + dtrule->getRuleMonth(), dtrule->getRuleDayOfMonth(), startTime, MAX_MILLIS, status); + break; + case DateTimeRule::DOW: + writeZonePropsByDOW(writer, isDst, name, fromRawOffset + fromDSTSavings, toOffset, + dtrule->getRuleMonth(), dtrule->getRuleWeekInMonth(), dtrule->getRuleDayOfWeek(), startTime, MAX_MILLIS, status); + break; + case DateTimeRule::DOW_GEQ_DOM: + writeZonePropsByDOW_GEQ_DOM(writer, isDst, name, fromRawOffset + fromDSTSavings, toOffset, + dtrule->getRuleMonth(), dtrule->getRuleDayOfMonth(), dtrule->getRuleDayOfWeek(), startTime, MAX_MILLIS, status); + break; + case DateTimeRule::DOW_LEQ_DOM: + writeZonePropsByDOW_LEQ_DOM(writer, isDst, name, fromRawOffset + fromDSTSavings, toOffset, + dtrule->getRuleMonth(), dtrule->getRuleDayOfMonth(), dtrule->getRuleDayOfWeek(), startTime, MAX_MILLIS, status); + break; + } + if (modifiedRule) { + delete dtrule; + } +} + +/* + * 
Write the opening section of zone properties + */ +void +VTimeZone::beginZoneProps(VTZWriter& writer, UBool isDst, const UnicodeString& zonename, + int32_t fromOffset, int32_t toOffset, UDate startTime, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + writer.write(ICAL_BEGIN); + writer.write(COLON); + if (isDst) { + writer.write(ICAL_DAYLIGHT); + } else { + writer.write(ICAL_STANDARD); + } + writer.write(ICAL_NEWLINE); + + UnicodeString dstr; + + // TZOFFSETTO + writer.write(ICAL_TZOFFSETTO); + writer.write(COLON); + millisToOffset(toOffset, dstr); + writer.write(dstr); + writer.write(ICAL_NEWLINE); + + // TZOFFSETFROM + writer.write(ICAL_TZOFFSETFROM); + writer.write(COLON); + millisToOffset(fromOffset, dstr); + writer.write(dstr); + writer.write(ICAL_NEWLINE); + + // TZNAME + writer.write(ICAL_TZNAME); + writer.write(COLON); + writer.write(zonename); + writer.write(ICAL_NEWLINE); + + // DTSTART + writer.write(ICAL_DTSTART); + writer.write(COLON); + writer.write(getDateTimeString(startTime + fromOffset, dstr)); + writer.write(ICAL_NEWLINE); +} + +/* + * Writes the closing section of zone properties + */ +void +VTimeZone::endZoneProps(VTZWriter& writer, UBool isDst, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + // END:STANDARD or END:DAYLIGHT + writer.write(ICAL_END); + writer.write(COLON); + if (isDst) { + writer.write(ICAL_DAYLIGHT); + } else { + writer.write(ICAL_STANDARD); + } + writer.write(ICAL_NEWLINE); +} + +/* + * Write the beggining part of RRULE line + */ +void +VTimeZone::beginRRULE(VTZWriter& writer, int32_t month, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + UnicodeString dstr; + writer.write(ICAL_RRULE); + writer.write(COLON); + writer.write(ICAL_FREQ); + writer.write(EQUALS_SIGN); + writer.write(ICAL_YEARLY); + writer.write(SEMICOLON); + writer.write(ICAL_BYMONTH); + writer.write(EQUALS_SIGN); + appendAsciiDigits(month + 1, 0, dstr); + writer.write(dstr); + 
writer.write(SEMICOLON); +} + +/* + * Append the UNTIL attribute after RRULE line + */ +void +VTimeZone::appendUNTIL(VTZWriter& writer, const UnicodeString& until, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + if (until.length() > 0) { + writer.write(SEMICOLON); + writer.write(ICAL_UNTIL); + writer.write(EQUALS_SIGN); + writer.write(until); + } +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +//eof diff --git a/libicui18n/libicui18n/vtzone.cpp.patch b/libicui18n/libicui18n/vtzone.cpp.patch new file mode 100644 index 0000000..d7cff87 --- /dev/null +++ b/libicui18n/libicui18n/vtzone.cpp.patch @@ -0,0 +1,11 @@ +--- libicui18n/i18n/vtzone.cpp 2019-12-23 14:38:40.254889181 +0300 ++++ libicui18n/vtzone.cpp 2020-07-21 14:18:41.380332586 +0300 +@@ -1000,7 +1000,7 @@ VTimeZone::operator=(const VTimeZone& ri + if (this == &right) { + return *this; + } +- if (*this != right) { ++ if (!(*this == right)) { + BasicTimeZone::operator=(right); + if (tz != NULL) { + delete tz; diff --git a/libicui18n/manifest b/libicui18n/manifest index ceb84e6..5bb6a95 100644 --- a/libicui18n/manifest +++ b/libicui18n/manifest @@ -1,10 +1,10 @@ : 1 name: libicui18n -version: 65.1.0+3 +version: 65.1.0+4 upstream-version: 65.1 project: icu summary: ICU high-level internationalization C/C++ library -license: Unicode, BSD3, BSD2 ; Unicode for the most of original files. +license: Unicode-DFS-2016 AND BSD-3-Clause AND BSD-2-Clause ; Unicode License Agreement for the most of original files. topics: C, C++, Unicode, internationalization description-file: README url: http://site.icu-project.org/ @@ -13,7 +13,7 @@ src-url: https://git.build2.org/cgit/packaging/icu/icu/tree/libicui18n/ package-url: https://git.build2.org/cgit/packaging/icu/ email: icu-support@lists.sourceforge.net ; Mailing list. package-email: packaging@build2.org ; Mailing list. 
-build-email: builds@build2.org +build-error-email: builds@build2.org builds: all depends: * build2 >= 0.12.0 depends: * bpkg >= 0.12.0 diff --git a/libicuio/libicuio/buildfile b/libicuio/libicuio/buildfile index 8d5da32..1334d2f 100644 --- a/libicuio/libicuio/buildfile +++ b/libicuio/libicuio/buildfile @@ -1,10 +1,10 @@ # file : libicuio/buildfile # license : Unicode License; see accompanying LICENSE file -import int_libs = libicui18n%lib{icui18n} -import int_libs += libicuuc%lib{icuuc} +import intf_libs = libicui18n%lib{icui18n} +import intf_libs += libicuuc%lib{icuuc} -lib{icuio}: {hxx cxx}{**} $int_libs +lib{icuio}: {hxx cxx}{**} $intf_libs tclass = $cxx.target.class tsys = $cxx.target.system @@ -91,7 +91,7 @@ switch $tclass, $tsys lib{icuio}: { cc.export.poptions = "-I$src_base/io" - cc.export.libs = $int_libs + cc.export.libs = $intf_libs } liba{icuio}: cc.export.poptions += -DU_STATIC_IMPLEMENTATION diff --git a/libicuio/manifest b/libicuio/manifest index 4261cdd..51590c3 100644 --- a/libicuio/manifest +++ b/libicuio/manifest @@ -1,10 +1,10 @@ : 1 name: libicuio -version: 65.1.0+3 +version: 65.1.0+4 upstream-version: 65.1 project: icu summary: ICU input/output C/C++ library -license: Unicode, BSD3, BSD2 ; Unicode for the most of original files. +license: Unicode-DFS-2016 AND BSD-3-Clause AND BSD-2-Clause ; Unicode License Agreement for the most of original files. topics: C, C++, Unicode, internationalization, input/output description-file: README url: http://site.icu-project.org/ diff --git a/libicuuc/README-DEV b/libicuuc/README-DEV index daebd4e..281a901 100644 --- a/libicuuc/README-DEV +++ b/libicuuc/README-DEV @@ -20,6 +20,11 @@ upstream's fix (commit b7d08bc04a4296982fcef8b6b8a354a9e4e7afca) as a base $ cp libicu/uc/unistr.cpp libicu $ patch -p0 all valid values. 110000 for codepoints +#define UNICODESET_HIGH 0x0110000 + +// LOW <= all valid values. 
ZERO for codepoints +#define UNICODESET_LOW 0x000000 + +/** Max list [0, 1, 2, ..., max code point, HIGH] */ +constexpr int32_t MAX_LENGTH = UNICODESET_HIGH + 1; + +U_NAMESPACE_BEGIN + +SymbolTable::~SymbolTable() {} + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSet) + +/** + * Modify the given UChar32 variable so that it is in range, by + * pinning values < UNICODESET_LOW to UNICODESET_LOW, and + * pinning values > UNICODESET_HIGH-1 to UNICODESET_HIGH-1. + * It modifies its argument in-place and also returns it. + */ +static inline UChar32 pinCodePoint(UChar32& c) { + if (c < UNICODESET_LOW) { + c = UNICODESET_LOW; + } else if (c > (UNICODESET_HIGH-1)) { + c = (UNICODESET_HIGH-1); + } + return c; +} + +//---------------------------------------------------------------- +// Debugging +//---------------------------------------------------------------- + +// DO NOT DELETE THIS CODE. This code is used to debug memory leaks. +// To enable the debugging, define the symbol DEBUG_MEM in the line +// below. This will result in text being sent to stdout that looks +// like this: +// DEBUG UnicodeSet: ct 0x00A39B20; 397 [\u0A81-\u0A83\u0A85- +// DEBUG UnicodeSet: dt 0x00A39B20; 396 [\u0A81-\u0A83\u0A85- +// Each line lists a construction (ct) or destruction (dt) event, the +// object address, the number of outstanding objects after the event, +// and the pattern of the object in question. 
+ +// #define DEBUG_MEM + +#ifdef DEBUG_MEM +#include +static int32_t _dbgCount = 0; + +static inline void _dbgct(UnicodeSet* set) { + UnicodeString str; + set->toPattern(str, TRUE); + char buf[40]; + str.extract(0, 39, buf, ""); + printf("DEBUG UnicodeSet: ct 0x%08X; %d %s\n", set, ++_dbgCount, buf); +} + +static inline void _dbgdt(UnicodeSet* set) { + UnicodeString str; + set->toPattern(str, TRUE); + char buf[40]; + str.extract(0, 39, buf, ""); + printf("DEBUG UnicodeSet: dt 0x%08X; %d %s\n", set, --_dbgCount, buf); +} + +#else + +#define _dbgct(set) +#define _dbgdt(set) + +#endif + +//---------------------------------------------------------------- +// UnicodeString in UVector support +//---------------------------------------------------------------- + +static void U_CALLCONV cloneUnicodeString(UElement *dst, UElement *src) { + dst->pointer = new UnicodeString(*(UnicodeString*)src->pointer); +} + +static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { + const UnicodeString &a = *(const UnicodeString*)t1.pointer; + const UnicodeString &b = *(const UnicodeString*)t2.pointer; + return a.compare(b); +} + +UBool UnicodeSet::hasStrings() const { + return strings != nullptr && !strings->isEmpty(); +} + +int32_t UnicodeSet::stringsSize() const { + return strings == nullptr ? 0 : strings->size(); +} + +UBool UnicodeSet::stringsContains(const UnicodeString &s) const { + return strings != nullptr && strings->contains((void*) &s); +} + +//---------------------------------------------------------------- +// Constructors &c +//---------------------------------------------------------------- + +/** + * Constructs an empty set. + */ +UnicodeSet::UnicodeSet() { + list[0] = UNICODESET_HIGH; + _dbgct(this); +} + +/** + * Constructs a set containing the given range. If end > + * start then an empty set is created. 
+ * + * @param start first character, inclusive, of range + * @param end last character, inclusive, of range + */ +UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) { + list[0] = UNICODESET_HIGH; + add(start, end); + _dbgct(this); +} + +/** + * Constructs a set that is identical to the given UnicodeSet. + */ +UnicodeSet::UnicodeSet(const UnicodeSet& o) : UnicodeFilter(o) { + *this = o; + _dbgct(this); +} + +// Copy-construct as thawed. +UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) : UnicodeFilter(o) { + if (ensureCapacity(o.len)) { + // *this = o except for bmpSet and stringSpan + len = o.len; + uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32)); + if (o.hasStrings()) { + UErrorCode status = U_ZERO_ERROR; + if (!allocateStrings(status) || + (strings->assign(*o.strings, cloneUnicodeString, status), U_FAILURE(status))) { + setToBogus(); + return; + } + } + if (o.pat) { + setPattern(o.pat, o.patLen); + } + _dbgct(this); + } +} + +/** + * Destructs the set. + */ +UnicodeSet::~UnicodeSet() { + _dbgdt(this); // first! + if (list != stackList) { + uprv_free(list); + } + delete bmpSet; + if (buffer != stackList) { + uprv_free(buffer); + } + delete strings; + delete stringSpan; + releasePattern(); +} + +/** + * Assigns this object to be a copy of another. + */ +UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) { + return copyFrom(o, FALSE); +} + +UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) { + if (this == &o) { + return *this; + } + if (isFrozen()) { + return *this; + } + if (o.isBogus()) { + setToBogus(); + return *this; + } + if (!ensureCapacity(o.len)) { + // ensureCapacity will mark the UnicodeSet as Bogus if OOM failure happens. + return *this; + } + len = o.len; + uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32)); + if (o.bmpSet != nullptr && !asThawed) { + bmpSet = new BMPSet(*o.bmpSet, list, len); + if (bmpSet == NULL) { // Check for memory allocation error. 
+ setToBogus(); + return *this; + } + } + if (o.hasStrings()) { + UErrorCode status = U_ZERO_ERROR; + if ((strings == nullptr && !allocateStrings(status)) || + (strings->assign(*o.strings, cloneUnicodeString, status), U_FAILURE(status))) { + setToBogus(); + return *this; + } + } else if (hasStrings()) { + strings->removeAllElements(); + } + if (o.stringSpan != nullptr && !asThawed) { + stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings); + if (stringSpan == NULL) { // Check for memory allocation error. + setToBogus(); + return *this; + } + } + releasePattern(); + if (o.pat) { + setPattern(o.pat, o.patLen); + } + return *this; +} + +/** + * Returns a copy of this object. All UnicodeMatcher objects have + * to support cloning in order to allow classes using + * UnicodeMatchers, such as Transliterator, to implement cloning. + */ +UnicodeSet* UnicodeSet::clone() const { + return new UnicodeSet(*this); +} + +UnicodeSet *UnicodeSet::cloneAsThawed() const { + return new UnicodeSet(*this, TRUE); +} + +/** + * Compares the specified object with this set for equality. Returns + * true if the two sets + * have the same size, and every member of the specified set is + * contained in this set (or equivalently, every member of this set is + * contained in the specified set). + * + * @param o set to be compared for equality with this set. + * @return true if the specified set is equal to this set. + */ +UBool UnicodeSet::operator==(const UnicodeSet& o) const { + if (len != o.len) return FALSE; + for (int32_t i = 0; i < len; ++i) { + if (list[i] != o.list[i]) return FALSE; + } + if (hasStrings() != o.hasStrings()) { return FALSE; } + if (hasStrings() && !(*strings == *o.strings)) return FALSE; + return TRUE; +} + +/** + * Returns the hash code value for this set. + * + * @return the hash code value for this set. 
+ * @see Object#hashCode() + */ +int32_t UnicodeSet::hashCode(void) const { + uint32_t result = static_cast(len); + for (int32_t i = 0; i < len; ++i) { + result *= 1000003u; + result += list[i]; + } + return static_cast(result); +} + +//---------------------------------------------------------------- +// Public API +//---------------------------------------------------------------- + +/** + * Returns the number of elements in this set (its cardinality), + * Note than the elements of a set may include both individual + * codepoints and strings. + * + * @return the number of elements in this set (its cardinality). + */ +int32_t UnicodeSet::size(void) const { + int32_t n = 0; + int32_t count = getRangeCount(); + for (int32_t i = 0; i < count; ++i) { + n += getRangeEnd(i) - getRangeStart(i) + 1; + } + return n + stringsSize(); +} + +/** + * Returns true if this set contains no elements. + * + * @return true if this set contains no elements. + */ +UBool UnicodeSet::isEmpty(void) const { + return len == 1 && !hasStrings(); +} + +/** + * Returns true if this set contains the given character. + * @param c character to be checked for containment + * @return true if the test condition is met + */ +UBool UnicodeSet::contains(UChar32 c) const { + // Set i to the index of the start item greater than ch + // We know we will terminate without length test! + // LATER: for large sets, add binary search + //int32_t i = -1; + //for (;;) { + // if (c < list[++i]) break; + //} + if (bmpSet != NULL) { + return bmpSet->contains(c); + } + if (stringSpan != NULL) { + return stringSpan->contains(c); + } + if (c >= UNICODESET_HIGH) { // Don't need to check LOW bound + return FALSE; + } + int32_t i = findCodePoint(c); + return (UBool)(i & 1); // return true if odd +} + +/** + * Returns the smallest value i such that c < list[i]. Caller + * must ensure that c is a legal value or this method will enter + * an infinite loop. This method performs a binary search. 
+ * @param c a character in the range MIN_VALUE..MAX_VALUE + * inclusive + * @return the smallest integer i in the range 0..len-1, + * inclusive, such that c < list[i] + */ +int32_t UnicodeSet::findCodePoint(UChar32 c) const { + /* Examples: + findCodePoint(c) + set list[] c=0 1 3 4 7 8 + === ============== =========== + [] [110000] 0 0 0 0 0 0 + [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2 + [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2 + [:Any:] [0, 110000] 1 1 1 1 1 1 + */ + + // Return the smallest i such that c < list[i]. Assume + // list[len - 1] == HIGH and that c is legal (0..HIGH-1). + if (c < list[0]) + return 0; + // High runner test. c is often after the last range, so an + // initial check for this condition pays off. + int32_t lo = 0; + int32_t hi = len - 1; + if (lo >= hi || c >= list[hi-1]) + return hi; + // invariant: c >= list[lo] + // invariant: c < list[hi] + for (;;) { + int32_t i = (lo + hi) >> 1; + if (i == lo) { + break; // Found! + } else if (c < list[i]) { + hi = i; + } else { + lo = i; + } + } + return hi; +} + +/** + * Returns true if this set contains every character + * of the given range. + * @param start first character, inclusive, of the range + * @param end last character, inclusive, of the range + * @return true if the test condition is met + */ +UBool UnicodeSet::contains(UChar32 start, UChar32 end) const { + //int32_t i = -1; + //for (;;) { + // if (start < list[++i]) break; + //} + int32_t i = findCodePoint(start); + return ((i & 1) != 0 && end < list[i]); +} + +/** + * Returns true if this set contains the given + * multicharacter string. 
+ * @param s string to be checked for containment + * @return true if this set contains the specified string + */ +UBool UnicodeSet::contains(const UnicodeString& s) const { + if (s.length() == 0) return FALSE; + int32_t cp = getSingleCP(s); + if (cp < 0) { + return stringsContains(s); + } else { + return contains((UChar32) cp); + } +} + +/** + * Returns true if this set contains all the characters and strings + * of the given set. + * @param c set to be checked for containment + * @return true if the test condition is met + */ +UBool UnicodeSet::containsAll(const UnicodeSet& c) const { + // The specified set is a subset if all of its pairs are contained in + // this set. It's possible to code this more efficiently in terms of + // direct manipulation of the inversion lists if the need arises. + int32_t n = c.getRangeCount(); + for (int i=0; icontainsAll(*c.strings)); +} + +/** + * Returns true if this set contains all the characters + * of the given string. + * @param s string containing characters to be checked for containment + * @return true if the test condition is met + */ +UBool UnicodeSet::containsAll(const UnicodeString& s) const { + return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_CONTAINED) == + s.length()); +} + +/** + * Returns true if this set contains none of the characters + * of the given range. + * @param start first character, inclusive, of the range + * @param end last character, inclusive, of the range + * @return true if the test condition is met + */ +UBool UnicodeSet::containsNone(UChar32 start, UChar32 end) const { + //int32_t i = -1; + //for (;;) { + // if (start < list[++i]) break; + //} + int32_t i = findCodePoint(start); + return ((i & 1) == 0 && end < list[i]); +} + +/** + * Returns true if this set contains none of the characters and strings + * of the given set. 
+ * @param c set to be checked for containment + * @return true if the test condition is met + */ +UBool UnicodeSet::containsNone(const UnicodeSet& c) const { + // The specified set is a subset if all of its pairs are contained in + // this set. It's possible to code this more efficiently in terms of + // direct manipulation of the inversion lists if the need arises. + int32_t n = c.getRangeCount(); + for (int32_t i=0; icontainsNone(*c.strings); +} + +/** + * Returns true if this set contains none of the characters + * of the given string. + * @param s string containing characters to be checked for containment + * @return true if the test condition is met + */ +UBool UnicodeSet::containsNone(const UnicodeString& s) const { + return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_NOT_CONTAINED) == + s.length()); +} + +/** + * Returns true if this set contains any character whose low byte + * is the given value. This is used by RuleBasedTransliterator for + * indexing. + */ +UBool UnicodeSet::matchesIndexValue(uint8_t v) const { + /* The index value v, in the range [0,255], is contained in this set if + * it is contained in any pair of this set. Pairs either have the high + * bytes equal, or unequal. If the high bytes are equal, then we have + * aaxx..aayy, where aa is the high byte. Then v is contained if xx <= + * v <= yy. If the high bytes are unequal we have aaxx..bbyy, bb>aa. + * Then v is contained if xx <= v || v <= yy. (This is identical to the + * time zone month containment logic.) + */ + int32_t i; + int32_t rangeCount=getRangeCount(); + for (i=0; isize(); ++i) { + const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i); + //if (s.length() == 0) { + // // Empty strings match everything + // return TRUE; + //} + // assert(s.length() != 0); // We enforce this elsewhere + UChar32 c = s.char32At(0); + if ((c & 0xFF) == v) { + return TRUE; + } + } + } + return FALSE; +} + +/** + * Implementation of UnicodeMatcher::matches(). 
Always matches the + * longest possible multichar string. + */ +UMatchDegree UnicodeSet::matches(const Replaceable& text, + int32_t& offset, + int32_t limit, + UBool incremental) { + if (offset == limit) { + // Strings, if any, have length != 0, so we don't worry + // about them here. If we ever allow zero-length strings + // we much check for them here. + if (contains(U_ETHER)) { + return incremental ? U_PARTIAL_MATCH : U_MATCH; + } else { + return U_MISMATCH; + } + } else { + if (hasStrings()) { // try strings first + + // might separate forward and backward loops later + // for now they are combined + + // TODO Improve efficiency of this, at least in the forward + // direction, if not in both. In the forward direction we + // can assume the strings are sorted. + + int32_t i; + UBool forward = offset < limit; + + // firstChar is the leftmost char to match in the + // forward direction or the rightmost char to match in + // the reverse direction. + UChar firstChar = text.charAt(offset); + + // If there are multiple strings that can match we + // return the longest match. + int32_t highWaterLength = 0; + + for (i=0; isize(); ++i) { + const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i); + + //if (trial.length() == 0) { + // return U_MATCH; // null-string always matches + //} + // assert(trial.length() != 0); // We ensure this elsewhere + + UChar c = trial.charAt(forward ? 0 : trial.length() - 1); + + // Strings are sorted, so we can optimize in the + // forward direction. + if (forward && c > firstChar) break; + if (c != firstChar) continue; + + int32_t matchLen = matchRest(text, offset, limit, trial); + + if (incremental) { + int32_t maxLen = forward ? limit-offset : offset-limit; + if (matchLen == maxLen) { + // We have successfully matched but only up to limit. + return U_PARTIAL_MATCH; + } + } + + if (matchLen == trial.length()) { + // We have successfully matched the whole string. 
+ if (matchLen > highWaterLength) { + highWaterLength = matchLen; + } + // In the forward direction we know strings + // are sorted so we can bail early. + if (forward && matchLen < highWaterLength) { + break; + } + continue; + } + } + + // We've checked all strings without a partial match. + // If we have full matches, return the longest one. + if (highWaterLength != 0) { + offset += forward ? highWaterLength : -highWaterLength; + return U_MATCH; + } + } + return UnicodeFilter::matches(text, offset, limit, incremental); + } +} + +/** + * Returns the longest match for s in text at the given position. + * If limit > start then match forward from start+1 to limit + * matching all characters except s.charAt(0). If limit < start, + * go backward starting from start-1 matching all characters + * except s.charAt(s.length()-1). This method assumes that the + * first character, text.charAt(start), matches s, so it does not + * check it. + * @param text the text to match + * @param start the first character to match. In the forward + * direction, text.charAt(start) is matched against s.charAt(0). + * In the reverse direction, it is matched against + * s.charAt(s.length()-1). + * @param limit the limit offset for matching, either last+1 in + * the forward direction, or last-1 in the reverse direction, + * where last is the index of the last character to match. + * @return If part of s matches up to the limit, return |limit - + * start|. If all of s matches before reaching the limit, return + * s.length(). 
If there is a mismatch between s and text, return + * 0 + */ +int32_t UnicodeSet::matchRest(const Replaceable& text, + int32_t start, int32_t limit, + const UnicodeString& s) { + int32_t i; + int32_t maxLen; + int32_t slen = s.length(); + if (start < limit) { + maxLen = limit - start; + if (maxLen > slen) maxLen = slen; + for (i = 1; i < maxLen; ++i) { + if (text.charAt(start + i) != s.charAt(i)) return 0; + } + } else { + maxLen = start - limit; + if (maxLen > slen) maxLen = slen; + --slen; // <=> slen = s.length() - 1; + for (i = 1; i < maxLen; ++i) { + if (text.charAt(start - i) != s.charAt(slen - i)) return 0; + } + } + return maxLen; +} + +/** + * Implement of UnicodeMatcher + */ +void UnicodeSet::addMatchSetTo(UnicodeSet& toUnionTo) const { + toUnionTo.addAll(*this); +} + +/** + * Returns the index of the given character within this set, where + * the set is ordered by ascending code point. If the character + * is not in this set, return -1. The inverse of this method is + * charAt(). + * @return an index from 0..size()-1, or -1 + */ +int32_t UnicodeSet::indexOf(UChar32 c) const { + if (c < MIN_VALUE || c > MAX_VALUE) { + return -1; + } + int32_t i = 0; + int32_t n = 0; + for (;;) { + UChar32 start = list[i++]; + if (c < start) { + return -1; + } + UChar32 limit = list[i++]; + if (c < limit) { + return n + c - start; + } + n += limit - start; + } +} + +/** + * Returns the character at the given index within this set, where + * the set is ordered by ascending code point. If the index is + * out of range, return (UChar32)-1. The inverse of this method is + * indexOf(). + * @param index an index from 0..size()-1 + * @return the character at the given index, or (UChar32)-1. + */ +UChar32 UnicodeSet::charAt(int32_t index) const { + if (index >= 0) { + // len2 is the largest even integer <= len, that is, it is len + // for even values and len-1 for odd values. With odd values + // the last entry is UNICODESET_HIGH. 
+ int32_t len2 = len & ~1; + for (int32_t i=0; i < len2;) { + UChar32 start = list[i++]; + int32_t count = list[i++] - start; + if (index < count) { + return (UChar32)(start + index); + } + index -= count; + } + } + return (UChar32)-1; +} + +/** + * Make this object represent the range start - end. + * If start > end then this object is set to + * an empty range. + * + * @param start first character in the set, inclusive + * @param end last character in the set, inclusive + */ +UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) { + clear(); + complement(start, end); + return *this; +} + +/** + * Adds the specified range to this set if it is not already + * present. If this set already contains the specified range, + * the call leaves this set unchanged. If start > end + * then an empty range is added, leaving the set unchanged. + * + * @param start first character, inclusive, of range to be added + * to this set. + * @param end last character, inclusive, of range to be added + * to this set. + */ +UnicodeSet& UnicodeSet::add(UChar32 start, UChar32 end) { + if (pinCodePoint(start) < pinCodePoint(end)) { + UChar32 limit = end + 1; + // Fast path for adding a new range after the last one. + // Odd list length: [..., lastStart, lastLimit, HIGH] + if ((len & 1) != 0) { + // If the list is empty, set lastLimit low enough to not be adjacent to 0. + UChar32 lastLimit = len == 1 ? -2 : list[len - 2]; + if (lastLimit <= start && !isFrozen() && !isBogus()) { + if (lastLimit == start) { + // Extend the last range. + list[len - 2] = limit; + if (limit == UNICODESET_HIGH) { + --len; + } + } else { + list[len - 1] = start; + if (limit < UNICODESET_HIGH) { + if (ensureCapacity(len + 2)) { + list[len++] = limit; + list[len++] = UNICODESET_HIGH; + } + } else { // limit == UNICODESET_HIGH + if (ensureCapacity(len + 1)) { + list[len++] = UNICODESET_HIGH; + } + } + } + releasePattern(); + return *this; + } + } + // This is slow.
Could be much faster using findCodePoint(start) + // and modifying the list, dealing with adjacent & overlapping ranges. + UChar32 range[3] = { start, limit, UNICODESET_HIGH }; + add(range, 2, 0); + } else if (start == end) { + add(start); + } + return *this; +} + +// #define DEBUG_US_ADD + +#ifdef DEBUG_US_ADD +#include +void dump(UChar32 c) { + if (c <= 0xFF) { + printf("%c", (char)c); + } else { + printf("U+%04X", c); + } +} +void dump(const UChar32* list, int32_t len) { + printf("["); + for (int32_t i=0; i "); +#endif + + if (c == list[i]-1) { + // c is before start of next range + list[i] = c; + // if we touched the HIGH mark, then add a new one + if (c == (UNICODESET_HIGH - 1)) { + if (!ensureCapacity(len+1)) { + // ensureCapacity will mark the object as Bogus if OOM failure happens. + return *this; + } + list[len++] = UNICODESET_HIGH; + } + if (i > 0 && c == list[i-1]) { + // collapse adjacent ranges + + // [..., start_k-1, c, c, limit_k, ..., HIGH] + // ^ + // list[i] + + //for (int32_t k=i-1; k 0 && c == list[i-1]) { + // c is after end of prior range + list[i-1]++; + // no need to check for collapse here + } + + else { + // At this point we know the new char is not adjacent to + // any existing ranges, and it is not 10FFFF. + + + // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] + // ^ + // list[i] + + // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH] + // ^ + // list[i] + + if (!ensureCapacity(len+2)) { + // ensureCapacity will mark the object as Bogus if OOM failure happens. + return *this; + } + + UChar32 *p = list + i; + uprv_memmove(p + 2, p, (len - i) * sizeof(*p)); + list[i] = c; + list[i+1] = c+1; + len += 2; + } + +#ifdef DEBUG_US_ADD + dump(list, len); + printf("\n"); + + for (i=1; i {"ch"} + *
Warning: you cannot add an empty string ("") to a UnicodeSet. + * @param s the source string + * @return the modified set, for chaining + */ +UnicodeSet& UnicodeSet::add(const UnicodeString& s) { + if (s.length() == 0 || isFrozen() || isBogus()) return *this; + int32_t cp = getSingleCP(s); + if (cp < 0) { + if (!stringsContains(s)) { + _add(s); + releasePattern(); + } + } else { + add((UChar32)cp); + } + return *this; +} + +/** + * Adds the given string, in order, to 'strings'. The given string + * must have been checked by the caller to not be empty and to not + * already be in 'strings'. + */ +void UnicodeSet::_add(const UnicodeString& s) { + if (isFrozen() || isBogus()) { + return; + } + UErrorCode ec = U_ZERO_ERROR; + if (strings == nullptr && !allocateStrings(ec)) { + setToBogus(); + return; + } + UnicodeString* t = new UnicodeString(s); + if (t == NULL) { // Check for memory allocation error. + setToBogus(); + return; + } + strings->sortedInsert(t, compareUnicodeString, ec); + if (U_FAILURE(ec)) { + setToBogus(); + delete t; + } +} + +/** + * @return a code point IF the string consists of a single one. + * otherwise returns -1. + * @param s the string to test + */ +int32_t UnicodeSet::getSingleCP(const UnicodeString& s) { + //if (s.length() < 1) { + // throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet"); + //} + if (s.length() > 2) return -1; + if (s.length() == 1) return s.charAt(0); + + // at this point, len = 2 + UChar32 cp = s.char32At(0); + if (cp > 0xFFFF) { // is surrogate pair + return cp; + } + return -1; +} + +/** + * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} + * If this set already contains any particular character, it has no effect on that character.
+ * @param the source string + * @return the modified set, for chaining + */ +UnicodeSet& UnicodeSet::addAll(const UnicodeString& s) { + UChar32 cp; + for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) { + cp = s.char32At(i); + add(cp); + } + return *this; +} + +/** + * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"} + * Characters of this set that do not occur in the string are removed. + * @param s the source string + * @return the modified set, for chaining + */ +UnicodeSet& UnicodeSet::retainAll(const UnicodeString& s) { + UnicodeSet set; + set.addAll(s); + retainAll(set); + return *this; +} + +/** + * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"} + * Each distinct character of the string is removed from this set if present, or added if absent. + * @param s the source string + * @return the modified set, for chaining + */ +UnicodeSet& UnicodeSet::complementAll(const UnicodeString& s) { + UnicodeSet set; + set.addAll(s); + complementAll(set); + return *this; +} + +/** + * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"} + * Characters of the string that are not in this set are ignored. + * @param s the source string + * @return the modified set, for chaining + */ +UnicodeSet& UnicodeSet::removeAll(const UnicodeString& s) { + UnicodeSet set; + set.addAll(s); + removeAll(set); + return *this; +} + +UnicodeSet& UnicodeSet::removeAllStrings() { + if (!isFrozen() && hasStrings()) { + strings->removeAllElements(); + releasePattern(); + } + return *this; +} + + +/** + * Makes a set from a multicharacter string. Thus "ch" => {"ch"} + *
Warning: you cannot add an empty string ("") to a UnicodeSet. + * @param the source string + * @return a newly created set containing the given string + */ +UnicodeSet* U_EXPORT2 UnicodeSet::createFrom(const UnicodeString& s) { + UnicodeSet *set = new UnicodeSet(); + if (set != NULL) { // Check for memory allocation error. + set->add(s); + } + return set; +} + + +/** + * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"} + * @param s the source string + * @return a newly created set containing the given characters, or NULL if allocation failed + */ +UnicodeSet* U_EXPORT2 UnicodeSet::createFromAll(const UnicodeString& s) { + UnicodeSet *set = new UnicodeSet(); + if (set != NULL) { // Check for memory allocation error. + set->addAll(s); + } + return set; +} + +/** + * Retain only the elements in this set that are contained in the + * specified range. If start > end then an empty range is + * retained, leaving the set empty. + * + * @param start first character, inclusive, of range to be retained + * in this set. + * @param end last character, inclusive, of range to be retained + * in this set. + */ +UnicodeSet& UnicodeSet::retain(UChar32 start, UChar32 end) { + if (pinCodePoint(start) <= pinCodePoint(end)) { + UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; + retain(range, 2, 0); + } else { + clear(); + } + return *this; +} + +UnicodeSet& UnicodeSet::retain(UChar32 c) { + return retain(c, c); +} + +/** + * Removes the specified range from this set if it is present. + * The set will not contain the specified range once the call + * returns. If start > end then an empty range is + * removed, leaving the set unchanged. + * + * @param start first character, inclusive, of range to be removed + * from this set. + * @param end last character, inclusive, of range to be removed + * from this set.
+ */ +UnicodeSet& UnicodeSet::remove(UChar32 start, UChar32 end) { + if (pinCodePoint(start) <= pinCodePoint(end)) { + UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; + retain(range, 2, 2); + } + return *this; +} + +/** + * Removes the specified character from this set if it is present. + * The set will not contain the specified character once the call + * returns. + */ +UnicodeSet& UnicodeSet::remove(UChar32 c) { + return remove(c, c); +} + +/** + * Removes the specified string from this set if it is present. + * The set will not contain the specified string once the call + * returns. + * @param s the source string + * @return the modified set, for chaining + */ +UnicodeSet& UnicodeSet::remove(const UnicodeString& s) { + if (s.length() == 0 || isFrozen() || isBogus()) return *this; + int32_t cp = getSingleCP(s); + if (cp < 0) { + if (strings != nullptr && strings->removeElement((void*) &s)) { + releasePattern(); + } + } else { + remove((UChar32)cp, (UChar32)cp); + } + return *this; +} + +/** + * Complements the specified range in this set. Any character in + * the range will be removed if it is in this set, or will be + * added if it is not in this set. If start > end + * then an empty range is xor'ed, leaving the set unchanged. + * + * @param start first character, inclusive, of range to be complemented + * in this set. + * @param end last character, inclusive, of range to be complemented + * in this set. + */ +UnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) { + if (isFrozen() || isBogus()) { + return *this; + } + if (pinCodePoint(start) <= pinCodePoint(end)) { + UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; + exclusiveOr(range, 2, 0); + } + releasePattern(); + return *this; +} + +UnicodeSet& UnicodeSet::complement(UChar32 c) { + return complement(c, c); +} + +/** + * This is equivalent to + * complement(MIN_VALUE, MAX_VALUE).
+ */ +UnicodeSet& UnicodeSet::complement(void) { + if (isFrozen() || isBogus()) { + return *this; + } + if (list[0] == UNICODESET_LOW) { + uprv_memmove(list, list + 1, (size_t)(len-1)*sizeof(UChar32)); + --len; + } else { + if (!ensureCapacity(len+1)) { + return *this; + } + uprv_memmove(list + 1, list, (size_t)len*sizeof(UChar32)); + list[0] = UNICODESET_LOW; + ++len; + } + releasePattern(); + return *this; +} + +/** + * Complement the specified string in this set. + * The string will be removed if it is in this set, or will be + * added if it is not in this set. + *
Warning: you cannot add an empty string ("") to a UnicodeSet. + * @param s the string to complement + * @return this object, for chaining + */ +UnicodeSet& UnicodeSet::complement(const UnicodeString& s) { + if (s.length() == 0 || isFrozen() || isBogus()) return *this; + int32_t cp = getSingleCP(s); + if (cp < 0) { + if (stringsContains(s)) { + strings->removeElement((void*) &s); + } else { + _add(s); + } + releasePattern(); + } else { + complement((UChar32)cp, (UChar32)cp); + } + return *this; +} + +/** + * Adds all of the elements in the specified set to this set if + * they're not already present. This operation effectively + * modifies this set so that its value is the union of the two + * sets. The behavior of this operation is unspecified if the specified + * collection is modified while the operation is in progress. + * + * @param c set whose elements are to be added to this set. + * @see #add(char, char) + */ +UnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) { + if ( c.len>0 && c.list!=NULL ) { + add(c.list, c.len, 0); + } + + // Add strings in order + if ( c.strings!=NULL ) { + for (int32_t i=0; isize(); ++i) { + const UnicodeString* s = (const UnicodeString*)c.strings->elementAt(i); + if (!stringsContains(*s)) { + _add(*s); + } + } + } + return *this; +} + +/** + * Retains only the elements in this set that are contained in the + * specified set. In other words, removes from this set all of + * its elements that are not contained in the specified set. This + * operation effectively modifies this set so that its value is + * the intersection of the two sets. + * + * @param c set that defines which elements this set will retain. 
+ */ +UnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) { + if (isFrozen() || isBogus()) { + return *this; + } + retain(c.list, c.len, 0); + if (hasStrings()) { + if (!c.hasStrings()) { + strings->removeAllElements(); + } else { + strings->retainAll(*c.strings); + } + } + return *this; +} + +/** + * Removes from this set all of its elements that are contained in the + * specified set. This operation effectively modifies this + * set so that its value is the asymmetric set difference of + * the two sets. + * + * @param c set that defines which elements will be removed from + * this set. + */ +UnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) { + if (isFrozen() || isBogus()) { + return *this; + } + retain(c.list, c.len, 2); + if (hasStrings() && c.hasStrings()) { + strings->removeAll(*c.strings); + } + return *this; +} + +/** + * Complements in this set all elements contained in the specified + * set. Any character in the other set will be removed if it is + * in this set, or will be added if it is not in this set. + * + * @param c set that defines which elements will be xor'ed from + * this set. + */ +UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) { + if (isFrozen() || isBogus()) { + return *this; + } + exclusiveOr(c.list, c.len, 0); + + if (c.strings != nullptr) { + for (int32_t i=0; isize(); ++i) { + void* e = c.strings->elementAt(i); + if (strings == nullptr || !strings->removeElement(e)) { + _add(*(const UnicodeString*)e); + } + } + } + return *this; +} + +/** + * Removes all of the elements from this set. This set will be + * empty after this call returns. + */ +UnicodeSet& UnicodeSet::clear(void) { + if (isFrozen()) { + return *this; + } + list[0] = UNICODESET_HIGH; + len = 1; + releasePattern(); + if (strings != NULL) { + strings->removeAllElements(); + } + // Remove bogus + fFlags = 0; + return *this; +} + +/** + * Iteration method that returns the number of ranges contained in + * this set. 
+ * @see #getRangeStart + * @see #getRangeEnd + */ +int32_t UnicodeSet::getRangeCount() const { + return len/2; +} + +/** + * Iteration method that returns the first character in the + * specified range of this set. + * @see #getRangeCount + * @see #getRangeEnd + */ +UChar32 UnicodeSet::getRangeStart(int32_t index) const { + return list[index*2]; +} + +/** + * Iteration method that returns the last character in the + * specified range of this set. + * @see #getRangeStart + * @see #getRangeEnd + */ +UChar32 UnicodeSet::getRangeEnd(int32_t index) const { + return list[index*2 + 1] - 1; +} + +const UnicodeString* UnicodeSet::getString(int32_t index) const { + return (const UnicodeString*) strings->elementAt(index); +} + +/** + * Reallocate this objects internal structures to take up the least + * possible space, without changing this object's value. + */ +UnicodeSet& UnicodeSet::compact() { + if (isFrozen() || isBogus()) { + return *this; + } + // Delete buffer first to defragment memory less. + if (buffer != stackList) { + uprv_free(buffer); + buffer = NULL; + bufferCapacity = 0; + } + if (list == stackList) { + // pass + } else if (len <= INITIAL_CAPACITY) { + uprv_memcpy(stackList, list, len * sizeof(UChar32)); + uprv_free(list); + list = stackList; + capacity = INITIAL_CAPACITY; + } else if ((len + 7) < capacity) { + // If we have more than a little unused capacity, shrink it to len. + UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * len); + if (temp) { + list = temp; + capacity = len; + } + // else what the heck happened?! We allocated less memory! + // Oh well. We'll keep our original array. + } + if (strings != nullptr && strings->isEmpty()) { + delete strings; + strings = nullptr; + } + return *this; +} + +#ifdef DEBUG_SERIALIZE +#include +#endif + +/** + * Deserialize constructor. 
+ */ +UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization serialization, + UErrorCode &ec) { + + if(U_FAILURE(ec)) { + setToBogus(); + return; + } + + if( (serialization != kSerialized) + || (data==NULL) + || (dataLen < 1)) { + ec = U_ILLEGAL_ARGUMENT_ERROR; + setToBogus(); + return; + } + + // bmp? + int32_t headerSize = ((data[0]&0x8000)) ?2:1; + int32_t bmpLength = (headerSize==1)?data[0]:data[1]; + + int32_t newLength = (((data[0]&0x7FFF)-bmpLength)/2)+bmpLength; +#ifdef DEBUG_SERIALIZE + printf("dataLen %d headerSize %d bmpLen %d len %d. data[0]=%X/%X/%X/%X\n", dataLen,headerSize,bmpLength,newLength, data[0],data[1],data[2],data[3]); +#endif + if(!ensureCapacity(newLength + 1)) { // +1 for HIGH + return; + } + // copy bmp + int32_t i; + for(i = 0; i< bmpLength;i++) { + list[i] = data[i+headerSize]; +#ifdef DEBUG_SERIALIZE + printf("<<16@%d[%d] %X\n", i+headerSize, i, list[i]); +#endif + } + // copy smp + for(i=bmpLength;i0 && dest==NULL)) { + ec=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* count necessary 16-bit units */ + length=this->len-1; // Subtract 1 to ignore final UNICODESET_HIGH + // assert(length>=0); + if (length==0) { + /* empty set */ + if (destCapacity>0) { + *dest=0; + } else { + ec=U_BUFFER_OVERFLOW_ERROR; + } + return 1; + } + /* now length>0 */ + + if (this->list[length-1]<=0xffff) { + /* all BMP */ + bmpLength=length; + } else if (this->list[0]>=0x10000) { + /* all supplementary */ + bmpLength=0; + length*=2; + } else { + /* some BMP, some supplementary */ + for (bmpLength=0; bmpLengthlist[bmpLength]<=0xffff; ++bmpLength) {} + length=bmpLength+2*(length-bmpLength); + } +#ifdef DEBUG_SERIALIZE + printf(">> bmpLength%d length%d len%d\n", bmpLength, length, len); +#endif + /* length: number of 16-bit array units */ + if (length>0x7fff) { + /* there are only 15 bits for the length in the first serialized word */ + ec=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + /* + * total serialized length: + * number of 16-bit 
array units (length) + + * 1 length unit (always) + + * 1 bmpLength unit (if there are supplementary values) + */ + destLength=length+((length>bmpLength)?2:1); + if (destLength<=destCapacity) { + const UChar32 *p; + int32_t i; + +#ifdef DEBUG_SERIALIZE + printf("writeHdr\n"); +#endif + *dest=(uint16_t)length; + if (length>bmpLength) { + *dest|=0x8000; + *++dest=(uint16_t)bmpLength; + } + ++dest; + + /* write the BMP part of the array */ + p=this->list; + for (i=0; i>16); + *dest++=(uint16_t)*p++; + } + } else { + ec=U_BUFFER_OVERFLOW_ERROR; + } + return destLength; +} + +//---------------------------------------------------------------- +// Implementation: Utility methods +//---------------------------------------------------------------- + +/** + * Allocate our strings vector and return TRUE if successful. + */ +UBool UnicodeSet::allocateStrings(UErrorCode &status) { + if (U_FAILURE(status)) { + return FALSE; + } + strings = new UVector(uprv_deleteUObject, + uhash_compareUnicodeString, 1, status); + if (strings == NULL) { // Check for memory allocation error. + status = U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + if (U_FAILURE(status)) { + delete strings; + strings = NULL; + return FALSE; + } + return TRUE; +} + +int32_t UnicodeSet::nextCapacity(int32_t minCapacity) { + // Grow exponentially to reduce the frequency of allocations. 
+ if (minCapacity < INITIAL_CAPACITY) { + return minCapacity + INITIAL_CAPACITY; + } else if (minCapacity <= 2500) { + return 5 * minCapacity; + } else { + int32_t newCapacity = 2 * minCapacity; + if (newCapacity > MAX_LENGTH) { + newCapacity = MAX_LENGTH; + } + return newCapacity; + } +} + +bool UnicodeSet::ensureCapacity(int32_t newLen) { + if (newLen > MAX_LENGTH) { + newLen = MAX_LENGTH; + } + if (newLen <= capacity) { + return true; + } + int32_t newCapacity = nextCapacity(newLen); + UChar32* temp = (UChar32*) uprv_malloc(newCapacity * sizeof(UChar32)); + if (temp == NULL) { + setToBogus(); // set the object to bogus state if an OOM failure occurred. + return false; + } + // Copy only the actual contents. + uprv_memcpy(temp, list, len * sizeof(UChar32)); + if (list != stackList) { + uprv_free(list); + } + list = temp; + capacity = newCapacity; + return true; +} + +bool UnicodeSet::ensureBufferCapacity(int32_t newLen) { + if (newLen > MAX_LENGTH) { + newLen = MAX_LENGTH; + } + if (newLen <= bufferCapacity) { + return true; + } + int32_t newCapacity = nextCapacity(newLen); + UChar32* temp = (UChar32*) uprv_malloc(newCapacity * sizeof(UChar32)); + if (temp == NULL) { + setToBogus(); + return false; + } + // The buffer has no contents to be copied. + // It is always filled from scratch after this call. + if (buffer != stackList) { + uprv_free(buffer); + } + buffer = temp; + bufferCapacity = newCapacity; + return true; +} + +/** + * Swap list and buffer. + */ +void UnicodeSet::swapBuffers(void) { + // swap list and buffer + UChar32* temp = list; + list = buffer; + buffer = temp; + + int32_t c = capacity; + capacity = bufferCapacity; + bufferCapacity = c; +} + +void UnicodeSet::setToBogus() { + clear(); // Remove everything in the set. 
+ fFlags = kIsBogus; +} + +//---------------------------------------------------------------- +// Implementation: Fundamental operators +//---------------------------------------------------------------- + +static inline UChar32 max(UChar32 a, UChar32 b) { + return (a > b) ? a : b; +} + +// polarity = 0, 3 is normal: x xor y +// polarity = 1, 2: x xor ~y == x === y + +void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity) { + if (isFrozen() || isBogus()) { + return; + } + if (!ensureBufferCapacity(len + otherLen)) { + return; + } + + int32_t i = 0, j = 0, k = 0; + UChar32 a = list[i++]; + UChar32 b; + if (polarity == 1 || polarity == 2) { + b = UNICODESET_LOW; + if (other[j] == UNICODESET_LOW) { // skip base if already LOW + ++j; + b = other[j]; + } + } else { + b = other[j++]; + } + // simplest of all the routines + // sort the values, discarding identicals! + for (;;) { + if (a < b) { + buffer[k++] = a; + a = list[i++]; + } else if (b < a) { + buffer[k++] = b; + b = other[j++]; + } else if (a != UNICODESET_HIGH) { // at this point, a == b + // discard both values! + a = list[i++]; + b = other[j++]; + } else { // DONE! + buffer[k++] = UNICODESET_HIGH; + len = k; + break; + } + } + swapBuffers(); + releasePattern(); +} + +// polarity = 0 is normal: x union y +// polarity = 2: x union ~y +// polarity = 1: ~x union y +// polarity = 3: ~x union ~y + +void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) { + if (isFrozen() || isBogus() || other==NULL) { + return; + } + if (!ensureBufferCapacity(len + otherLen)) { + return; + } + + int32_t i = 0, j = 0, k = 0; + UChar32 a = list[i++]; + UChar32 b = other[j++]; + // change from xor is that we have to check overlapping pairs + // polarity bit 1 means a is second, bit 2 means b is. 
+ for (;;) { + switch (polarity) { + case 0: // both first; take lower if unequal + if (a < b) { // take a + // Back up over overlapping ranges in buffer[] + if (k > 0 && a <= buffer[k-1]) { + // Pick latter end value in buffer[] vs. list[] + a = max(list[i], buffer[--k]); + } else { + // No overlap + buffer[k++] = a; + a = list[i]; + } + i++; // Common if/else code factored out + polarity ^= 1; + } else if (b < a) { // take b + if (k > 0 && b <= buffer[k-1]) { + b = max(other[j], buffer[--k]); + } else { + buffer[k++] = b; + b = other[j]; + } + j++; + polarity ^= 2; + } else { // a == b, take a, drop b + if (a == UNICODESET_HIGH) goto loop_end; + // This is symmetrical; it doesn't matter if + // we backtrack with a or b. - liu + if (k > 0 && a <= buffer[k-1]) { + a = max(list[i], buffer[--k]); + } else { + // No overlap + buffer[k++] = a; + a = list[i]; + } + i++; + polarity ^= 1; + b = other[j++]; + polarity ^= 2; + } + break; + case 3: // both second; take higher if unequal, and drop other + if (b <= a) { // take a + if (a == UNICODESET_HIGH) goto loop_end; + buffer[k++] = a; + } else { // take b + if (b == UNICODESET_HIGH) goto loop_end; + buffer[k++] = b; + } + a = list[i++]; + polarity ^= 1; // factored common code + b = other[j++]; + polarity ^= 2; + break; + case 1: // a second, b first; if b < a, overlap + if (a < b) { // no overlap, take a + buffer[k++] = a; a = list[i++]; polarity ^= 1; + } else if (b < a) { // OVERLAP, drop b + b = other[j++]; + polarity ^= 2; + } else { // a == b, drop both! + if (a == UNICODESET_HIGH) goto loop_end; + a = list[i++]; + polarity ^= 1; + b = other[j++]; + polarity ^= 2; + } + break; + case 2: // a first, b second; if a < b, overlap + if (b < a) { // no overlap, take b + buffer[k++] = b; + b = other[j++]; + polarity ^= 2; + } else if (a < b) { // OVERLAP, drop a + a = list[i++]; + polarity ^= 1; + } else { // a == b, drop both! 
+ if (a == UNICODESET_HIGH) goto loop_end; + a = list[i++]; + polarity ^= 1; + b = other[j++]; + polarity ^= 2; + } + break; + } + } + loop_end: + buffer[k++] = UNICODESET_HIGH; // terminate + len = k; + swapBuffers(); + releasePattern(); +} + +// polarity = 0 is normal: x intersect y +// polarity = 2: x intersect ~y == set-minus +// polarity = 1: ~x intersect y +// polarity = 3: ~x intersect ~y + +void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity) { + if (isFrozen() || isBogus()) { + return; + } + if (!ensureBufferCapacity(len + otherLen)) { + return; + } + + int32_t i = 0, j = 0, k = 0; + UChar32 a = list[i++]; + UChar32 b = other[j++]; + // change from xor is that we have to check overlapping pairs + // polarity bit 1 means a is second, bit 2 means b is. + for (;;) { + switch (polarity) { + case 0: // both first; drop the smaller + if (a < b) { // drop a + a = list[i++]; + polarity ^= 1; + } else if (b < a) { // drop b + b = other[j++]; + polarity ^= 2; + } else { // a == b, take one, drop other + if (a == UNICODESET_HIGH) goto loop_end; + buffer[k++] = a; + a = list[i++]; + polarity ^= 1; + b = other[j++]; + polarity ^= 2; + } + break; + case 3: // both second; take lower if unequal + if (a < b) { // take a + buffer[k++] = a; + a = list[i++]; + polarity ^= 1; + } else if (b < a) { // take b + buffer[k++] = b; + b = other[j++]; + polarity ^= 2; + } else { // a == b, take one, drop other + if (a == UNICODESET_HIGH) goto loop_end; + buffer[k++] = a; + a = list[i++]; + polarity ^= 1; + b = other[j++]; + polarity ^= 2; + } + break; + case 1: // a second, b first; + if (a < b) { // NO OVERLAP, drop a + a = list[i++]; + polarity ^= 1; + } else if (b < a) { // OVERLAP, take b + buffer[k++] = b; + b = other[j++]; + polarity ^= 2; + } else { // a == b, drop both! 
+ if (a == UNICODESET_HIGH) goto loop_end; + a = list[i++]; + polarity ^= 1; + b = other[j++]; + polarity ^= 2; + } + break; + case 2: // a first, b second; if a < b, overlap + if (b < a) { // no overlap, drop b + b = other[j++]; + polarity ^= 2; + } else if (a < b) { // OVERLAP, take a + buffer[k++] = a; + a = list[i++]; + polarity ^= 1; + } else { // a == b, drop both! + if (a == UNICODESET_HIGH) goto loop_end; + a = list[i++]; + polarity ^= 1; + b = other[j++]; + polarity ^= 2; + } + break; + } + } + loop_end: + buffer[k++] = UNICODESET_HIGH; // terminate + len = k; + swapBuffers(); + releasePattern(); +} + +/** + * Append the toPattern() representation of a + * string to the given StringBuffer. + */ +void UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s, UBool +escapeUnprintable) { + UChar32 cp; + for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) { + _appendToPat(buf, cp = s.char32At(i), escapeUnprintable); + } +} + +/** + * Append the toPattern() representation of a + * character to the given StringBuffer. + */ +void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool +escapeUnprintable) { + if (escapeUnprintable && ICU_Utility::isUnprintable(c)) { + // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything + // unprintable + if (ICU_Utility::escapeUnprintable(buf, c)) { + return; + } + } + // Okay to let ':' pass through + switch (c) { + case SET_OPEN: + case SET_CLOSE: + case HYPHEN: + case COMPLEMENT: + case INTERSECTION: + case BACKSLASH: + case OPEN_BRACE: + case CLOSE_BRACE: + case COLON: + case SymbolTable::SYMBOL_REF: + buf.append(BACKSLASH); + break; + default: + // Escape whitespace + if (PatternProps::isWhiteSpace(c)) { + buf.append(BACKSLASH); + } + break; + } + buf.append(c); +} + +/** + * Append a string representation of this set to result. This will be + * a cleaned version of the string passed to applyPattern(), if there + * is one. Otherwise it will be generated. 
+ */ +UnicodeString& UnicodeSet::_toPattern(UnicodeString& result, + UBool escapeUnprintable) const +{ + if (pat != NULL) { + int32_t i; + int32_t backslashCount = 0; + for (i=0; i 1 && + getRangeStart(0) == MIN_VALUE && + getRangeEnd(count-1) == MAX_VALUE) { + + // Emit the inverse + result.append(COMPLEMENT); + + for (int32_t i = 1; i < count; ++i) { + UChar32 start = getRangeEnd(i-1)+1; + UChar32 end = getRangeStart(i)-1; + _appendToPat(result, start, escapeUnprintable); + if (start != end) { + if ((start+1) != end) { + result.append(HYPHEN); + } + _appendToPat(result, end, escapeUnprintable); + } + } + } + + // Default; emit the ranges as pairs + else { + for (int32_t i = 0; i < count; ++i) { + UChar32 start = getRangeStart(i); + UChar32 end = getRangeEnd(i); + _appendToPat(result, start, escapeUnprintable); + if (start != end) { + if ((start+1) != end) { + result.append(HYPHEN); + } + _appendToPat(result, end, escapeUnprintable); + } + } + } + + if (strings != nullptr) { + for (int32_t i = 0; isize(); ++i) { + result.append(OPEN_BRACE); + _appendToPat(result, + *(const UnicodeString*) strings->elementAt(i), + escapeUnprintable); + result.append(CLOSE_BRACE); + } + } + return result.append(SET_CLOSE); +} + +/** +* Release existing cached pattern +*/ +void UnicodeSet::releasePattern() { + if (pat) { + uprv_free(pat); + pat = NULL; + patLen = 0; + } +} + +/** +* Set the new pattern to cache. +*/ +void UnicodeSet::setPattern(const char16_t *newPat, int32_t newPatLen) { + releasePattern(); + pat = (UChar *)uprv_malloc((newPatLen + 1) * sizeof(UChar)); + if (pat) { + patLen = newPatLen; + u_memcpy(pat, newPat, patLen); + pat[patLen] = 0; + } + // else we don't care if malloc failed. This was just a nice cache. + // We can regenerate an equivalent pattern later when requested. +} + +UnicodeSet *UnicodeSet::freeze() { + if(!isFrozen() && !isBogus()) { + compact(); + + // Optimize contains() and span() and similar functions. 
+ if (hasStrings()) { + stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL); + if (stringSpan == nullptr) { + setToBogus(); + return this; + } else if (!stringSpan->needsStringSpanUTF16()) { + // All strings are irrelevant for span() etc. because + // all of each string's code points are contained in this set. + // Do not check needsStringSpanUTF8() because UTF-8 has at most as + // many relevant strings as UTF-16. + // (Thus needsStringSpanUTF8() implies needsStringSpanUTF16().) + delete stringSpan; + stringSpan = NULL; + } + } + if (stringSpan == NULL) { + // No span-relevant strings: Optimize for code point spans. + bmpSet=new BMPSet(list, len); + if (bmpSet == NULL) { // Check for memory allocation error. + setToBogus(); + } + } + } + return this; +} + +int32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const { + if(length>0 && bmpSet!=NULL) { + return (int32_t)(bmpSet->span(s, s+length, spanCondition)-s); + } + if(length<0) { + length=u_strlen(s); + } + if(length==0) { + return 0; + } + if(stringSpan!=NULL) { + return stringSpan->span(s, length, spanCondition); + } else if(hasStrings()) { + uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? + UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED : + UnicodeSetStringSpan::FWD_UTF16_CONTAINED; + UnicodeSetStringSpan strSpan(*this, *strings, which); + if(strSpan.needsStringSpanUTF16()) { + return strSpan.span(s, length, spanCondition); + } + } + + if(spanCondition!=USET_SPAN_NOT_CONTAINED) { + spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. 
+ } + + UChar32 c; + int32_t start=0, prev=0; + do { + U16_NEXT(s, start, length, c); + if(spanCondition!=contains(c)) { + break; + } + } while((prev=start)0 && bmpSet!=NULL) { + return (int32_t)(bmpSet->spanBack(s, s+length, spanCondition)-s); + } + if(length<0) { + length=u_strlen(s); + } + if(length==0) { + return 0; + } + if(stringSpan!=NULL) { + return stringSpan->spanBack(s, length, spanCondition); + } else if(hasStrings()) { + uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? + UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED : + UnicodeSetStringSpan::BACK_UTF16_CONTAINED; + UnicodeSetStringSpan strSpan(*this, *strings, which); + if(strSpan.needsStringSpanUTF16()) { + return strSpan.spanBack(s, length, spanCondition); + } + } + + if(spanCondition!=USET_SPAN_NOT_CONTAINED) { + spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. + } + + UChar32 c; + int32_t prev=length; + do { + U16_PREV(s, 0, length, c); + if(spanCondition!=contains(c)) { + break; + } + } while((prev=length)>0); + return prev; +} + +int32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const { + if(length>0 && bmpSet!=NULL) { + const uint8_t *s0=(const uint8_t *)s; + return (int32_t)(bmpSet->spanUTF8(s0, length, spanCondition)-s0); + } + if(length<0) { + length=(int32_t)uprv_strlen(s); + } + if(length==0) { + return 0; + } + if(stringSpan!=NULL) { + return stringSpan->spanUTF8((const uint8_t *)s, length, spanCondition); + } else if(hasStrings()) { + uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? + UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED : + UnicodeSetStringSpan::FWD_UTF8_CONTAINED; + UnicodeSetStringSpan strSpan(*this, *strings, which); + if(strSpan.needsStringSpanUTF8()) { + return strSpan.spanUTF8((const uint8_t *)s, length, spanCondition); + } + } + + if(spanCondition!=USET_SPAN_NOT_CONTAINED) { + spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. 
+ } + + UChar32 c; + int32_t start=0, prev=0; + do { + U8_NEXT_OR_FFFD(s, start, length, c); + if(spanCondition!=contains(c)) { + break; + } + } while((prev=start)0 && bmpSet!=NULL) { + const uint8_t *s0=(const uint8_t *)s; + return bmpSet->spanBackUTF8(s0, length, spanCondition); + } + if(length<0) { + length=(int32_t)uprv_strlen(s); + } + if(length==0) { + return 0; + } + if(stringSpan!=NULL) { + return stringSpan->spanBackUTF8((const uint8_t *)s, length, spanCondition); + } else if(hasStrings()) { + uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? + UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED : + UnicodeSetStringSpan::BACK_UTF8_CONTAINED; + UnicodeSetStringSpan strSpan(*this, *strings, which); + if(strSpan.needsStringSpanUTF8()) { + return strSpan.spanBackUTF8((const uint8_t *)s, length, spanCondition); + } + } + + if(spanCondition!=USET_SPAN_NOT_CONTAINED) { + spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. + } + + UChar32 c; + int32_t prev=length; + do { + U8_PREV_OR_FFFD(s, 0, length, c); + if(spanCondition!=contains(c)) { + break; + } + } while((prev=length)>0); + return prev; +} + +U_NAMESPACE_END diff --git a/libicuuc/libicu/uniset.cpp.patch b/libicuuc/libicu/uniset.cpp.patch new file mode 100644 index 0000000..eff5c13 --- /dev/null +++ b/libicuuc/libicu/uniset.cpp.patch @@ -0,0 +1,11 @@ +--- libicu/uc/uniset.cpp 2020-07-21 11:57:44.853500497 +0300 ++++ libicu/uniset.cpp 2020-07-21 12:52:58.109222826 +0300 +@@ -302,7 +302,7 @@ UBool UnicodeSet::operator==(const Unico + if (list[i] != o.list[i]) return FALSE; + } + if (hasStrings() != o.hasStrings()) { return FALSE; } +- if (hasStrings() && *strings != *o.strings) return FALSE; ++ if (hasStrings() && !(*strings == *o.strings)) return FALSE; + return TRUE; + } + diff --git a/libicuuc/manifest b/libicuuc/manifest index c916d2a..afb67a2 100644 --- a/libicuuc/manifest +++ b/libicuuc/manifest @@ -1,10 +1,10 @@ : 1 name: libicuuc -version: 65.1.0+3 +version: 65.1.0+4 upstream-version: 65.1 
project: icu summary: ICU basic internationalization C/C++ library -license: Unicode, BSD3, BSD2 ; Unicode for the most of original files. +license: Unicode-DFS-2016 AND BSD-3-Clause AND BSD-2-Clause ; Unicode License Agreement for the most of original files. topics: C, C++, Unicode, internationalization description-file: README url: http://site.icu-project.org/ @@ -13,7 +13,7 @@ src-url: https://git.build2.org/cgit/packaging/icu/icu/tree/libicuuc/ package-url: https://git.build2.org/cgit/packaging/icu/ email: icu-support@lists.sourceforge.net ; Mailing list. package-email: packaging@build2.org ; Mailing list. -build-email: builds@build2.org +build-error-email: builds@build2.org builds: all depends: * build2 >= 0.12.0 depends: * bpkg >= 0.12.0 -- cgit v1.1