diff options
27 files changed, 3931 insertions, 38 deletions
@@ -0,0 +1,7 @@ +On the revision: + +On the release: + +- Use legal{} target type for legal documentation (LICENSE, AUTHORS, etc). + + Note that this requires bumping build2 version requirement to 0.13.0. diff --git a/libicui18n/.gitignore b/libicui18n/.gitignore index 4c4fec7..d4a1da2 100644 --- a/libicui18n/.gitignore +++ b/libicui18n/.gitignore @@ -3,11 +3,17 @@ *.d *.t *.i +*.i.* *.ii +*.ii.* *.o *.obj +*.gcm +*.pcm +*.ifc *.so *.so.* +*.dylib *.dll *.a *.lib diff --git a/libicui18n/build/bootstrap.build b/libicui18n/build/bootstrap.build index 3a426ec..697d5d7 100644 --- a/libicui18n/build/bootstrap.build +++ b/libicui18n/build/bootstrap.build @@ -1,4 +1,4 @@ -# file : build/root.build +# file : build/bootstrap.build # license : Unicode License; see accompanying LICENSE file project = libicui18n diff --git a/libicui18n/build/export.build b/libicui18n/build/export.build index 0455eea..a48358c 100644 --- a/libicui18n/build/export.build +++ b/libicui18n/build/export.build @@ -1,4 +1,4 @@ -# file : build/root.build +# file : build/export.build # license : Unicode License; see accompanying LICENSE file $out_root/ diff --git a/libicui18n/libicui18n/.gitattributes b/libicui18n/libicui18n/.gitattributes new file mode 100644 index 0000000..55857bf --- /dev/null +++ b/libicui18n/libicui18n/.gitattributes @@ -0,0 +1 @@ +i18n symlink=dir diff --git a/libicui18n/libicui18n/buildfile b/libicui18n/libicui18n/buildfile index 6ec314f..edd8676 100644 --- a/libicui18n/libicui18n/buildfile +++ b/libicui18n/libicui18n/buildfile @@ -3,7 +3,7 @@ import intf_libs = libicuuc%lib{icuuc} -patched = stsearch olsontz vtzone rbtz dtitvfmt basictz +patched = stsearch olsontz vtzone rbtz dtitvfmt basictz fmtable lib{icui18n}: {hxx}{**} i18n/cxx{* -{$patched}} cxx{$patched} $intf_libs @@ -45,7 +45,7 @@ cxx.poptions =+ "-I$src_base/i18n" obja{*}: cxx.poptions += -DU_STATIC_IMPLEMENTATION -switch $cxx.class, $tsys +switch $cxx.class { case 'gcc' { @@ -71,7 +71,7 @@ switch $cxx.class, $tsys # 
Disable warnings that pop up with /W3. # - cxx.coptions += /wd4244 /wd4996 + cxx.coptions += /wd4244 /wd4996 /wd5055 /wd5056 } } diff --git a/libicui18n/libicui18n/fmtable.cpp b/libicui18n/libicui18n/fmtable.cpp new file mode 100644 index 0000000..4024dc9 --- /dev/null +++ b/libicui18n/libicui18n/fmtable.cpp @@ -0,0 +1,1043 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 1997-2016, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +* +* File FMTABLE.CPP +* +* Modification History: +* +* Date Name Description +* 03/25/97 clhuang Initial Implementation. +******************************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include <cstdlib> +#include <math.h> +#include "unicode/fmtable.h" +#include "unicode/ustring.h" +#include "unicode/measure.h" +#include "unicode/curramt.h" +#include "unicode/uformattable.h" +#include "charstr.h" +#include "cmemory.h" +#include "cstring.h" +#include "fmtableimp.h" +#include "number_decimalquantity.h" + +// ***************************************************************************** +// class Formattable +// ***************************************************************************** + +U_NAMESPACE_BEGIN + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Formattable) + +using number::impl::DecimalQuantity; + + +//-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-. + +// NOTE: As of 3.0, there are limitations to the UObject API. It does +// not (yet) support cloning, operator=, nor operator==. To +// work around this, I implement some simple inlines here. Later +// these can be modified or removed. 
[alan] + +// NOTE: These inlines assume that all fObjects are in fact instances +// of the Measure class, which is true as of 3.0. [alan] + +// Return TRUE if *a == *b. +static inline UBool objectEquals(const UObject* a, const UObject* b) { + // LATER: return *a == *b; + //return *((const Measure*) a) == *((const Measure*) b); + return ((const Measure*) a)->operator== (*((const Measure*) b)); +} + +// Return a clone of *a. +static inline UObject* objectClone(const UObject* a) { + // LATER: return a->clone(); + return ((const Measure*) a)->clone(); +} + +// Return TRUE if *a is an instance of Measure. +static inline UBool instanceOfMeasure(const UObject* a) { + return dynamic_cast<const Measure*>(a) != NULL; +} + +/** + * Creates a new Formattable array and copies the values from the specified + * original. + * @param array the original array + * @param count the original array count + * @return the new Formattable array. + */ +static Formattable* createArrayCopy(const Formattable* array, int32_t count) { + Formattable *result = new Formattable[count]; + if (result != NULL) { + for (int32_t i=0; i<count; ++i) + result[i] = array[i]; // Don't memcpy! + } + return result; +} + +//-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-. + +/** + * Set 'ec' to 'err' only if 'ec' is not already set to a failing UErrorCode. + */ +static void setError(UErrorCode& ec, UErrorCode err) { + if (U_SUCCESS(ec)) { + ec = err; + } +} + +// +// Common initialization code, shared by constructors. +// Put everything into a known state. +// +void Formattable::init() { + fValue.fInt64 = 0; + fType = kLong; + fDecimalStr = NULL; + fDecimalQuantity = NULL; + fBogus.setToBogus(); +} + +// ------------------------------------- +// default constructor. +// Creates a formattable object with a long value 0. + +Formattable::Formattable() { + init(); +} + +// ------------------------------------- +// Creates a formattable object with a Date instance. 
+ +Formattable::Formattable(UDate date, ISDATE /*isDate*/) +{ + init(); + fType = kDate; + fValue.fDate = date; +} + +// ------------------------------------- +// Creates a formattable object with a double value. + +Formattable::Formattable(double value) +{ + init(); + fType = kDouble; + fValue.fDouble = value; +} + +// ------------------------------------- +// Creates a formattable object with an int32_t value. + +Formattable::Formattable(int32_t value) +{ + init(); + fValue.fInt64 = value; +} + +// ------------------------------------- +// Creates a formattable object with an int64_t value. + +Formattable::Formattable(int64_t value) +{ + init(); + fType = kInt64; + fValue.fInt64 = value; +} + +// ------------------------------------- +// Creates a formattable object with a decimal number value from a string. + +Formattable::Formattable(StringPiece number, UErrorCode &status) { + init(); + setDecimalNumber(number, status); +} + + +// ------------------------------------- +// Creates a formattable object with a UnicodeString instance. + +Formattable::Formattable(const UnicodeString& stringToCopy) +{ + init(); + fType = kString; + fValue.fString = new UnicodeString(stringToCopy); +} + +// ------------------------------------- +// Creates a formattable object with a UnicodeString* value. 
+// (adopting semantics) + +Formattable::Formattable(UnicodeString* stringToAdopt) +{ + init(); + fType = kString; + fValue.fString = stringToAdopt; +} + +Formattable::Formattable(UObject* objectToAdopt) +{ + init(); + fType = kObject; + fValue.fObject = objectToAdopt; +} + +// ------------------------------------- + +Formattable::Formattable(const Formattable* arrayToCopy, int32_t count) + : UObject(), fType(kArray) +{ + init(); + fType = kArray; + fValue.fArrayAndCount.fArray = createArrayCopy(arrayToCopy, count); + fValue.fArrayAndCount.fCount = count; +} + +// ------------------------------------- +// copy constructor + + +Formattable::Formattable(const Formattable &source) + : UObject(*this) +{ + init(); + *this = source; +} + +// ------------------------------------- +// assignment operator + +Formattable& +Formattable::operator=(const Formattable& source) +{ + if (this != &source) + { + // Disposes the current formattable value/setting. + dispose(); + + // Sets the correct data type for this value. + fType = source.fType; + switch (fType) + { + case kArray: + // Sets each element in the array one by one and records the array count. + fValue.fArrayAndCount.fCount = source.fValue.fArrayAndCount.fCount; + fValue.fArrayAndCount.fArray = createArrayCopy(source.fValue.fArrayAndCount.fArray, + source.fValue.fArrayAndCount.fCount); + break; + case kString: + // Sets the string value. + fValue.fString = new UnicodeString(*source.fValue.fString); + break; + case kDouble: + // Sets the double value. + fValue.fDouble = source.fValue.fDouble; + break; + case kLong: + case kInt64: + // Sets the long value. + fValue.fInt64 = source.fValue.fInt64; + break; + case kDate: + // Sets the Date value. 
+ fValue.fDate = source.fValue.fDate; + break; + case kObject: + fValue.fObject = objectClone(source.fValue.fObject); + break; + } + + UErrorCode status = U_ZERO_ERROR; + if (source.fDecimalQuantity != NULL) { + fDecimalQuantity = new DecimalQuantity(*source.fDecimalQuantity); + } + if (source.fDecimalStr != NULL) { + fDecimalStr = new CharString(*source.fDecimalStr, status); + if (U_FAILURE(status)) { + delete fDecimalStr; + fDecimalStr = NULL; + } + } + } + return *this; +} + +// ------------------------------------- + +UBool +Formattable::operator==(const Formattable& that) const +{ + int32_t i; + + if (this == &that) return TRUE; + + // Returns FALSE if the data types are different. + if (fType != that.fType) return FALSE; + + // Compares the actual data values. + UBool equal = TRUE; + switch (fType) { + case kDate: + equal = (fValue.fDate == that.fValue.fDate); + break; + case kDouble: + equal = (fValue.fDouble == that.fValue.fDouble); + break; + case kLong: + case kInt64: + equal = (fValue.fInt64 == that.fValue.fInt64); + break; + case kString: + equal = (*(fValue.fString) == *(that.fValue.fString)); + break; + case kArray: + if (fValue.fArrayAndCount.fCount != that.fValue.fArrayAndCount.fCount) { + equal = FALSE; + break; + } + // Checks each element for equality. + for (i=0; i<fValue.fArrayAndCount.fCount; ++i) { + if (fValue.fArrayAndCount.fArray[i] != that.fValue.fArrayAndCount.fArray[i]) { + equal = FALSE; + break; + } + } + break; + case kObject: + if (fValue.fObject == NULL || that.fValue.fObject == NULL) { + equal = FALSE; + } else { + equal = objectEquals(fValue.fObject, that.fValue.fObject); + } + break; + } + + // TODO: compare digit lists if numeric. + return equal; +} + +// ------------------------------------- + +Formattable::~Formattable() +{ + dispose(); +} + +// ------------------------------------- + +void Formattable::dispose() +{ + // Deletes the data value if necessary. 
+ switch (fType) { + case kString: + delete fValue.fString; + break; + case kArray: + delete[] fValue.fArrayAndCount.fArray; + break; + case kObject: + delete fValue.fObject; + break; + default: + break; + } + + fType = kLong; + fValue.fInt64 = 0; + + delete fDecimalStr; + fDecimalStr = NULL; + + delete fDecimalQuantity; + fDecimalQuantity = NULL; +} + +Formattable * +Formattable::clone() const { + return new Formattable(*this); +} + +// ------------------------------------- +// Gets the data type of this Formattable object. +Formattable::Type +Formattable::getType() const +{ + return fType; +} + +UBool +Formattable::isNumeric() const { + switch (fType) { + case kDouble: + case kLong: + case kInt64: + return TRUE; + default: + return FALSE; + } +} + +// ------------------------------------- +int32_t +//Formattable::getLong(UErrorCode* status) const +Formattable::getLong(UErrorCode& status) const +{ + if (U_FAILURE(status)) { + return 0; + } + + switch (fType) { + case Formattable::kLong: + return (int32_t)fValue.fInt64; + case Formattable::kInt64: + if (fValue.fInt64 > INT32_MAX) { + status = U_INVALID_FORMAT_ERROR; + return INT32_MAX; + } else if (fValue.fInt64 < INT32_MIN) { + status = U_INVALID_FORMAT_ERROR; + return INT32_MIN; + } else { + return (int32_t)fValue.fInt64; + } + case Formattable::kDouble: + if (fValue.fDouble > INT32_MAX) { + status = U_INVALID_FORMAT_ERROR; + return INT32_MAX; + } else if (fValue.fDouble < INT32_MIN) { + status = U_INVALID_FORMAT_ERROR; + return INT32_MIN; + } else { + return (int32_t)fValue.fDouble; // loses fraction + } + case Formattable::kObject: + if (fValue.fObject == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + // TODO Later replace this with instanceof call + if (instanceOfMeasure(fValue.fObject)) { + return ((const Measure*) fValue.fObject)-> + getNumber().getLong(status); + } + U_FALLTHROUGH; + default: + status = U_INVALID_FORMAT_ERROR; + return 0; + } +} + +// ------------------------------------- 
+// Maximum int that can be represented exactly in a double. (53 bits) +// Larger ints may be rounded to a near-by value as not all are representable. +// TODO: move this constant elsewhere, possibly configure it for different +// floating point formats, if any non-standard ones are still in use. +static const int64_t U_DOUBLE_MAX_EXACT_INT = 9007199254740992LL; + +int64_t +Formattable::getInt64(UErrorCode& status) const +{ + if (U_FAILURE(status)) { + return 0; + } + + switch (fType) { + case Formattable::kLong: + case Formattable::kInt64: + return fValue.fInt64; + case Formattable::kDouble: + if (fValue.fDouble > (double)U_INT64_MAX) { + status = U_INVALID_FORMAT_ERROR; + return U_INT64_MAX; + } else if (fValue.fDouble < (double)U_INT64_MIN) { + status = U_INVALID_FORMAT_ERROR; + return U_INT64_MIN; + } else if (fabs(fValue.fDouble) > U_DOUBLE_MAX_EXACT_INT && fDecimalQuantity != NULL) { + if (fDecimalQuantity->fitsInLong(true)) { + return fDecimalQuantity->toLong(); + } else { + // Unexpected + status = U_INVALID_FORMAT_ERROR; + return fDecimalQuantity->isNegative() ? 
U_INT64_MIN : U_INT64_MAX; + } + } else { + return (int64_t)fValue.fDouble; + } + case Formattable::kObject: + if (fValue.fObject == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + if (instanceOfMeasure(fValue.fObject)) { + return ((const Measure*) fValue.fObject)-> + getNumber().getInt64(status); + } + U_FALLTHROUGH; + default: + status = U_INVALID_FORMAT_ERROR; + return 0; + } +} + +// ------------------------------------- +double +Formattable::getDouble(UErrorCode& status) const +{ + if (U_FAILURE(status)) { + return 0; + } + + switch (fType) { + case Formattable::kLong: + case Formattable::kInt64: // loses precision + return (double)fValue.fInt64; + case Formattable::kDouble: + return fValue.fDouble; + case Formattable::kObject: + if (fValue.fObject == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + // TODO Later replace this with instanceof call + if (instanceOfMeasure(fValue.fObject)) { + return ((const Measure*) fValue.fObject)-> + getNumber().getDouble(status); + } + U_FALLTHROUGH; + default: + status = U_INVALID_FORMAT_ERROR; + return 0; + } +} + +const UObject* +Formattable::getObject() const { + return (fType == kObject) ? fValue.fObject : NULL; +} + +// ------------------------------------- +// Sets the value to a double value d. + +void +Formattable::setDouble(double d) +{ + dispose(); + fType = kDouble; + fValue.fDouble = d; +} + +// ------------------------------------- +// Sets the value to a long value l. + +void +Formattable::setLong(int32_t l) +{ + dispose(); + fType = kLong; + fValue.fInt64 = l; +} + +// ------------------------------------- +// Sets the value to an int64 value ll. + +void +Formattable::setInt64(int64_t ll) +{ + dispose(); + fType = kInt64; + fValue.fInt64 = ll; +} + +// ------------------------------------- +// Sets the value to a Date instance d. 
+ +void +Formattable::setDate(UDate d) +{ + dispose(); + fType = kDate; + fValue.fDate = d; +} + +// ------------------------------------- +// Sets the value to a string value stringToCopy. + +void +Formattable::setString(const UnicodeString& stringToCopy) +{ + dispose(); + fType = kString; + fValue.fString = new UnicodeString(stringToCopy); +} + +// ------------------------------------- +// Sets the value to an array of Formattable objects. + +void +Formattable::setArray(const Formattable* array, int32_t count) +{ + dispose(); + fType = kArray; + fValue.fArrayAndCount.fArray = createArrayCopy(array, count); + fValue.fArrayAndCount.fCount = count; +} + +// ------------------------------------- +// Adopts the stringToAdopt value. + +void +Formattable::adoptString(UnicodeString* stringToAdopt) +{ + dispose(); + fType = kString; + fValue.fString = stringToAdopt; +} + +// ------------------------------------- +// Adopts the array value and its count. + +void +Formattable::adoptArray(Formattable* array, int32_t count) +{ + dispose(); + fType = kArray; + fValue.fArrayAndCount.fArray = array; + fValue.fArrayAndCount.fCount = count; +} + +void +Formattable::adoptObject(UObject* objectToAdopt) { + dispose(); + fType = kObject; + fValue.fObject = objectToAdopt; +} + +// ------------------------------------- +UnicodeString& +Formattable::getString(UnicodeString& result, UErrorCode& status) const +{ + if (fType != kString) { + setError(status, U_INVALID_FORMAT_ERROR); + result.setToBogus(); + } else { + if (fValue.fString == NULL) { + setError(status, U_MEMORY_ALLOCATION_ERROR); + } else { + result = *fValue.fString; + } + } + return result; +} + +// ------------------------------------- +const UnicodeString& +Formattable::getString(UErrorCode& status) const +{ + if (fType != kString) { + setError(status, U_INVALID_FORMAT_ERROR); + return *getBogus(); + } + if (fValue.fString == NULL) { + setError(status, U_MEMORY_ALLOCATION_ERROR); + return *getBogus(); + } + return 
*fValue.fString; +} + +// ------------------------------------- +UnicodeString& +Formattable::getString(UErrorCode& status) +{ + if (fType != kString) { + setError(status, U_INVALID_FORMAT_ERROR); + return *getBogus(); + } + if (fValue.fString == NULL) { + setError(status, U_MEMORY_ALLOCATION_ERROR); + return *getBogus(); + } + return *fValue.fString; +} + +// ------------------------------------- +const Formattable* +Formattable::getArray(int32_t& count, UErrorCode& status) const +{ + if (fType != kArray) { + setError(status, U_INVALID_FORMAT_ERROR); + count = 0; + return NULL; + } + count = fValue.fArrayAndCount.fCount; + return fValue.fArrayAndCount.fArray; +} + +// ------------------------------------- +// Gets the bogus string, ensures mondo bogosity. + +UnicodeString* +Formattable::getBogus() const +{ + return (UnicodeString*)&fBogus; /* cast away const :-( */ +} + + +// -------------------------------------- +StringPiece Formattable::getDecimalNumber(UErrorCode &status) { + if (U_FAILURE(status)) { + return ""; + } + if (fDecimalStr != NULL) { + return fDecimalStr->toStringPiece(); + } + + CharString *decimalStr = internalGetCharString(status); + if(decimalStr == NULL) { + return ""; // getDecimalNumber returns "" for error cases + } else { + return decimalStr->toStringPiece(); + } +} + +CharString *Formattable::internalGetCharString(UErrorCode &status) { + if(fDecimalStr == NULL) { + if (fDecimalQuantity == NULL) { + // No decimal number for the formattable yet. Which means the value was + // set directly by the user as an int, int64 or double. If the value came + // from parsing, or from the user setting a decimal number, fDecimalQuantity + // would already be set. 
+ // + LocalPointer<DecimalQuantity> dq(new DecimalQuantity(), status); + if (U_FAILURE(status)) { return nullptr; } + populateDecimalQuantity(*dq, status); + if (U_FAILURE(status)) { return nullptr; } + fDecimalQuantity = dq.orphan(); + } + + fDecimalStr = new CharString(); + if (fDecimalStr == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + // Older ICUs called uprv_decNumberToString here, which is not exactly the same as + // DecimalQuantity::toScientificString(). The biggest difference is that uprv_decNumberToString does + // not print scientific notation for magnitudes greater than -5 and smaller than some amount (+5?). + if (fDecimalQuantity->isInfinite()) { + fDecimalStr->append("Infinity", status); + } else if (fDecimalQuantity->isNaN()) { + fDecimalStr->append("NaN", status); + } else if (fDecimalQuantity->isZeroish()) { + fDecimalStr->append("0", -1, status); + } else if (fType==kLong || fType==kInt64 || // use toPlainString for integer types + (fDecimalQuantity->getMagnitude() != INT32_MIN && std::abs(fDecimalQuantity->getMagnitude()) < 5)) { + fDecimalStr->appendInvariantChars(fDecimalQuantity->toPlainString(), status); + } else { + fDecimalStr->appendInvariantChars(fDecimalQuantity->toScientificString(), status); + } + } + return fDecimalStr; +} + +void +Formattable::populateDecimalQuantity(number::impl::DecimalQuantity& output, UErrorCode& status) const { + if (fDecimalQuantity != nullptr) { + output = *fDecimalQuantity; + return; + } + + switch (fType) { + case kDouble: + output.setToDouble(this->getDouble()); + output.roundToInfinity(); + break; + case kLong: + output.setToInt(this->getLong()); + break; + case kInt64: + output.setToLong(this->getInt64()); + break; + default: + // The formattable's value is not a numeric type. 
+ status = U_INVALID_STATE_ERROR; + } +} + +// --------------------------------------- +void +Formattable::adoptDecimalQuantity(DecimalQuantity *dq) { + if (fDecimalQuantity != NULL) { + delete fDecimalQuantity; + } + fDecimalQuantity = dq; + if (dq == NULL) { // allow adoptDecimalQuantity(NULL) to clear + return; + } + + // Set the value into the Union of simple type values. + // Cannot use the set() functions because they would delete the fDecimalQuantity value. + if (fDecimalQuantity->fitsInLong()) { + fValue.fInt64 = fDecimalQuantity->toLong(); + if (fValue.fInt64 <= INT32_MAX && fValue.fInt64 >= INT32_MIN) { + fType = kLong; + } else { + fType = kInt64; + } + } else { + fType = kDouble; + fValue.fDouble = fDecimalQuantity->toDouble(); + } +} + + +// --------------------------------------- +void +Formattable::setDecimalNumber(StringPiece numberString, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + dispose(); + + auto* dq = new DecimalQuantity(); + dq->setToDecNumber(numberString, status); + adoptDecimalQuantity(dq); + + // Note that we do not hang on to the caller's input string. + // If we are asked for the string, we will regenerate one from fDecimalQuantity. +} + +#if 0 +//---------------------------------------------------- +// console I/O +//---------------------------------------------------- +#ifdef _DEBUG + +#include <iostream> +using namespace std; + +#include "unicode/datefmt.h" +#include "unistrm.h" + +class FormattableStreamer /* not : public UObject because all methods are static */ { +public: + static void streamOut(ostream& stream, const Formattable& obj); + +private: + FormattableStreamer() {} // private - forbid instantiation +}; + +// This is for debugging purposes only. This will send a displayable +// form of the Formattable object to the output stream. 
+ +void +FormattableStreamer::streamOut(ostream& stream, const Formattable& obj) +{ + static DateFormat *defDateFormat = 0; + + UnicodeString buffer; + switch(obj.getType()) { + case Formattable::kDate : + // Creates a DateFormat instance for formatting the + // Date instance. + if (defDateFormat == 0) { + defDateFormat = DateFormat::createInstance(); + } + defDateFormat->format(obj.getDate(), buffer); + stream << buffer; + break; + case Formattable::kDouble : + // Output the double as is. + stream << obj.getDouble() << 'D'; + break; + case Formattable::kLong : + // Output the long as is. + stream << obj.getLong() << 'L'; + break; + case Formattable::kString: + // Output the quoted string. Please see UnicodeString console + // I/O routine for more details. + stream << '"' << obj.getString(buffer) << '"'; + break; + case Formattable::kArray: + int32_t i, count; + const Formattable* array; + array = obj.getArray(count); + stream << '['; + // Recursively calling the console I/O routine for each element in the array. + for (i=0; i<count; ++i) { + FormattableStreamer::streamOut(stream, array[i]); + stream << ( (i==(count-1)) ? "" : ", " ); + } + stream << ']'; + break; + default: + // Not a recognizable Formattable object. 
+ stream << "INVALID_Formattable"; + } + stream.flush(); +} +#endif + +#endif + +U_NAMESPACE_END + +/* ---- UFormattable implementation ---- */ + +U_NAMESPACE_USE + +U_DRAFT UFormattable* U_EXPORT2 +ufmt_open(UErrorCode *status) { + if( U_FAILURE(*status) ) { + return NULL; + } + UFormattable *fmt = (new Formattable())->toUFormattable(); + + if( fmt == NULL ) { + *status = U_MEMORY_ALLOCATION_ERROR; + } + return fmt; +} + +U_DRAFT void U_EXPORT2 +ufmt_close(UFormattable *fmt) { + Formattable *obj = Formattable::fromUFormattable(fmt); + + delete obj; +} + +U_INTERNAL UFormattableType U_EXPORT2 +ufmt_getType(const UFormattable *fmt, UErrorCode *status) { + if(U_FAILURE(*status)) { + return (UFormattableType)UFMT_COUNT; + } + const Formattable *obj = Formattable::fromUFormattable(fmt); + return (UFormattableType)obj->getType(); +} + + +U_INTERNAL UBool U_EXPORT2 +ufmt_isNumeric(const UFormattable *fmt) { + const Formattable *obj = Formattable::fromUFormattable(fmt); + return obj->isNumeric(); +} + +U_DRAFT UDate U_EXPORT2 +ufmt_getDate(const UFormattable *fmt, UErrorCode *status) { + const Formattable *obj = Formattable::fromUFormattable(fmt); + + return obj->getDate(*status); +} + +U_DRAFT double U_EXPORT2 +ufmt_getDouble(UFormattable *fmt, UErrorCode *status) { + Formattable *obj = Formattable::fromUFormattable(fmt); + + return obj->getDouble(*status); +} + +U_DRAFT int32_t U_EXPORT2 +ufmt_getLong(UFormattable *fmt, UErrorCode *status) { + Formattable *obj = Formattable::fromUFormattable(fmt); + + return obj->getLong(*status); +} + + +U_DRAFT const void *U_EXPORT2 +ufmt_getObject(const UFormattable *fmt, UErrorCode *status) { + const Formattable *obj = Formattable::fromUFormattable(fmt); + + const void *ret = obj->getObject(); + if( ret==NULL && + (obj->getType() != Formattable::kObject) && + U_SUCCESS( *status )) { + *status = U_INVALID_FORMAT_ERROR; + } + return ret; +} + +U_DRAFT const UChar* U_EXPORT2 +ufmt_getUChars(UFormattable *fmt, int32_t *len, UErrorCode 
*status) { + Formattable *obj = Formattable::fromUFormattable(fmt); + + // avoid bogosity by checking the type first. + if( obj->getType() != Formattable::kString ) { + if( U_SUCCESS(*status) ){ + *status = U_INVALID_FORMAT_ERROR; + } + return NULL; + } + + // This should return a valid string + UnicodeString &str = obj->getString(*status); + if( U_SUCCESS(*status) && len != NULL ) { + *len = str.length(); + } + return str.getTerminatedBuffer(); +} + +U_DRAFT int32_t U_EXPORT2 +ufmt_getArrayLength(const UFormattable* fmt, UErrorCode *status) { + const Formattable *obj = Formattable::fromUFormattable(fmt); + + int32_t count; + (void)obj->getArray(count, *status); + return count; +} + +U_DRAFT UFormattable * U_EXPORT2 +ufmt_getArrayItemByIndex(UFormattable* fmt, int32_t n, UErrorCode *status) { + Formattable *obj = Formattable::fromUFormattable(fmt); + int32_t count; + (void)obj->getArray(count, *status); + if(U_FAILURE(*status)) { + return NULL; + } else if(n<0 || n>=count) { + setError(*status, U_INDEX_OUTOFBOUNDS_ERROR); + return NULL; + } else { + return (*obj)[n].toUFormattable(); // returns non-const Formattable + } +} + +U_DRAFT const char * U_EXPORT2 +ufmt_getDecNumChars(UFormattable *fmt, int32_t *len, UErrorCode *status) { + if(U_FAILURE(*status)) { + return ""; + } + Formattable *obj = Formattable::fromUFormattable(fmt); + CharString *charString = obj->internalGetCharString(*status); + if(U_FAILURE(*status)) { + return ""; + } + if(charString == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return ""; + } else { + if(len!=NULL) { + *len = charString->length(); + } + return charString->data(); + } +} + +U_DRAFT int64_t U_EXPORT2 +ufmt_getInt64(UFormattable *fmt, UErrorCode *status) { + Formattable *obj = Formattable::fromUFormattable(fmt); + return obj->getInt64(*status); +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +//eof diff --git a/libicui18n/libicui18n/fmtable.cpp.patch b/libicui18n/libicui18n/fmtable.cpp.patch new file mode 100644 index 
0000000..e753db1 --- /dev/null +++ b/libicui18n/libicui18n/fmtable.cpp.patch @@ -0,0 +1,12 @@ +--- i18n/fmtable.cpp 2023-04-05 17:32:46.385668953 +0200 ++++ fmtable.cpp 2023-04-06 14:01:47.787233628 +0200 +@@ -56,7 +56,8 @@ + // Return TRUE if *a == *b. + static inline UBool objectEquals(const UObject* a, const UObject* b) { + // LATER: return *a == *b; +- return *((const Measure*) a) == *((const Measure*) b); ++ //return *((const Measure*) a) == *((const Measure*) b); ++ return ((const Measure*) a)->operator== (*((const Measure*) b)); + } + + // Return a clone of *a. diff --git a/libicui18n/manifest b/libicui18n/manifest index 5bb6a95..5440b47 100644 --- a/libicui18n/manifest +++ b/libicui18n/manifest @@ -1,6 +1,6 @@ : 1 name: libicui18n -version: 65.1.0+4 +version: 65.1.0+10 upstream-version: 65.1 project: icu summary: ICU high-level internationalization C/C++ library @@ -18,3 +18,8 @@ builds: all depends: * build2 >= 0.12.0 depends: * bpkg >= 0.12.0 depends: libicuuc == $ + +# System package mapping. +# +debian-name: libicu-dev +fedora-name: libicu-devel diff --git a/libicuio/.gitignore b/libicuio/.gitignore index 4c4fec7..d4a1da2 100644 --- a/libicuio/.gitignore +++ b/libicuio/.gitignore @@ -3,11 +3,17 @@ *.d *.t *.i +*.i.* *.ii +*.ii.* *.o *.obj +*.gcm +*.pcm +*.ifc *.so *.so.* +*.dylib *.dll *.a *.lib diff --git a/libicuio/README-DEV b/libicuio/README-DEV index 0e048ef..8cd01d9 100644 --- a/libicuio/README-DEV +++ b/libicuio/README-DEV @@ -5,3 +5,8 @@ See ../README-DEV for general notes on ICU packaging. 
Symlink the required upstream directories into libicuio/: $ ln -s ../../upstream/icu4c/source/io libicuio + +Fix the 'unable to find numeric literal operator' GCC 11 error: + +$ cp libicuio/io/ufile.cpp libicuio +$ patch -p0 <libicuio/ufile.cpp.patch diff --git a/libicuio/build/bootstrap.build b/libicuio/build/bootstrap.build index ef574c9..06b4637 100644 --- a/libicuio/build/bootstrap.build +++ b/libicuio/build/bootstrap.build @@ -1,4 +1,4 @@ -# file : build/root.build +# file : build/bootstrap.build # license : Unicode License; see accompanying LICENSE file project = libicuio diff --git a/libicuio/build/export.build b/libicuio/build/export.build index d40d387..a7e9429 100644 --- a/libicuio/build/export.build +++ b/libicuio/build/export.build @@ -1,4 +1,4 @@ -# file : build/root.build +# file : build/export.build # license : Unicode License; see accompanying LICENSE file $out_root/ diff --git a/libicuio/libicuio/.gitattributes b/libicuio/libicuio/.gitattributes new file mode 100644 index 0000000..2a80cb7 --- /dev/null +++ b/libicuio/libicuio/.gitattributes @@ -0,0 +1 @@ +io symlink=dir diff --git a/libicuio/libicuio/buildfile b/libicuio/libicuio/buildfile index 1334d2f..a400b40 100644 --- a/libicuio/libicuio/buildfile +++ b/libicuio/libicuio/buildfile @@ -4,7 +4,9 @@ import intf_libs = libicui18n%lib{icui18n} import intf_libs += libicuuc%lib{icuuc} -lib{icuio}: {hxx cxx}{**} $intf_libs +patched = ufile + +lib{icuio}: io/hxx{**} io/cxx{** -{$patched}} cxx{$patched} $intf_libs tclass = $cxx.target.class tsys = $cxx.target.system @@ -44,11 +46,21 @@ cxx.poptions =+ "-I$src_base/io" obja{*}: cxx.poptions += -DU_STATIC_IMPLEMENTATION -switch $cxx.class, $tsys +switch $cxx.class { case 'gcc' + { cxx.coptions += -fvisibility=hidden + # Disable warnings that pop up with -Wall -Wextra. 
Upstream doesn't seem + # to care about these and it is not easy to disable specific warnings in a + # way that works across compilers/version (some -Wno-* options are only + # recognized in newer versions). There are still some warnings left that + # appear for certain platforms/compilers. We pass them through but disable + # treating them as errors. + # + cxx.coptions += -Wno-all -Wno-extra -Wno-error + } case 'msvc' cxx.coptions += /utf-8 /Zc:wchar_t /GF /Gy \ /wd4996 # Disable warnings that pop up with /W3. diff --git a/libicuio/libicuio/ufile.cpp b/libicuio/libicuio/ufile.cpp new file mode 100644 index 0000000..2c00d21 --- /dev/null +++ b/libicuio/libicuio/ufile.cpp @@ -0,0 +1,337 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File ufile.cpp +* +* Modification History: +* +* Date Name Description +* 11/19/98 stephen Creation. +* 03/12/99 stephen Modified for new C API. +* 06/16/99 stephen Changed T_LocaleBundle to u_locbund +* 07/19/99 stephen Fixed to use ucnv's default codepage. 
+****************************************************************************** +*/ + +#include "unicode/platform.h" + +#include "locmap.h" +#include "unicode/ustdio.h" + +#if !UCONFIG_NO_CONVERSION + +#include <stdlib.h> + +#include "ufile.h" +#include "unicode/uloc.h" +#include "unicode/ures.h" +#include "unicode/ucnv.h" +#include "unicode/ustring.h" +#include "cstring.h" +#include "cmemory.h" + +#if U_PLATFORM_USES_ONLY_WIN32_API && !defined(fileno) +/* Windows likes to rename Unix-like functions */ +#define fileno _fileno +#endif + +static UFILE* +finit_owner(FILE *f, + const char *locale, + const char *codepage, + UBool takeOwnership + ) +{ + UErrorCode status = U_ZERO_ERROR; + UFILE *result; + if(f == NULL) { + return 0; + } + result = (UFILE*) uprv_malloc(sizeof(UFILE)); + if(result == NULL) { + return 0; + } + + uprv_memset(result, 0, sizeof(UFILE)); + result->fFileno = fileno(f); + result->fFile = f; + + result->str.fBuffer = result->fUCBuffer; + result->str.fPos = result->fUCBuffer; + result->str.fLimit = result->fUCBuffer; + +#if !UCONFIG_NO_FORMATTING + /* if locale is 0, use the default */ + if(u_locbund_init(&result->str.fBundle, locale) == 0) { + /* DO NOT FCLOSE HERE! */ + uprv_free(result); + return 0; + } +#endif + + /* If the codepage is not "" use the ucnv_open default behavior */ + if(codepage == NULL || *codepage != '\0') { + result->fConverter = ucnv_open(codepage, &status); + } + /* else result->fConverter is already memset'd to NULL. */ + + if(U_SUCCESS(status)) { + result->fOwnFile = takeOwnership; + } + else { +#if !UCONFIG_NO_FORMATTING + u_locbund_close(&result->str.fBundle); +#endif + /* DO NOT fclose here!!!!!! */ + uprv_free(result); + result = NULL; + } + + return result; +} + +U_CAPI UFILE* U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_finit(FILE *f, + const char *locale, + const char *codepage) +{ + return finit_owner(f, locale, codepage, FALSE); +} + +U_CAPI UFILE* U_EXPORT2 +u_fadopt(FILE *f, + const char *locale, + const char *codepage) +{ + return finit_owner(f, locale, codepage, TRUE); +} + +U_CAPI UFILE* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fopen(const char *filename, + const char *perm, + const char *locale, + const char *codepage) +{ + UFILE *result; + FILE *systemFile = fopen(filename, perm); + if(systemFile == 0) { + return 0; + } + + result = finit_owner(systemFile, locale, codepage, TRUE); + + if (!result) { + /* Something bad happened. + Maybe the converter couldn't be opened. */ + fclose(systemFile); + } + + return result; /* not a file leak */ +} + +U_CAPI UFILE* U_EXPORT2 +u_fopen_u(const UChar *filename, + const char *perm, + const char *locale, + const char *codepage) +{ + UFILE *result; + char buffer[256]; + + u_austrcpy(buffer, filename); + + result = u_fopen(buffer, perm, locale, codepage); +#if U_PLATFORM_USES_ONLY_WIN32_API + /* Try Windows API _wfopen if the above fails. */ + if (!result) { + // TODO: test this code path, including wperm. + wchar_t wperm[40] = {}; + size_t retVal; + mbstowcs_s(&retVal, wperm, UPRV_LENGTHOF(wperm), perm, _TRUNCATE); + FILE *systemFile = _wfopen((const wchar_t *)filename, wperm); + if (systemFile) { + result = finit_owner(systemFile, locale, codepage, TRUE); + } + if (!result) { + /* Something bad happened. + Maybe the converter couldn't be opened. */ + fclose(systemFile); + } + } +#endif + return result; /* not a file leak */ +} + +U_CAPI UFILE* U_EXPORT2 +u_fstropen(UChar *stringBuf, + int32_t capacity, + const char *locale) +{ + UFILE *result; + + if (capacity < 0) { + return NULL; + } + + result = (UFILE*) uprv_malloc(sizeof(UFILE)); + /* Null pointer test */ + if (result == NULL) { + return NULL; /* Just get out. 
*/ + } + uprv_memset(result, 0, sizeof(UFILE)); + result->str.fBuffer = stringBuf; + result->str.fPos = stringBuf; + result->str.fLimit = stringBuf+capacity; + +#if !UCONFIG_NO_FORMATTING + /* if locale is 0, use the default */ + if(u_locbund_init(&result->str.fBundle, locale) == 0) { + /* DO NOT FCLOSE HERE! */ + uprv_free(result); + return 0; + } +#endif + + return result; +} + +U_CAPI UBool U_EXPORT2 +u_feof(UFILE *f) +{ + UBool endOfBuffer; + if (f == NULL) { + return TRUE; + } + endOfBuffer = (UBool)(f->str.fPos >= f->str.fLimit); + if (f->fFile != NULL) { + return endOfBuffer && feof(f->fFile); + } + return endOfBuffer; +} + +U_CAPI void U_EXPORT2 +u_fflush(UFILE *file) +{ + ufile_flush_translit(file); + ufile_flush_io(file); + if (file->fFile) { + fflush(file->fFile); + } + else if (file->str.fPos < file->str.fLimit) { + *(file->str.fPos++) = 0; + } + /* TODO: flush input */ +} + +U_CAPI void +u_frewind(UFILE *file) +{ + u_fflush(file); + ucnv_reset(file->fConverter); + if (file->fFile) { + rewind(file->fFile); + file->str.fLimit = file->fUCBuffer; + file->str.fPos = file->fUCBuffer; + } + else { + file->str.fPos = file->str.fBuffer; + } +} + +U_CAPI void U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fclose(UFILE *file) +{ + if (file) { + u_fflush(file); + ufile_close_translit(file); + + if(file->fOwnFile) + fclose(file->fFile); + +#if !UCONFIG_NO_FORMATTING + u_locbund_close(&file->str.fBundle); +#endif + + ucnv_close(file->fConverter); + uprv_free(file); + } +} + +U_CAPI FILE* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetfile( UFILE *f) +{ + return f->fFile; +} + +#if !UCONFIG_NO_FORMATTING + +U_CAPI const char* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetlocale( UFILE *file) +{ + return file->str.fBundle.fLocale; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fsetlocale(UFILE *file, + const char *locale) +{ + u_locbund_close(&file->str.fBundle); + + return u_locbund_init(&file->str.fBundle, locale) == 0 ? -1 : 0; +} + +#endif + +U_CAPI const char* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetcodepage(UFILE *file) +{ + UErrorCode status = U_ZERO_ERROR; + const char *codepage = NULL; + + if (file->fConverter) { + codepage = ucnv_getName(file->fConverter, &status); + if(U_FAILURE(status)) + return 0; + } + return codepage; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fsetcodepage( const char *codepage, + UFILE *file) +{ + UErrorCode status = U_ZERO_ERROR; + int32_t retVal = -1; + + /* We use the normal default codepage for this system, and not the one for the locale. */ + if ((file->str.fPos == file->str.fBuffer) && (file->str.fLimit == file->str.fBuffer)) { + ucnv_close(file->fConverter); + file->fConverter = ucnv_open(codepage, &status); + if(U_SUCCESS(status)) { + retVal = 0; + } + } + return retVal; +} + + +U_CAPI UConverter * U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetConverter(UFILE *file) +{ + return file->fConverter; +} +#if !UCONFIG_NO_FORMATTING +U_CAPI const UNumberFormat* U_EXPORT2 u_fgetNumberFormat(UFILE *file) +{ + return u_locbund_getNumberFormat(&file->str.fBundle, UNUM_DECIMAL); +} +#endif + +#endif diff --git a/libicuio/libicuio/ufile.cpp.patch b/libicuio/libicuio/ufile.cpp.patch new file mode 100644 index 0000000..9397298 --- /dev/null +++ b/libicuio/libicuio/ufile.cpp.patch @@ -0,0 +1,15 @@ +--- libicuio/io/ufile.cpp 2019-12-23 14:38:40.255889178 +0300 ++++ libicuio/ufile.cpp 2021-11-22 10:50:17.658962827 +0300 +@@ -21,12 +21,6 @@ + */ + + #include "unicode/platform.h" +-#if defined(__GNUC__) && !defined(__clang__) && defined(__STRICT_ANSI__) +-// g++, fileno isn't defined if __STRICT_ANSI__ is defined. 
+-// clang fails to compile the <string> header unless __STRICT_ANSI__ is defined. +-// __GNUC__ is set by both gcc and clang. +-#undef __STRICT_ANSI__ +-#endif + + #include "locmap.h" + #include "unicode/ustdio.h" diff --git a/libicuio/manifest b/libicuio/manifest index 51590c3..a0b1011 100644 --- a/libicuio/manifest +++ b/libicuio/manifest @@ -1,6 +1,6 @@ : 1 name: libicuio -version: 65.1.0+4 +version: 65.1.0+10 upstream-version: 65.1 project: icu summary: ICU input/output C/C++ library @@ -13,9 +13,14 @@ src-url: https://git.build2.org/cgit/packaging/icu/icu/tree/libicuio/ package-url: https://git.build2.org/cgit/packaging/icu/ email: icu-support@lists.sourceforge.net ; Mailing list. package-email: packaging@build2.org ; Mailing list. -build-email: builds@build2.org +build-error-email: builds@build2.org builds: all depends: * build2 >= 0.12.0 depends: * bpkg >= 0.12.0 depends: libicuuc == $ depends: libicui18n == $ + +# System package mapping. +# +debian-name: libicu-dev +fedora-name: libicu-devel diff --git a/libicuuc/.gitignore b/libicuuc/.gitignore index 4c4fec7..d4a1da2 100644 --- a/libicuuc/.gitignore +++ b/libicuuc/.gitignore @@ -3,11 +3,17 @@ *.d *.t *.i +*.i.* *.ii +*.ii.* *.o *.obj +*.gcm +*.pcm +*.ifc *.so *.so.* +*.dylib *.dll *.a *.lib diff --git a/libicuuc/README-DEV b/libicuuc/README-DEV index 281a901..b88f136 100644 --- a/libicuuc/README-DEV +++ b/libicuuc/README-DEV @@ -20,6 +20,11 @@ upstream's fix (commit b7d08bc04a4296982fcef8b6b8a354a9e4e7afca) as a base $ cp libicu/uc/unistr.cpp libicu $ patch -p0 <libicu/unistr.cpp.patch +Also we fix the 'unable to find numeric literal operator' MinGW GCC 11 error: + +$ cp libicu/uc/putil.cpp libicu +$ patch -p0 <libicu/putil.cpp.patch + And we also fix Clang 10 compilation error: $ cp libicu/uc/uniset.cpp libicu diff --git a/libicuuc/build/bootstrap.build b/libicuuc/build/bootstrap.build index 8ffc1af..9d96232 100644 --- a/libicuuc/build/bootstrap.build +++ b/libicuuc/build/bootstrap.build @@ -1,4 +1,4 @@ 
-# file : build/root.build +# file : build/bootstrap.build # license : Unicode License; see accompanying LICENSE file project = libicuuc @@ -24,7 +24,7 @@ using dist # icu4c/source/configure.ac). The ABI compatibility is preserved for the # shared updates. See also: # -# http://userguide.icu-project.org/design#TOC-Version-Numbers-in-ICU +# https://unicode-org.github.io/icu/userguide/icu/design.html#version-numbers-in-icu # abi_version_major = "$version.major" abi_version_patch = ($version.patch != 0 ? ".$version.patch" : "") diff --git a/libicuuc/build/export.build b/libicuuc/build/export.build index ff8ec28..2a3a606 100644 --- a/libicuuc/build/export.build +++ b/libicuuc/build/export.build @@ -1,4 +1,4 @@ -# file : build/root.build +# file : build/export.build # license : Unicode License; see accompanying LICENSE file $out_root/ diff --git a/libicuuc/libicu/.gitattributes b/libicuuc/libicu/.gitattributes new file mode 100644 index 0000000..c31aa68 --- /dev/null +++ b/libicuuc/libicu/.gitattributes @@ -0,0 +1 @@ +uc symlink=dir diff --git a/libicuuc/libicu/buildfile b/libicuuc/libicu/buildfile index e5c451f..63a6287 100644 --- a/libicuuc/libicu/buildfile +++ b/libicuuc/libicu/buildfile @@ -3,9 +3,15 @@ ./: lib{icuuc} lib{icudata} -patched = ucasemap unistr uniset +patched = putil ucasemap uniset unistr -lib{icuuc}: uc/hxx{**} uc/cxx{* -{$patched}} cxx{$patched} lib{icudata} +# Note that some of the uc/unicode/*.h files are C++ headers and some are C +# headers. We will assume all of them as C headers, which is a bit of a hack +# but doing it properly is too painful. +# +lib{icuuc}: uc/hxx{** -unicode/*} uc/unicode/h{*} \ + uc/cxx{* -{$patched}} cxx{$patched} \ + lib{icudata} lib{icudata}: data/{h c}{*} uc/unicode/h{*} @@ -21,6 +27,9 @@ windows = ($tclass == 'windows') # Build options. # +# Note that we build without plugins support (UCONFIG_ENABLE_PLUGINS macro is +# undefined) and thus we don't define the DEFAULT_ICU_PLUGINS macro either. 
+# cc.poptions += -DU_ATTRIBUTE_DEPRECATED= cxx.poptions += -DU_COMMON_IMPLEMENTATION @@ -65,11 +74,11 @@ cc.poptions =+ "-I$src_root" "-I$src_base/uc" obja{*}: cc.poptions += -DU_STATIC_IMPLEMENTATION -switch $c.class, $tsys +switch $c.class { case 'gcc' { - cc.coptions += -fvisibility=hidden + cc.coptions += -fvisibility=hidden # Disable warnings that pop up with -Wall -Wextra. Upstream doesn't seem # to care about these and it is not easy to disable specific warnings in a @@ -97,21 +106,6 @@ switch $c.class, $tsys } } -# If install.root is specified, then define the DEFAULT_ICU_PLUGINS macro when -# compiling uc/icuplug.cpp and leave it undefined otherwise. -# -if ($install.root != [null]) -{ - plugins_dir = $install.resolve($install.lib)/icu - - # Escape backslashes and quotes in the directory path prior to representing - # it as C++ string literal. - # - pd = $regex.replace($plugins_dir, '(\\|")', '\\\1') - - uc/obj{icuplug}: cxx.poptions += -DDEFAULT_ICU_PLUGINS="\"$pd\"" -} - switch $tclass, $tsys { case 'linux' @@ -167,6 +161,6 @@ else # Install headers from the uc/unicode/ subdirectory only. # -h{*}: install = false -hxx{*}: install = false -uc/unicode/hxx{*}: install = include/unicode/ +h{*}: install = false +hxx{*}: install = false +uc/unicode/h{*}: install = include/unicode/ diff --git a/libicuuc/libicu/putil.cpp b/libicuuc/libicu/putil.cpp new file mode 100644 index 0000000..58cc8a6 --- /dev/null +++ b/libicuuc/libicu/putil.cpp @@ -0,0 +1,2413 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) +* +* Date Name Description +* 04/14/97 aliu Creation. 
+* 04/24/97 aliu Added getDefaultDataDirectory() and +* getDefaultLocaleID(). +* 04/28/97 aliu Rewritten to assume Unix and apply general methods +* for assumed case. Non-UNIX platforms must be +* special-cased. Rewrote numeric methods dealing +* with NaN and Infinity to be platform independent +* over all IEEE 754 platforms. +* 05/13/97 aliu Restored sign of timezone +* (semantics are hours West of GMT) +* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, +* nextDouble.. +* 07/22/98 stephen Added remainder, max, min, trunc +* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity +* 08/24/98 stephen Added longBitsFromDouble +* 09/08/98 stephen Minor changes for Mac Port +* 03/02/99 stephen Removed openFile(). Added AS400 support. +* Fixed EBCDIC tables +* 04/15/99 stephen Converted to C. +* 06/28/99 stephen Removed mutex locking in u_isBigEndian(). +* 08/04/99 jeffrey R. Added OS/2 changes +* 11/15/99 helena Integrated S/390 IEEE support. +* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID +* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage +* 01/03/08 Steven L. Fake Time Support +****************************************************************************** +*/ + +// Defines _XOPEN_SOURCE for access to POSIX functions. +// Must be before any other #includes. +#include "uposixdefs.h" + +// First, the platform type. Need this for U_PLATFORM. +#include "unicode/platform.h" + +/* + * Cygwin with GCC requires inclusion of time.h after the above disabling strict asci mode statement. + */ +#include <time.h> + +#if !U_PLATFORM_USES_ONLY_WIN32_API +#include <sys/time.h> +#endif + +/* include the rest of the ICU headers */ +#include "unicode/putil.h" +#include "unicode/ustring.h" +#include "putilimp.h" +#include "uassert.h" +#include "umutex.h" +#include "cmemory.h" +#include "cstring.h" +#include "locmap.h" +#include "ucln_cmn.h" +#include "charstr.h" + +/* Include standard headers. 
*/ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <locale.h> +#include <float.h> + +#ifndef U_COMMON_IMPLEMENTATION +#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu +#endif + + +/* include system headers */ +#if U_PLATFORM_USES_ONLY_WIN32_API + /* + * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW. + * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API) + * to use native APIs as much as possible? + */ +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +# define VC_EXTRALEAN +# define NOUSER +# define NOSERVICE +# define NOIME +# define NOMCX +# include <windows.h> +# include "unicode/uloc.h" +# include "wintz.h" +#elif U_PLATFORM == U_PF_OS400 +# include <float.h> +# include <qusec.h> /* error code structure */ +# include <qusrjobi.h> +# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ +# include <mih/testptr.h> /* For uprv_maximumPtr */ +#elif U_PLATFORM == U_PF_OS390 +# include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ +#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS +# include <limits.h> +# include <unistd.h> +# if U_PLATFORM == U_PF_SOLARIS +# ifndef _XPG4_2 +# define _XPG4_2 +# endif +# endif +#elif U_PLATFORM == U_PF_QNX +# include <sys/neutrino.h> +#endif + +/* + * Only include langinfo.h if we have a way to get the codeset. If we later + * depend on more feature, we can test on U_HAVE_NL_LANGINFO. + * + */ + +#if U_HAVE_NL_LANGINFO_CODESET +#include <langinfo.h> +#endif + +/** + * Simple things (presence of functions, etc) should just go in configure.in and be added to + * icucfg.h via autoheader. 
+ */ +#if U_PLATFORM_IMPLEMENTS_POSIX +# if U_PLATFORM == U_PF_OS400 +# define HAVE_DLFCN_H 0 +# define HAVE_DLOPEN 0 +# else +# ifndef HAVE_DLFCN_H +# define HAVE_DLFCN_H 1 +# endif +# ifndef HAVE_DLOPEN +# define HAVE_DLOPEN 1 +# endif +# endif +# ifndef HAVE_GETTIMEOFDAY +# define HAVE_GETTIMEOFDAY 1 +# endif +#else +# define HAVE_DLFCN_H 0 +# define HAVE_DLOPEN 0 +# define HAVE_GETTIMEOFDAY 0 +#endif + +U_NAMESPACE_USE + +/* Define the extension for data files, again... */ +#define DATA_TYPE "dat" + +/* Leave this copyright notice here! */ +static const char copyright[] = U_COPYRIGHT_STRING; + +/* floating point implementations ------------------------------------------- */ + +/* We return QNAN rather than SNAN*/ +#define SIGN 0x80000000U + +/* Make it easy to define certain types of constants */ +typedef union { + int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */ + double d64; +} BitPatternConversion; +static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) }; +static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) }; + +/*--------------------------------------------------------------------------- + Platform utilities + Our general strategy is to assume we're on a POSIX platform. Platforms which + are non-POSIX must declare themselves so. The default POSIX implementation + will sometimes work for non-POSIX platforms as well (e.g., the NaN-related + functions). + ---------------------------------------------------------------------------*/ + +#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400 +# undef U_POSIX_LOCALE +#else +# define U_POSIX_LOCALE 1 +#endif + +/* + WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble + can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2). 
+*/ +#if !IEEE_754 +static char* +u_topNBytesOfDouble(double* d, int n) +{ +#if U_IS_BIG_ENDIAN + return (char*)d; +#else + return (char*)(d + 1) - n; +#endif +} + +static char* +u_bottomNBytesOfDouble(double* d, int n) +{ +#if U_IS_BIG_ENDIAN + return (char*)(d + 1) - n; +#else + return (char*)d; +#endif +} +#endif /* !IEEE_754 */ + +#if IEEE_754 +static UBool +u_signBit(double d) { + uint8_t hiByte; +#if U_IS_BIG_ENDIAN + hiByte = *(uint8_t *)&d; +#else + hiByte = *(((uint8_t *)&d) + sizeof(double) - 1); +#endif + return (hiByte & 0x80) != 0; +} +#endif + + + +#if defined (U_DEBUG_FAKETIME) +/* Override the clock to test things without having to move the system clock. + * Assumes POSIX gettimeofday() will function + */ +UDate fakeClock_t0 = 0; /** Time to start the clock from **/ +UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/ +UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/ + +static UDate getUTCtime_real() { + struct timeval posixTime; + gettimeofday(&posixTime, NULL); + return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); +} + +static UDate getUTCtime_fake() { + static UMutex fakeClockMutex; + umtx_lock(&fakeClockMutex); + if(!fakeClock_set) { + UDate real = getUTCtime_real(); + const char *fake_start = getenv("U_FAKETIME_START"); + if((fake_start!=NULL) && (fake_start[0]!=0)) { + sscanf(fake_start,"%lf",&fakeClock_t0); + fakeClock_dt = fakeClock_t0 - real; + fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n" + "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n", + fakeClock_t0, fake_start, fakeClock_dt, real); + } else { + fakeClock_dt = 0; + fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n" + "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n"); + } + fakeClock_set = TRUE; + } + 
umtx_unlock(&fakeClockMutex); + + return getUTCtime_real() + fakeClock_dt; +} +#endif + +#if U_PLATFORM_USES_ONLY_WIN32_API +typedef union { + int64_t int64; + FILETIME fileTime; +} FileTimeConversion; /* This is like a ULARGE_INTEGER */ + +/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */ +#define EPOCH_BIAS INT64_C(116444736000000000) +#define HECTONANOSECOND_PER_MILLISECOND 10000 + +#endif + +/*--------------------------------------------------------------------------- + Universal Implementations + These are designed to work on all platforms. Try these, and if they + don't work on your platform, then special case your platform with new + implementations. +---------------------------------------------------------------------------*/ + +U_CAPI UDate U_EXPORT2 +uprv_getUTCtime() +{ +#if defined(U_DEBUG_FAKETIME) + return getUTCtime_fake(); /* Hook for overriding the clock */ +#else + return uprv_getRawUTCtime(); +#endif +} + +/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ +U_CAPI UDate U_EXPORT2 +uprv_getRawUTCtime() +{ +#if U_PLATFORM_USES_ONLY_WIN32_API + + FileTimeConversion winTime; + GetSystemTimeAsFileTime(&winTime.fileTime); + return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); +#else + +#if HAVE_GETTIMEOFDAY + struct timeval posixTime; + gettimeofday(&posixTime, NULL); + return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); +#else + time_t epochtime; + time(&epochtime); + return (UDate)epochtime * U_MILLIS_PER_SECOND; +#endif + +#endif +} + +/*----------------------------------------------------------------------------- + IEEE 754 + These methods detect and return NaN and infinity values for doubles + conforming to IEEE 754. Platforms which support this standard include X86, + Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. + If this doesn't work on your platform, you have non-IEEE floating-point, and + will need to code your own versions. 
A naive implementation is to return 0.0 + for getNaN and getInfinity, and false for isNaN and isInfinite. + ---------------------------------------------------------------------------*/ + +U_CAPI UBool U_EXPORT2 +uprv_isNaN(double number) +{ +#if IEEE_754 + BitPatternConversion convertedNumber; + convertedNumber.d64 = number; + /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */ + return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); + +#elif U_PLATFORM == U_PF_OS390 + uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, + sizeof(uint32_t)); + uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, + sizeof(uint32_t)); + + return ((highBits & 0x7F080000L) == 0x7F080000L) && + (lowBits == 0x00000000L); + +#else + /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ + /* you'll need to replace this default implementation with what's correct*/ + /* for your platform.*/ + return number != number; +#endif +} + +U_CAPI UBool U_EXPORT2 +uprv_isInfinite(double number) +{ +#if IEEE_754 + BitPatternConversion convertedNumber; + convertedNumber.d64 = number; + /* Infinity is exactly 0x7FF0000000000000U. 
*/ + return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); +#elif U_PLATFORM == U_PF_OS390 + uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, + sizeof(uint32_t)); + uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, + sizeof(uint32_t)); + + return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); + +#else + /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ + /* value, you'll need to replace this default implementation with what's*/ + /* correct for your platform.*/ + return number == (2.0 * number); +#endif +} + +U_CAPI UBool U_EXPORT2 +uprv_isPositiveInfinity(double number) +{ +#if IEEE_754 || U_PLATFORM == U_PF_OS390 + return (UBool)(number > 0 && uprv_isInfinite(number)); +#else + return uprv_isInfinite(number); +#endif +} + +U_CAPI UBool U_EXPORT2 +uprv_isNegativeInfinity(double number) +{ +#if IEEE_754 || U_PLATFORM == U_PF_OS390 + return (UBool)(number < 0 && uprv_isInfinite(number)); + +#else + uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, + sizeof(uint32_t)); + return((highBits & SIGN) && uprv_isInfinite(number)); + +#endif +} + +U_CAPI double U_EXPORT2 +uprv_getNaN() +{ +#if IEEE_754 || U_PLATFORM == U_PF_OS390 + return gNan.d64; +#else + /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ + /* you'll need to replace this default implementation with what's correct*/ + /* for your platform.*/ + return 0.0; +#endif +} + +U_CAPI double U_EXPORT2 +uprv_getInfinity() +{ +#if IEEE_754 || U_PLATFORM == U_PF_OS390 + return gInf.d64; +#else + /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ + /* value, you'll need to replace this default implementation with what's*/ + /* correct for your platform.*/ + return 0.0; +#endif +} + +U_CAPI double U_EXPORT2 +uprv_floor(double x) +{ + return floor(x); +} + +U_CAPI double U_EXPORT2 +uprv_ceil(double x) +{ + return ceil(x); +} + +U_CAPI double U_EXPORT2 +uprv_round(double x) +{ + return 
uprv_floor(x + 0.5); +} + +U_CAPI double U_EXPORT2 +uprv_fabs(double x) +{ + return fabs(x); +} + +U_CAPI double U_EXPORT2 +uprv_modf(double x, double* y) +{ + return modf(x, y); +} + +U_CAPI double U_EXPORT2 +uprv_fmod(double x, double y) +{ + return fmod(x, y); +} + +U_CAPI double U_EXPORT2 +uprv_pow(double x, double y) +{ + /* This is declared as "double pow(double x, double y)" */ + return pow(x, y); +} + +U_CAPI double U_EXPORT2 +uprv_pow10(int32_t x) +{ + return pow(10.0, (double)x); +} + +U_CAPI double U_EXPORT2 +uprv_fmax(double x, double y) +{ +#if IEEE_754 + /* first handle NaN*/ + if(uprv_isNaN(x) || uprv_isNaN(y)) + return uprv_getNaN(); + + /* check for -0 and 0*/ + if(x == 0.0 && y == 0.0 && u_signBit(x)) + return y; + +#endif + + /* this should work for all flt point w/o NaN and Inf special cases */ + return (x > y ? x : y); +} + +U_CAPI double U_EXPORT2 +uprv_fmin(double x, double y) +{ +#if IEEE_754 + /* first handle NaN*/ + if(uprv_isNaN(x) || uprv_isNaN(y)) + return uprv_getNaN(); + + /* check for -0 and 0*/ + if(x == 0.0 && y == 0.0 && u_signBit(y)) + return y; + +#endif + + /* this should work for all flt point w/o NaN and Inf special cases */ + return (x > y ? y : x); +} + +U_CAPI UBool U_EXPORT2 +uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) { + // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow. + // This function could be optimized by calling one of those primitives. + auto a64 = static_cast<int64_t>(a); + auto b64 = static_cast<int64_t>(b); + int64_t res64 = a64 + b64; + *res = static_cast<int32_t>(res64); + return res64 != *res; +} + +U_CAPI UBool U_EXPORT2 +uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) { + // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow. + // This function could be optimized by calling one of those primitives. 
+ auto a64 = static_cast<int64_t>(a); + auto b64 = static_cast<int64_t>(b); + int64_t res64 = a64 * b64; + *res = static_cast<int32_t>(res64); + return res64 != *res; +} + +/** + * Truncates the given double. + * trunc(3.3) = 3.0, trunc (-3.3) = -3.0 + * This is different than calling floor() or ceil(): + * floor(3.3) = 3, floor(-3.3) = -4 + * ceil(3.3) = 4, ceil(-3.3) = -3 + */ +U_CAPI double U_EXPORT2 +uprv_trunc(double d) +{ +#if IEEE_754 + /* handle error cases*/ + if(uprv_isNaN(d)) + return uprv_getNaN(); + if(uprv_isInfinite(d)) + return uprv_getInfinity(); + + if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */ + return ceil(d); + else + return floor(d); + +#else + return d >= 0 ? floor(d) : ceil(d); + +#endif +} + +/** + * Return the largest positive number that can be represented by an integer + * type of arbitrary bit length. + */ +U_CAPI double U_EXPORT2 +uprv_maxMantissa(void) +{ + return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; +} + +U_CAPI double U_EXPORT2 +uprv_log(double d) +{ + return log(d); +} + +U_CAPI void * U_EXPORT2 +uprv_maximumPtr(void * base) +{ +#if U_PLATFORM == U_PF_OS400 + /* + * With the provided function we should never be out of range of a given segment + * (a traditional/typical segment that is). Our segments have 5 bytes for the + * id and 3 bytes for the offset. The key is that the casting takes care of + * only retrieving the offset portion minus x1000. Hence, the smallest offset + * seen in a program is x001000 and when casted to an int would be 0. + * That's why we can only add 0xffefff. Otherwise, we would exceed the segment. + * + * Currently, 16MB is the current addressing limitation on i5/OS if the activation is + * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information). 
+ * This function determines the activation based on the pointer that is passed in and + * calculates the appropriate maximum available size for + * each pointer type (TERASPACE and non-TERASPACE) + * + * Unlike other operating systems, the pointer model isn't determined at + * compile time on i5/OS. + */ + if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) { + /* if it is a TERASPACE pointer the max is 2GB - 4k */ + return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); + } + /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */ + return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff))); + +#else + return U_MAX_PTR(base); +#endif +} + +/*--------------------------------------------------------------------------- + Platform-specific Implementations + Try these, and if they don't work on your platform, then special case your + platform with new implementations. + ---------------------------------------------------------------------------*/ + +/* Generic time zone layer -------------------------------------------------- */ + +/* Time zone utilities */ +U_CAPI void U_EXPORT2 +uprv_tzset() +{ +#if defined(U_TZSET) + U_TZSET(); +#else + /* no initialization*/ +#endif +} + +U_CAPI int32_t U_EXPORT2 +uprv_timezone() +{ +#ifdef U_TIMEZONE + return U_TIMEZONE; +#else + time_t t, t1, t2; + struct tm tmrec; + int32_t tdiff = 0; + + time(&t); + uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); +#if U_PLATFORM != U_PF_IPHONE + UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ +#endif + t1 = mktime(&tmrec); /* local time in seconds*/ + uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); + t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ + tdiff = t2 - t1; + +#if U_PLATFORM != U_PF_IPHONE + /* imitate NT behaviour, which returns same timezone offset to GMT for + winter and summer. + This does not work on all platforms. 
For instance, on glibc on Linux + and on Mac OS 10.5, tdiff calculated above remains the same + regardless of whether DST is in effect or not. iOS is another + platform where this does not work. Linux + glibc and Mac OS 10.5 + have U_TIMEZONE defined so that this code is not reached. + */ + if (dst_checked) + tdiff += 3600; +#endif + return tdiff; +#endif +} + +/* Note that U_TZNAME does *not* have to be tzname, but if it is, + some platforms need to have it declared here. */ + +#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED) +/* RS6000 and others reject char **tzname. */ +extern U_IMPORT char *U_TZNAME[]; +#endif + +#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS) +/* These platforms are likely to use Olson timezone IDs. */ +/* common targets of the symbolic link at TZDEFAULT are: + * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12 + * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12 + * "/usr/share/lib/zoneinfo/<olsonID>" Solaris + * "../usr/share/lib/zoneinfo/<olsonID>" Solaris + * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13 + * To avoid checking lots of paths, just check that the target path + * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid. + */ + +#define CHECK_LOCALTIME_LINK 1 +#if U_PLATFORM_IS_DARWIN_BASED +#include <tzfile.h> +#define TZZONEINFO (TZDIR "/") +#elif U_PLATFORM == U_PF_SOLARIS +#define TZDEFAULT "/etc/localtime" +#define TZZONEINFO "/usr/share/lib/zoneinfo/" +#define TZ_ENV_CHECK "localtime" +#else +#define TZDEFAULT "/etc/localtime" +#define TZZONEINFO "/usr/share/zoneinfo/" +#endif +#define TZZONEINFOTAIL "/zoneinfo/" +#if U_HAVE_DIRENT_H +#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. 
*/ +/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo + symlinked to /etc/localtime, which makes searchForTZFile return + 'localtime' when it's the first match. */ +#define TZFILE_SKIP2 "localtime" +#define SEARCH_TZFILE +#include <dirent.h> /* Needed to search through system timezone files */ +#endif +static char gTimeZoneBuffer[PATH_MAX]; +static char *gTimeZoneBufferPtr = NULL; +#endif + +#if !U_PLATFORM_USES_ONLY_WIN32_API +#define isNonDigit(ch) (ch < '0' || '9' < ch) +static UBool isValidOlsonID(const char *id) { + int32_t idx = 0; + + /* Determine if this is something like Iceland (Olson ID) + or AST4ADT (non-Olson ID) */ + while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { + idx++; + } + + /* If we went through the whole string, then it might be okay. + The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", + "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. + The rest of the time it could be an Olson ID. George */ + return (UBool)(id[idx] == 0 + || uprv_strcmp(id, "PST8PDT") == 0 + || uprv_strcmp(id, "MST7MDT") == 0 + || uprv_strcmp(id, "CST6CDT") == 0 + || uprv_strcmp(id, "EST5EDT") == 0); +} + +/* On some Unix-like OS, 'posix' subdirectory in + /usr/share/zoneinfo replicates the top-level contents. 'right' + subdirectory has the same set of files, but individual files + are different from those in the top-level directory or 'posix' + because 'right' has files for TAI (Int'l Atomic Time) while 'posix' + has files for UTC. + When the first match for /etc/localtime is in either of them + (usually in posix because 'right' has different file contents), + or TZ environment variable points to one of them, createTimeZone + fails because, say, 'posix/America/New_York' is not an Olson + timezone id ('America/New_York' is). So, we have to skip + 'posix/' and 'right/' at the beginning. 
*/ +static void skipZoneIDPrefix(const char** id) { + if (uprv_strncmp(*id, "posix/", 6) == 0 + || uprv_strncmp(*id, "right/", 6) == 0) + { + *id += 6; + } +} +#endif + +#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API + +#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) +typedef struct OffsetZoneMapping { + int32_t offsetSeconds; + int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/ + const char *stdID; + const char *dstID; + const char *olsonID; +} OffsetZoneMapping; + +enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 }; + +/* +This list tries to disambiguate a set of abbreviated timezone IDs and offsets +and maps it to an Olson ID. +Before adding anything to this list, take a look at +icu/source/tools/tzcode/tz.alias +Sometimes no daylight savings (0) is important to define due to aliases. +This list can be tested with icu/source/test/compat/tzone.pl +More values could be added to daylightType to increase precision. 
+*/ +static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = { + {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"}, + {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"}, + {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"}, + {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"}, + {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"}, + {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"}, + {-36000, 2, "EST", "EST", "Australia/Sydney"}, + {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"}, + {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"}, + {-34200, 2, "CST", "CST", "Australia/South"}, + {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"}, + {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"}, + {-31500, 2, "CWST", "CWST", "Australia/Eucla"}, + {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"}, + {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"}, + {-28800, 2, "WST", "WST", "Australia/West"}, + {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"}, + {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"}, + {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"}, + {-21600, 1, "OMST", "OMSST", "Asia/Omsk"}, + {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"}, + {-14400, 1, "SAMT", "SAMST", "Europe/Samara"}, + {-14400, 1, "AMT", "AMST", "Asia/Yerevan"}, + {-14400, 1, "AZT", "AZST", "Asia/Baku"}, + {-10800, 1, "AST", "ADT", "Asia/Baghdad"}, + {-10800, 1, "MSK", "MSD", "Europe/Moscow"}, + {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"}, + {-7200, 0, "EET", "CEST", "Africa/Tripoli"}, + {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */ + {-7200, 1, "IST", "IDT", "Asia/Jerusalem"}, + {-3600, 0, "CET", "WEST", "Africa/Algiers"}, + {-3600, 2, "WAT", "WAST", "Africa/Windhoek"}, + {0, 1, "GMT", "IST", "Europe/Dublin"}, + {0, 1, "GMT", "BST", "Europe/London"}, + {0, 0, "WET", "WEST", "Africa/Casablanca"}, + {0, 0, "WET", "WET", "Africa/El_Aaiun"}, + {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"}, + {3600, 1, "EGT", "EGST", "America/Scoresbysund"}, + {10800, 1, "PMST", "PMDT", "America/Miquelon"}, + {10800, 2, 
"UYT", "UYST", "America/Montevideo"}, + {10800, 1, "WGT", "WGST", "America/Godthab"}, + {10800, 2, "BRT", "BRST", "Brazil/East"}, + {12600, 1, "NST", "NDT", "America/St_Johns"}, + {14400, 1, "AST", "ADT", "Canada/Atlantic"}, + {14400, 2, "AMT", "AMST", "America/Cuiaba"}, + {14400, 2, "CLT", "CLST", "Chile/Continental"}, + {14400, 2, "FKT", "FKST", "Atlantic/Stanley"}, + {14400, 2, "PYT", "PYST", "America/Asuncion"}, + {18000, 1, "CST", "CDT", "America/Havana"}, + {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */ + {21600, 2, "EAST", "EASST", "Chile/EasterIsland"}, + {21600, 0, "CST", "MDT", "Canada/Saskatchewan"}, + {21600, 0, "CST", "CDT", "America/Guatemala"}, + {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */ + {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */ + {28800, 0, "PST", "PST", "Pacific/Pitcairn"}, + {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */ + {32400, 1, "AKST", "AKDT", "US/Alaska"}, + {36000, 1, "HAST", "HADT", "US/Aleutian"} +}; + +/*#define DEBUG_TZNAME*/ + +static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset) +{ + int32_t idx; +#ifdef DEBUG_TZNAME + fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset); +#endif + for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++) + { + if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds + && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType + && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0 + && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0) + { + return OFFSET_ZONE_MAPPINGS[idx].olsonID; + } + } + return NULL; +} +#endif + +#ifdef SEARCH_TZFILE +#define MAX_READ_SIZE 512 + +typedef struct DefaultTZInfo { + char* defaultTZBuffer; + int64_t defaultTZFileSize; + FILE* defaultTZFilePtr; + UBool defaultTZstatus; + int32_t defaultTZPosition; +} 
DefaultTZInfo; + +/* + * This method compares the two files given to see if they are a match. + * It is currently use to compare two TZ files. + */ +static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) { + FILE* file; + int64_t sizeFile; + int64_t sizeFileLeft; + int32_t sizeFileRead; + int32_t sizeFileToRead; + char bufferFile[MAX_READ_SIZE]; + UBool result = TRUE; + + if (tzInfo->defaultTZFilePtr == NULL) { + tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r"); + } + file = fopen(TZFileName, "r"); + + tzInfo->defaultTZPosition = 0; /* reset position to begin search */ + + if (file != NULL && tzInfo->defaultTZFilePtr != NULL) { + /* First check that the file size are equal. */ + if (tzInfo->defaultTZFileSize == 0) { + fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END); + tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr); + } + fseek(file, 0, SEEK_END); + sizeFile = ftell(file); + sizeFileLeft = sizeFile; + + if (sizeFile != tzInfo->defaultTZFileSize) { + result = FALSE; + } else { + /* Store the data from the files in seperate buffers and + * compare each byte to determine equality. + */ + if (tzInfo->defaultTZBuffer == NULL) { + rewind(tzInfo->defaultTZFilePtr); + tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize); + sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr); + } + rewind(file); + while(sizeFileLeft > 0) { + uprv_memset(bufferFile, 0, MAX_READ_SIZE); + sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? 
sizeFileLeft : MAX_READ_SIZE; + + sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file); + if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) { + result = FALSE; + break; + } + sizeFileLeft -= sizeFileRead; + tzInfo->defaultTZPosition += sizeFileRead; + } + } + } else { + result = FALSE; + } + + if (file != NULL) { + fclose(file); + } + + return result; +} + + +/* dirent also lists two entries: "." and ".." that we can safely ignore. */ +#define SKIP1 "." +#define SKIP2 ".." +static UBool U_CALLCONV putil_cleanup(void); +static CharString *gSearchTZFileResult = NULL; + +/* + * This method recursively traverses the directory given for a matching TZ file and returns the first match. + * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results. + */ +static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { + DIR* dirp = NULL; + struct dirent* dirEntry = NULL; + char* result = NULL; + UErrorCode status = U_ZERO_ERROR; + + /* Save the current path */ + CharString curpath(path, -1, status); + if (U_FAILURE(status)) { + goto cleanupAndReturn; + } + + dirp = opendir(path); + if (dirp == NULL) { + goto cleanupAndReturn; + } + + if (gSearchTZFileResult == NULL) { + gSearchTZFileResult = new CharString; + if (gSearchTZFileResult == NULL) { + goto cleanupAndReturn; + } + ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); + } + + /* Check each entry in the directory. */ + while((dirEntry = readdir(dirp)) != NULL) { + const char* dirName = dirEntry->d_name; + if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0 + && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) { + /* Create a newpath with the new entry to test each entry in the directory. 
*/ + CharString newpath(curpath, status); + newpath.append(dirName, -1, status); + if (U_FAILURE(status)) { + break; + } + + DIR* subDirp = NULL; + if ((subDirp = opendir(newpath.data())) != NULL) { + /* If this new path is a directory, make a recursive call with the newpath. */ + closedir(subDirp); + newpath.append('/', status); + if (U_FAILURE(status)) { + break; + } + result = searchForTZFile(newpath.data(), tzInfo); + /* + Have to get out here. Otherwise, we'd keep looking + and return the first match in the top-level directory + if there's a match in the top-level. If not, this function + would return NULL and set gTimeZoneBufferPtr to NULL in initDefault(). + It worked without this in most cases because we have a fallback of calling + localtime_r to figure out the default timezone. + */ + if (result != NULL) + break; + } else { + if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) { + int32_t amountToSkip = sizeof(TZZONEINFO) - 1; + if (amountToSkip > newpath.length()) { + amountToSkip = newpath.length(); + } + const char* zoneid = newpath.data() + amountToSkip; + skipZoneIDPrefix(&zoneid); + gSearchTZFileResult->clear(); + gSearchTZFileResult->append(zoneid, -1, status); + if (U_FAILURE(status)) { + break; + } + result = gSearchTZFileResult->data(); + /* Get out after the first one found. */ + break; + } + } + } + } + + cleanupAndReturn: + if (dirp) { + closedir(dirp); + } + return result; +} +#endif + +U_CAPI void U_EXPORT2 +uprv_tzname_clear_cache() +{ +#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) + gTimeZoneBufferPtr = NULL; +#endif +} + +U_CAPI const char* U_EXPORT2 +uprv_tzname(int n) +{ + (void)n; // Avoid unreferenced parameter warning. 
+ const char *tzid = NULL; +#if U_PLATFORM_USES_ONLY_WIN32_API + tzid = uprv_detectWindowsTimeZone(); + + if (tzid != NULL) { + return tzid; + } + +#ifndef U_TZNAME + // The return value is free'd in timezone.cpp on Windows because + // the other code path returns a pointer to a heap location. + // If we don't have a name already, then tzname wouldn't be any + // better, so just fall back. + return uprv_strdup(""); +#endif // !U_TZNAME + +#else + +/*#if U_PLATFORM_IS_DARWIN_BASED + int ret; + + tzid = getenv("TZFILE"); + if (tzid != NULL) { + return tzid; + } +#endif*/ + +/* This code can be temporarily disabled to test tzname resolution later on. */ +#ifndef DEBUG_TZNAME + tzid = getenv("TZ"); + if (tzid != NULL && isValidOlsonID(tzid) +#if U_PLATFORM == U_PF_SOLARIS + /* When TZ equals localtime on Solaris, check the /etc/localtime file. */ + && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0 +#endif + ) { + /* The colon forces tzset() to treat the remainder as zoneinfo path */ + if (tzid[0] == ':') { + tzid++; + } + /* This might be a good Olson ID. */ + skipZoneIDPrefix(&tzid); + return tzid; + } + /* else U_TZNAME will give a better result. */ +#endif + +#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) + /* Caller must handle threading issues */ + if (gTimeZoneBufferPtr == NULL) { + /* + This is a trick to look at the name of the link to get the Olson ID + because the tzfile contents is underspecified. + This isn't guaranteed to work because it may not be a symlink. 
+ */ + int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1); + if (0 < ret) { + int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL); + gTimeZoneBuffer[ret] = 0; + char * tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL); + + if (tzZoneInfoTailPtr != NULL + && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen)) + { + return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen); + } + } else { +#if defined(SEARCH_TZFILE) + DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo)); + if (tzInfo != NULL) { + tzInfo->defaultTZBuffer = NULL; + tzInfo->defaultTZFileSize = 0; + tzInfo->defaultTZFilePtr = NULL; + tzInfo->defaultTZstatus = FALSE; + tzInfo->defaultTZPosition = 0; + + gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo); + + /* Free previously allocated memory */ + if (tzInfo->defaultTZBuffer != NULL) { + uprv_free(tzInfo->defaultTZBuffer); + } + if (tzInfo->defaultTZFilePtr != NULL) { + fclose(tzInfo->defaultTZFilePtr); + } + uprv_free(tzInfo); + } + + if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) { + return gTimeZoneBufferPtr; + } +#endif + } + } + else { + return gTimeZoneBufferPtr; + } +#endif +#endif + +#ifdef U_TZNAME +#if U_PLATFORM_USES_ONLY_WIN32_API + /* The return value is free'd in timezone.cpp on Windows because + * the other code path returns a pointer to a heap location. */ + return uprv_strdup(U_TZNAME[n]); +#else + /* + U_TZNAME is usually a non-unique abbreviation, which isn't normally usable. + So we remap the abbreviation to an olson ID. + + Since Windows exposes a little more timezone information, + we normally don't use this code on Windows because + uprv_detectWindowsTimeZone should have already given the correct answer. 
+ */ + { + struct tm juneSol, decemberSol; + int daylightType; + static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/ + static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/ + + /* This probing will tell us when daylight savings occurs. */ + localtime_r(&juneSolstice, &juneSol); + localtime_r(&decemberSolstice, &decemberSol); + if(decemberSol.tm_isdst > 0) { + daylightType = U_DAYLIGHT_DECEMBER; + } else if(juneSol.tm_isdst > 0) { + daylightType = U_DAYLIGHT_JUNE; + } else { + daylightType = U_DAYLIGHT_NONE; + } + tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone()); + if (tzid != NULL) { + return tzid; + } + } + return U_TZNAME[n]; +#endif +#else + return ""; +#endif +} + +/* Get and set the ICU data directory --------------------------------------- */ + +static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER; +static char *gDataDirectory = NULL; + +UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER; +static CharString *gTimeZoneFilesDirectory = NULL; + +#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API + static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */ + static bool gCorrectedPOSIXLocaleHeapAllocated = false; +#endif + +static UBool U_CALLCONV putil_cleanup(void) +{ + if (gDataDirectory && *gDataDirectory) { + uprv_free(gDataDirectory); + } + gDataDirectory = NULL; + gDataDirInitOnce.reset(); + + delete gTimeZoneFilesDirectory; + gTimeZoneFilesDirectory = NULL; + gTimeZoneFilesInitOnce.reset(); + +#ifdef SEARCH_TZFILE + delete gSearchTZFileResult; + gSearchTZFileResult = NULL; +#endif + +#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API + if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) { + uprv_free(const_cast<char *>(gCorrectedPOSIXLocale)); + gCorrectedPOSIXLocale = NULL; + gCorrectedPOSIXLocaleHeapAllocated = false; + } +#endif + return TRUE; +} + +/* + * Set the data directory. 
+ * Make a copy of the passed string, and set the global data dir to point to it. + */ +U_CAPI void U_EXPORT2 +u_setDataDirectory(const char *directory) { + char *newDataDir; + int32_t length; + + if(directory==NULL || *directory==0) { + /* A small optimization to prevent the malloc and copy when the + shared library is used, and this is a way to make sure that NULL + is never returned. + */ + newDataDir = (char *)""; + } + else { + length=(int32_t)uprv_strlen(directory); + newDataDir = (char *)uprv_malloc(length + 2); + /* Exit out if newDataDir could not be created. */ + if (newDataDir == NULL) { + return; + } + uprv_strcpy(newDataDir, directory); + +#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) + { + char *p; + while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) { + *p = U_FILE_SEP_CHAR; + } + } +#endif + } + + if (gDataDirectory && *gDataDirectory) { + uprv_free(gDataDirectory); + } + gDataDirectory = newDataDir; + ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); +} + +U_CAPI UBool U_EXPORT2 +uprv_pathIsAbsolute(const char *path) +{ + if(!path || !*path) { + return FALSE; + } + + if(*path == U_FILE_SEP_CHAR) { + return TRUE; + } + +#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) + if(*path == U_FILE_ALT_SEP_CHAR) { + return TRUE; + } +#endif + +#if U_PLATFORM_USES_ONLY_WIN32_API + if( (((path[0] >= 'A') && (path[0] <= 'Z')) || + ((path[0] >= 'a') && (path[0] <= 'z'))) && + path[1] == ':' ) { + return TRUE; + } +#endif + + return FALSE; +} + +/* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR + (needed for some Darwin ICU build environments) */ +#if U_PLATFORM_IS_DARWIN_BASED && TARGET_OS_SIMULATOR +# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR) +# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT" +# endif +#endif + +#if defined(ICU_DATA_DIR_WINDOWS) +// Helper function to get the ICU Data Directory under the Windows directory location. 
+static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength) +{ + wchar_t windowsPath[MAX_PATH]; + char windowsPathUtf8[MAX_PATH]; + + UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath)); + if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) { + // Convert UTF-16 to a UTF-8 string. + UErrorCode status = U_ZERO_ERROR; + int32_t windowsPathUtf8Len = 0; + u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)), + &windowsPathUtf8Len, reinterpret_cast<const UChar*>(windowsPath), -1, &status); + + if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) && + (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) { + // Ensure it always has a separator, so we can append the ICU data path. + if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) { + windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR; + windowsPathUtf8[windowsPathUtf8Len] = '\0'; + } + // Check if the concatenated string will fit. + if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) { + uprv_strcpy(directoryBuffer, windowsPathUtf8); + uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS); + return TRUE; + } + } + } + + return FALSE; +} +#endif + +static void U_CALLCONV dataDirectoryInitFn() { + /* If we already have the directory, then return immediately. Will happen if user called + * u_setDataDirectory(). + */ + if (gDataDirectory) { + return; + } + + const char *path = NULL; +#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) + char datadir_path_buffer[PATH_MAX]; +#endif + + /* + When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to + override ICU's data with the ICU_DATA environment variable. This prevents + problems where multiple custom copies of ICU's specific version of data + are installed on a system. 
Either the application must define the data + directory with u_setDataDirectory, define ICU_DATA_DIR when compiling + ICU, set the data with udata_setCommonData or trust that all of the + required data is contained in ICU's data library that contains + the entry point defined by U_ICUDATA_ENTRY_POINT. + + There may also be some platforms where environment variables + are not allowed. + */ +# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO + /* First try to get the environment variable */ +# if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv + path=getenv("ICU_DATA"); +# endif +# endif + + /* ICU_DATA_DIR may be set as a compile option. + * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time + * and is used only when data is built in archive mode eliminating the need + * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation + * directory of the data dat file. Users should use ICU_DATA_DIR if they want to + * set their own path. + */ +#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR) + if(path==NULL || *path==0) { +# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) + const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR); +# endif +# ifdef ICU_DATA_DIR + path=ICU_DATA_DIR; +# else + path=U_ICU_DATA_DEFAULT_DIR; +# endif +# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) + if (prefix != NULL) { + snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path); + path=datadir_path_buffer; + } +# endif + } +#endif + +#if defined(ICU_DATA_DIR_WINDOWS) + char datadir_path_buffer[MAX_PATH]; + if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) { + path = datadir_path_buffer; + } +#endif + + if(path==NULL) { + /* It looks really bad, set it to something. 
*/ + path = ""; + } + + u_setDataDirectory(path); + return; +} + +U_CAPI const char * U_EXPORT2 +u_getDataDirectory(void) { + umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn); + return gDataDirectory; +} + +static void setTimeZoneFilesDir(const char *path, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + gTimeZoneFilesDirectory->clear(); + gTimeZoneFilesDirectory->append(path, status); +#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) + char *p = gTimeZoneFilesDirectory->data(); + while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) { + *p = U_FILE_SEP_CHAR; + } +#endif +} + +#define TO_STRING(x) TO_STRING_2(x) +#define TO_STRING_2(x) #x + +static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) { + U_ASSERT(gTimeZoneFilesDirectory == NULL); + ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); + gTimeZoneFilesDirectory = new CharString(); + if (gTimeZoneFilesDirectory == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + + const char *dir = ""; + +#if U_PLATFORM_HAS_WINUWP_API == 1 +// The UWP version does not support the environment variable setting. + +# if defined(ICU_DATA_DIR_WINDOWS) + // When using the Windows system data, we can possibly pick up time zone data from the Windows directory. + char datadir_path_buffer[MAX_PATH]; + if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) { + dir = datadir_path_buffer; + } +# endif + +#else + dir = getenv("ICU_TIMEZONE_FILES_DIR"); +#endif // U_PLATFORM_HAS_WINUWP_API + +#if defined(U_TIMEZONE_FILES_DIR) + if (dir == NULL) { + // Build time configuration setting. + dir = TO_STRING(U_TIMEZONE_FILES_DIR); + } +#endif + + if (dir == NULL) { + dir = ""; + } + + setTimeZoneFilesDir(dir, status); +} + + +U_CAPI const char * U_EXPORT2 +u_getTimeZoneFilesDirectory(UErrorCode *status) { + umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status); + return U_SUCCESS(*status) ? 
gTimeZoneFilesDirectory->data() : ""; +} + +U_CAPI void U_EXPORT2 +u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) { + umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status); + setTimeZoneFilesDir(path, *status); + + // Note: this function does some extra churn, first setting based on the + // environment, then immediately replacing with the value passed in. + // The logic is simpler that way, and performance shouldn't be an issue. +} + + +#if U_POSIX_LOCALE +/* A helper function used by uprv_getPOSIXIDForDefaultLocale and + * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for + * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories. + */ +static const char *uprv_getPOSIXIDForCategory(int category) +{ + const char* posixID = NULL; + if (category == LC_MESSAGES || category == LC_CTYPE) { + /* + * On Solaris two different calls to setlocale can result in + * different values. Only get this value once. + * + * We must check this first because an application can set this. + * + * LC_ALL can't be used because it's platform dependent. The LANG + * environment variable seems to affect LC_CTYPE variable by default. + * Here is what setlocale(LC_ALL, NULL) can return. + * HPUX can return 'C C C C C C C' + * Solaris can return /en_US/C/C/C/C/C on the second try. + * Linux can return LC_CTYPE=C;LC_NUMERIC=C;... + * + * The default codepage detection also needs to use LC_CTYPE. + * + * Do not call setlocale(LC_*, "")! Using an empty string instead + * of NULL, will modify the libc behavior. + */ + posixID = setlocale(category, NULL); + if ((posixID == 0) + || (uprv_strcmp("C", posixID) == 0) + || (uprv_strcmp("POSIX", posixID) == 0)) + { + /* Maybe we got some garbage. Try something more reasonable */ + posixID = getenv("LC_ALL"); + /* Solaris speaks POSIX - See IEEE Std 1003.1-2008 + * This is needed to properly handle empty env. 
variables + */ +#if U_PLATFORM == U_PF_SOLARIS + if ((posixID == 0) || (posixID[0] == '\0')) { + posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); + if ((posixID == 0) || (posixID[0] == '\0')) { +#else + if (posixID == 0) { + posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); + if (posixID == 0) { +#endif + posixID = getenv("LANG"); + } + } + } + } + if ((posixID==0) + || (uprv_strcmp("C", posixID) == 0) + || (uprv_strcmp("POSIX", posixID) == 0)) + { + /* Nothing worked. Give it a nice POSIX default value. */ + posixID = "en_US_POSIX"; + // Note: this test will not catch 'C.UTF-8', + // that will be handled in uprv_getDefaultLocaleID(). + // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage() + // caller which expects to see "en_US_POSIX" in many branches. + } + return posixID; +} + +/* Return just the POSIX id for the default locale, whatever happens to be in + * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG. + */ +static const char *uprv_getPOSIXIDForDefaultLocale(void) +{ + static const char* posixID = NULL; + if (posixID == 0) { + posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES); + } + return posixID; +} + +#if !U_CHARSET_IS_UTF8 +/* Return just the POSIX id for the default codepage, whatever happens to be in + * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG. + */ +static const char *uprv_getPOSIXIDForDefaultCodepage(void) +{ + static const char* posixID = NULL; + if (posixID == 0) { + posixID = uprv_getPOSIXIDForCategory(LC_CTYPE); + } + return posixID; +} +#endif +#endif + +/* NOTE: The caller should handle thread safety */ +U_CAPI const char* U_EXPORT2 +uprv_getDefaultLocaleID() +{ +#if U_POSIX_LOCALE +/* + Note that: (a '!' 
means the ID is improper somehow) + LC_ALL ----> default_loc codepage +-------------------------------------------------------- + ab.CD ab CD + ab@CD ab__CD - + ab@CD.EF ab__CD EF + + ab_CD.EF@GH ab_CD_GH EF + +Some 'improper' ways to do the same as above: + ! ab_CD@GH.EF ab_CD_GH EF + ! ab_CD.EF@GH.IJ ab_CD_GH EF + ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF + + _CD@GH _CD_GH - + _CD.EF@GH _CD_GH EF + +The variant cannot have dots in it. +The 'rightmost' variant (@xxx) wins. +The leftmost codepage (.xxx) wins. +*/ + const char* posixID = uprv_getPOSIXIDForDefaultLocale(); + + /* Format: (no spaces) + ll [ _CC ] [ . MM ] [ @ VV] + + l = lang, C = ctry, M = charmap, V = variant + */ + + if (gCorrectedPOSIXLocale != nullptr) { + return gCorrectedPOSIXLocale; + } + + // Copy the ID into owned memory. + // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination + char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1)); + if (correctedPOSIXLocale == nullptr) { + return nullptr; + } + uprv_strcpy(correctedPOSIXLocale, posixID); + + char *limit; + if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) { + *limit = 0; + } + if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) { + *limit = 0; + } + + if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant + || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) { + // Raw input was C.* or POSIX.*, Give it a nice POSIX default value. + // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory()) + uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX"); + } + + /* Note that we scan the *uncorrected* ID. */ + const char *p; + if ((p = uprv_strrchr(posixID, '@')) != nullptr) { + p++; + + /* Take care of any special cases here.. */ + if (!uprv_strcmp(p, "nynorsk")) { + p = "NY"; + /* Don't worry about no__NY. In practice, it won't appear. 
*/ + } + + if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) { + uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */ + } + else { + uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */ + } + + const char *q; + if ((q = uprv_strchr(p, '.')) != nullptr) { + /* How big will the resulting string be? */ + int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); + uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset + correctedPOSIXLocale[len] = 0; + } + else { + /* Anything following the @ sign */ + uprv_strcat(correctedPOSIXLocale, p); + } + + /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? + * How about 'russian' -> 'ru'? + * Many of the other locales using ISO codes will be handled by the + * canonicalization functions in uloc_getDefault. + */ + } + + if (gCorrectedPOSIXLocale == nullptr) { + gCorrectedPOSIXLocale = correctedPOSIXLocale; + gCorrectedPOSIXLocaleHeapAllocated = true; + ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); + correctedPOSIXLocale = nullptr; + } + posixID = gCorrectedPOSIXLocale; + + if (correctedPOSIXLocale != nullptr) { /* Was already set - clean up. */ + uprv_free(correctedPOSIXLocale); + } + + return posixID; + +#elif U_PLATFORM_USES_ONLY_WIN32_API +#define POSIX_LOCALE_CAPACITY 64 + UErrorCode status = U_ZERO_ERROR; + char *correctedPOSIXLocale = nullptr; + + // If we have already figured this out just use the cached value + if (gCorrectedPOSIXLocale != nullptr) { + return gCorrectedPOSIXLocale; + } + + // No cached value, need to determine the current value + static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {}; + int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH); + + // Now we should have a Windows locale name that needs converted to the POSIX style. + if (length > 0) // If length is 0, then the GetLocaleInfoEx failed. 
+ { + // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.) + char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {}; + + int32_t i; + for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++) + { + if (windowsLocale[i] == '_') + { + modifiedWindowsLocale[i] = '-'; + } + else + { + modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]); + } + + if (modifiedWindowsLocale[i] == '\0') + { + break; + } + } + + if (i >= UPRV_LENGTHOF(modifiedWindowsLocale)) + { + // Ran out of room, can't really happen, maybe we'll be lucky about a matching + // locale when tags are dropped + modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0'; + } + + // Now normalize the resulting name + correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1)); + /* TODO: Should we just exit on memory allocation failure? */ + if (correctedPOSIXLocale) + { + int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status); + if (U_SUCCESS(status)) + { + *(correctedPOSIXLocale + posixLen) = 0; + gCorrectedPOSIXLocale = correctedPOSIXLocale; + gCorrectedPOSIXLocaleHeapAllocated = true; + ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); + } + else + { + uprv_free(correctedPOSIXLocale); + } + } + } + + // If unable to find a locale we can agree upon, use en-US by default + if (gCorrectedPOSIXLocale == nullptr) { + gCorrectedPOSIXLocale = "en_US"; + } + return gCorrectedPOSIXLocale; + +#elif U_PLATFORM == U_PF_OS400 + /* locales are process scoped and are by definition thread safe */ + static char correctedLocale[64]; + const char *localeID = getenv("LC_ALL"); + char *p; + + if (localeID == NULL) + localeID = getenv("LANG"); + if (localeID == NULL) + localeID = setlocale(LC_ALL, NULL); + /* Make sure we have something... */ + if (localeID == NULL) + return "en_US_POSIX"; + + /* Extract the locale name from the path. 
*/ + if((p = uprv_strrchr(localeID, '/')) != NULL) + { + /* Increment p to start of locale name. */ + p++; + localeID = p; + } + + /* Copy to work location. */ + uprv_strcpy(correctedLocale, localeID); + + /* Strip off the '.locale' extension. */ + if((p = uprv_strchr(correctedLocale, '.')) != NULL) { + *p = 0; + } + + /* Upper case the locale name. */ + T_CString_toUpperCase(correctedLocale); + + /* See if we are using the POSIX locale. Any of the + * following are equivalent and use the same QLGPGCMA + * (POSIX) locale. + * QLGPGCMA2 means UCS2 + * QLGPGCMA_4 means UTF-32 + * QLGPGCMA_8 means UTF-8 + */ + if ((uprv_strcmp("C", correctedLocale) == 0) || + (uprv_strcmp("POSIX", correctedLocale) == 0) || + (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0)) + { + uprv_strcpy(correctedLocale, "en_US_POSIX"); + } + else + { + int16_t LocaleLen; + + /* Lower case the lang portion. */ + for(p = correctedLocale; *p != 0 && *p != '_'; p++) + { + *p = uprv_tolower(*p); + } + + /* Adjust for Euro. After '_E' add 'URO'. */ + LocaleLen = uprv_strlen(correctedLocale); + if (correctedLocale[LocaleLen - 2] == '_' && + correctedLocale[LocaleLen - 1] == 'E') + { + uprv_strcat(correctedLocale, "URO"); + } + + /* If using Lotus-based locale then convert to + * equivalent non Lotus. + */ + else if (correctedLocale[LocaleLen - 2] == '_' && + correctedLocale[LocaleLen - 1] == 'L') + { + correctedLocale[LocaleLen - 2] = 0; + } + + /* There are separate simplified and traditional + * locales called zh_HK_S and zh_HK_T. + */ + else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0) + { + uprv_strcpy(correctedLocale, "zh_HK"); + } + + /* A special zh_CN_GBK locale... 
+ */ + else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0) + { + uprv_strcpy(correctedLocale, "zh_CN"); + } + + } + + return correctedLocale; +#endif + +} + +#if !U_CHARSET_IS_UTF8 +#if U_POSIX_LOCALE +/* +Due to various platform differences, one platform may specify a charset, +when they really mean a different charset. Remap the names so that they are +compatible with ICU. Only conflicting/ambiguous aliases should be resolved +here. Before adding anything to this function, please consider adding unique +names to the ICU alias table in the data directory. +*/ +static const char* +remapPlatformDependentCodepage(const char *locale, const char *name) { + if (locale != NULL && *locale == 0) { + /* Make sure that an empty locale is handled the same way. */ + locale = NULL; + } + if (name == NULL) { + return NULL; + } +#if U_PLATFORM == U_PF_AIX + if (uprv_strcmp(name, "IBM-943") == 0) { + /* Use the ASCII compatible ibm-943 */ + name = "Shift-JIS"; + } + else if (uprv_strcmp(name, "IBM-1252") == 0) { + /* Use the windows-1252 that contains the Euro */ + name = "IBM-5348"; + } +#elif U_PLATFORM == U_PF_SOLARIS + if (locale != NULL && uprv_strcmp(name, "EUC") == 0) { + /* Solaris underspecifies the "EUC" name. */ + if (uprv_strcmp(locale, "zh_CN") == 0) { + name = "EUC-CN"; + } + else if (uprv_strcmp(locale, "zh_TW") == 0) { + name = "EUC-TW"; + } + else if (uprv_strcmp(locale, "ko_KR") == 0) { + name = "EUC-KR"; + } + } + else if (uprv_strcmp(name, "eucJP") == 0) { + /* + ibm-954 is the best match. + ibm-33722 is the default for eucJP (similar to Windows). + */ + name = "eucjis"; + } + else if (uprv_strcmp(name, "646") == 0) { + /* + * The default codepage given by Solaris is 646 but the C library routines treat it as if it was + * ISO-8859-1 instead of US-ASCII(646). + */ + name = "ISO-8859-1"; + } +#elif U_PLATFORM_IS_DARWIN_BASED + if (locale == NULL && *name == 0) { + /* + No locale was specified, and an empty name was passed in. 
+ This usually indicates that nl_langinfo didn't return valid information. + Mac OS X uses UTF-8 by default (especially the locale data and console). + */ + name = "UTF-8"; + } + else if (uprv_strcmp(name, "CP949") == 0) { + /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ + name = "EUC-KR"; + } + else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) { + /* + * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. + */ + name = "UTF-8"; + } +#elif U_PLATFORM == U_PF_BSD + if (uprv_strcmp(name, "CP949") == 0) { + /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ + name = "EUC-KR"; + } +#elif U_PLATFORM == U_PF_HPUX + if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) { + /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */ + /* zh_TW.big5 is not the same charset as zh_HK.big5! */ + name = "hkbig5"; + } + else if (uprv_strcmp(name, "eucJP") == 0) { + /* + ibm-1350 is the best match, but unavailable. + ibm-954 is mostly a superset of ibm-1350. + ibm-33722 is the default for eucJP (similar to Windows). + */ + name = "eucjis"; + } +#elif U_PLATFORM == U_PF_LINUX + if (locale != NULL && uprv_strcmp(name, "euc") == 0) { + /* Linux underspecifies the "EUC" name. */ + if (uprv_strcmp(locale, "korean") == 0) { + name = "EUC-KR"; + } + else if (uprv_strcmp(locale, "japanese") == 0) { + /* See comment below about eucJP */ + name = "eucjis"; + } + } + else if (uprv_strcmp(name, "eucjp") == 0) { + /* + ibm-1350 is the best match, but unavailable. + ibm-954 is mostly a superset of ibm-1350. + ibm-33722 is the default for eucJP (similar to Windows). 
+ */ + name = "eucjis"; + } + else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && + (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) { + /* + * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. + */ + name = "UTF-8"; + } + /* + * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of + * it by falling back to 'US-ASCII' when NULL is returned from this + * function. So, we don't have to worry about it here. + */ +#endif + /* return NULL when "" is passed in */ + if (*name == 0) { + name = NULL; + } + return name; +} + +static const char* +getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity) +{ + char localeBuf[100]; + const char *name = NULL; + char *variant = NULL; + + if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) { + size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1); + uprv_strncpy(localeBuf, localeName, localeCapacity); + localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */ + name = uprv_strncpy(buffer, name+1, buffCapacity); + buffer[buffCapacity-1] = 0; /* ensure NULL termination */ + if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) { + *variant = 0; + } + name = remapPlatformDependentCodepage(localeBuf, name); + } + return name; +} +#endif + +static const char* +int_getDefaultCodepage() +{ +#if U_PLATFORM == U_PF_OS400 + uint32_t ccsid = 37; /* Default to ibm-37 */ + static char codepage[64]; + Qwc_JOBI0400_t jobinfo; + Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */ + + EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400", + "* ", " ", &error); + + if (error.Bytes_Available == 0) { + if (jobinfo.Coded_Char_Set_ID != 0xFFFF) { + ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; + } + else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { + ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; + } + /* else use the default */ + } + 
sprintf(codepage,"ibm-%d", ccsid); + return codepage; + +#elif U_PLATFORM == U_PF_OS390 + static char codepage[64]; + + strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING)); + strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING); + codepage[63] = 0; /* NULL terminate */ + + return codepage; + +#elif U_PLATFORM_USES_ONLY_WIN32_API + static char codepage[64]; + DWORD codepageNumber = 0; + +#if U_PLATFORM_HAS_WINUWP_API == 1 + // UWP doesn't have a direct API to get the default ACP as Microsoft would rather + // have folks use Unicode than a "system" code page, however this is the same + // codepage as the system default locale codepage. (FWIW, the system locale is + // ONLY used for codepage, it should never be used for anything else) + GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER, + (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR)); +#else + // Win32 apps can call GetACP + codepageNumber = GetACP(); +#endif + // Special case for UTF-8 + if (codepageNumber == 65001) + { + return "UTF-8"; + } + // Windows codepages can look like windows-1252, so format the found number + // the numbers are eclectic, however all valid system code pages, besides UTF-8 + // are between 3 and 19999 + if (codepageNumber > 0 && codepageNumber < 20000) + { + sprintf(codepage, "windows-%ld", codepageNumber); + return codepage; + } + // If the codepage number call failed then return UTF-8 + return "UTF-8"; + +#elif U_POSIX_LOCALE + static char codesetName[100]; + const char *localeName = NULL; + const char *name = NULL; + + localeName = uprv_getPOSIXIDForDefaultCodepage(); + uprv_memset(codesetName, 0, sizeof(codesetName)); + /* On Solaris nl_langinfo returns C locale values unless setlocale + * was called earlier. + */ +#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS) + /* When available, check nl_langinfo first because it usually gives more + useful names. It depends on LC_CTYPE. 
+ nl_langinfo may use the same buffer as setlocale. */ + { + const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); +#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED + /* + * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8 + * instead of ASCII. + */ + if (uprv_strcmp(localeName, "en_US_POSIX") != 0) { + codeset = remapPlatformDependentCodepage(localeName, codeset); + } else +#endif + { + codeset = remapPlatformDependentCodepage(NULL, codeset); + } + + if (codeset != NULL) { + uprv_strncpy(codesetName, codeset, sizeof(codesetName)); + codesetName[sizeof(codesetName)-1] = 0; + return codesetName; + } + } +#endif + + /* Use setlocale in a nice way, and then check some environment variables. + Maybe the application used setlocale already. + */ + uprv_memset(codesetName, 0, sizeof(codesetName)); + name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); + if (name) { + /* if we can find the codeset name from setlocale, return that. */ + return name; + } + + if (*codesetName == 0) + { + /* Everything failed. Return US ASCII (ISO 646). 
*/ + (void)uprv_strcpy(codesetName, "US-ASCII"); + } + return codesetName; +#else + return "US-ASCII"; +#endif +} + + +U_CAPI const char* U_EXPORT2 +uprv_getDefaultCodepage() +{ + static char const *name = NULL; + umtx_lock(NULL); + if (name == NULL) { + name = int_getDefaultCodepage(); + } + umtx_unlock(NULL); + return name; +} +#endif /* !U_CHARSET_IS_UTF8 */ + + +/* end of platform-specific implementation -------------- */ + +/* version handling --------------------------------------------------------- */ + +U_CAPI void U_EXPORT2 +u_versionFromString(UVersionInfo versionArray, const char *versionString) { + char *end; + uint16_t part=0; + + if(versionArray==NULL) { + return; + } + + if(versionString!=NULL) { + for(;;) { + versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); + if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { + break; + } + versionString=end+1; + } + } + + while(part<U_MAX_VERSION_LENGTH) { + versionArray[part++]=0; + } +} + +U_CAPI void U_EXPORT2 +u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) { + if(versionArray!=NULL && versionString!=NULL) { + char versionChars[U_MAX_VERSION_STRING_LENGTH+1]; + int32_t len = u_strlen(versionString); + if(len>U_MAX_VERSION_STRING_LENGTH) { + len = U_MAX_VERSION_STRING_LENGTH; + } + u_UCharsToChars(versionString, versionChars, len); + versionChars[len]=0; + u_versionFromString(versionArray, versionChars); + } +} + +U_CAPI void U_EXPORT2 +u_versionToString(const UVersionInfo versionArray, char *versionString) { + uint16_t count, part; + uint8_t field; + + if(versionString==NULL) { + return; + } + + if(versionArray==NULL) { + versionString[0]=0; + return; + } + + /* count how many fields need to be written */ + for(count=4; count>0 && versionArray[count-1]==0; --count) { + } + + if(count <= 1) { + count = 2; + } + + /* write the first part */ + /* write the decimal field value */ + field=versionArray[0]; + if(field>=100) { + 
*versionString++=(char)('0'+field/100); + field%=100; + } + if(field>=10) { + *versionString++=(char)('0'+field/10); + field%=10; + } + *versionString++=(char)('0'+field); + + /* write the following parts */ + for(part=1; part<count; ++part) { + /* write a dot first */ + *versionString++=U_VERSION_DELIMITER; + + /* write the decimal field value */ + field=versionArray[part]; + if(field>=100) { + *versionString++=(char)('0'+field/100); + field%=100; + } + if(field>=10) { + *versionString++=(char)('0'+field/10); + field%=10; + } + *versionString++=(char)('0'+field); + } + + /* NUL-terminate */ + *versionString=0; +} + +U_CAPI void U_EXPORT2 +u_getVersion(UVersionInfo versionArray) { + (void)copyright; // Suppress unused variable warning from clang. + u_versionFromString(versionArray, U_ICU_VERSION); +} + +/** + * icucfg.h dependent code + */ + +#if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API + +#if HAVE_DLFCN_H +#ifdef __MVS__ +#ifndef __SUSV3 +#define __SUSV3 1 +#endif +#endif +#include <dlfcn.h> +#endif /* HAVE_DLFCN_H */ + +U_INTERNAL void * U_EXPORT2 +uprv_dl_open(const char *libName, UErrorCode *status) { + void *ret = NULL; + if(U_FAILURE(*status)) return ret; + ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL); + if(ret==NULL) { +#ifdef U_TRACE_DYLOAD + printf("dlerror on dlopen(%s): %s\n", libName, dlerror()); +#endif + *status = U_MISSING_RESOURCE_ERROR; + } + return ret; +} + +U_INTERNAL void U_EXPORT2 +uprv_dl_close(void *lib, UErrorCode *status) { + if(U_FAILURE(*status)) return; + dlclose(lib); +} + +U_INTERNAL UVoidFunction* U_EXPORT2 +uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { + union { + UVoidFunction *fp; + void *vp; + } uret; + uret.fp = NULL; + if(U_FAILURE(*status)) return uret.fp; + uret.vp = dlsym(lib, sym); + if(uret.vp == NULL) { +#ifdef U_TRACE_DYLOAD + printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror()); +#endif + *status = U_MISSING_RESOURCE_ERROR; + } + return uret.fp; +} + +#elif 
U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API + +/* Windows API implementation. */ +// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation. */ + +U_INTERNAL void * U_EXPORT2 +uprv_dl_open(const char *libName, UErrorCode *status) { + HMODULE lib = NULL; + + if(U_FAILURE(*status)) return NULL; + + lib = LoadLibraryA(libName); + + if(lib==NULL) { + *status = U_MISSING_RESOURCE_ERROR; + } + + return (void*)lib; +} + +U_INTERNAL void U_EXPORT2 +uprv_dl_close(void *lib, UErrorCode *status) { + HMODULE handle = (HMODULE)lib; + if(U_FAILURE(*status)) return; + + FreeLibrary(handle); + + return; +} + +U_INTERNAL UVoidFunction* U_EXPORT2 +uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { + HMODULE handle = (HMODULE)lib; + UVoidFunction* addr = NULL; + + if(U_FAILURE(*status) || lib==NULL) return NULL; + + addr = (UVoidFunction*)GetProcAddress(handle, sym); + + if(addr==NULL) { + DWORD lastError = GetLastError(); + if(lastError == ERROR_PROC_NOT_FOUND) { + *status = U_MISSING_RESOURCE_ERROR; + } else { + *status = U_UNSUPPORTED_ERROR; /* other unknown error. */ + } + } + + return addr; +} + +#else + +/* No dynamic loading, null (nonexistent) implementation. 
*/ + +U_INTERNAL void * U_EXPORT2 +uprv_dl_open(const char *libName, UErrorCode *status) { + (void)libName; + if(U_FAILURE(*status)) return NULL; + *status = U_UNSUPPORTED_ERROR; + return NULL; +} + +U_INTERNAL void U_EXPORT2 +uprv_dl_close(void *lib, UErrorCode *status) { + (void)lib; + if(U_FAILURE(*status)) return; + *status = U_UNSUPPORTED_ERROR; + return; +} + +U_INTERNAL UVoidFunction* U_EXPORT2 +uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { + (void)lib; + (void)sym; + if(U_SUCCESS(*status)) { + *status = U_UNSUPPORTED_ERROR; + } + return (UVoidFunction*)NULL; +} + +#endif + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/libicuuc/libicu/putil.cpp.patch b/libicuuc/libicu/putil.cpp.patch new file mode 100644 index 0000000..317f3fe --- /dev/null +++ b/libicuuc/libicu/putil.cpp.patch @@ -0,0 +1,14 @@ +--- libicu/uc/putil.cpp 2019-12-23 14:38:39.756890521 +0300 ++++ libicu/putil.cpp 2021-11-22 10:13:32.884483858 +0300 +@@ -46,11 +46,6 @@ + // First, the platform type. Need this for U_PLATFORM. + #include "unicode/platform.h" + +-#if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__ +-/* tzset isn't defined in strict ANSI on MinGW. */ +-#undef __STRICT_ANSI__ +-#endif +- + /* + * Cygwin with GCC requires inclusion of time.h after the above disabling strict asci mode statement. + */ diff --git a/libicuuc/manifest b/libicuuc/manifest index afb67a2..24bad80 100644 --- a/libicuuc/manifest +++ b/libicuuc/manifest @@ -1,6 +1,6 @@ : 1 name: libicuuc -version: 65.1.0+4 +version: 65.1.0+10 upstream-version: 65.1 project: icu summary: ICU basic internationalization C/C++ library @@ -17,3 +17,8 @@ build-error-email: builds@build2.org builds: all depends: * build2 >= 0.12.0 depends: * bpkg >= 0.12.0 + +# System package mapping. +# +debian-name: libicu-dev +fedora-name: libicu-devel |