Skip to content

Commit

Permalink
Merge pull request #91937 from bruvzg/icu_751
Browse files Browse the repository at this point in the history
Update ICU to 75.1
  • Loading branch information
akien-mga committed May 14, 2024
2 parents 3c26cdd + e74fea2 commit 78cce19
Show file tree
Hide file tree
Showing 129 changed files with 7,975 additions and 7,803 deletions.
2 changes: 1 addition & 1 deletion COPYRIGHT.txt
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ License: HarfBuzz

Files: ./thirdparty/icu4c/
Comment: International Components for Unicode
Copyright: 1991-2021, Unicode
Copyright: 2016-2024, Unicode, Inc.
License: Unicode

Files: ./thirdparty/jpeg-compressor/
Expand Down
2 changes: 1 addition & 1 deletion modules/text_server_adv/SCsub
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ if env["builtin_icu4c"]:
]
thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]

icu_data_name = "icudt74l.dat"
icu_data_name = "icudt75l.dat"

if env.editor_build:
env_icu.Depends("#thirdparty/icu4c/icudata.gen.h", "#thirdparty/icu4c/" + icu_data_name)
Expand Down
2 changes: 1 addition & 1 deletion modules/text_server_adv/gdextension_build/SConstruct
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ thirdparty_icu_sources = [
]
thirdparty_icu_sources = [thirdparty_icu_dir + file for file in thirdparty_icu_sources]

icu_data_name = "icudt74l.dat"
icu_data_name = "icudt75l.dat"

if env["static_icu_data"]:
env_icu.Depends("../../../thirdparty/icu4c/icudata.gen.h", "../../../thirdparty/icu4c/" + icu_data_name)
Expand Down
6 changes: 3 additions & 3 deletions thirdparty/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ Files extracted from upstream source:
## icu4c

- Upstream: https://github.com/unicode-org/icu
- Version: 74.2 (2d029329c82c7792b985024b2bdab5fc7278fbc8, 2023)
- Version: 75.1 (7750081bda4b3bc1768ae03849ec70f67ea10625, 2024)
- License: Unicode

Files extracted from upstream source:
Expand All @@ -403,7 +403,7 @@ Files extracted from upstream source:

Files generated from upstream source:

- The `icudt74l.dat` built with the provided `godot_data.json` config file (see
- The `icudt75l.dat` built with the provided `godot_data.json` config file (see
https://github.com/unicode-org/icu/blob/master/docs/userguide/icu_data/buildtool.md
for instructions).

Expand All @@ -413,7 +413,7 @@ Files generated from upstream source:
3. Reconfigure ICU with custom data config:
`ICU_DATA_FILTER_FILE={GODOT_SOURCE}/thirdparty/icu4c/godot_data.json ./runConfigureICU {PLATFORM} --with-data-packaging=common`
4. Delete `data/out` folder and rebuild data: `cd data && rm -rf ./out && make`
5. Copy `source/data/out/icudt74l.dat` to the `{GODOT_SOURCE}/thirdparty/icu4c/icudt74l.dat`
5. Copy `source/data/out/icudt75l.dat` to the `{GODOT_SOURCE}/thirdparty/icu4c/icudt75l.dat`


## jpeg-compressor
Expand Down
4 changes: 3 additions & 1 deletion thirdparty/icu4c/LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ UNICODE LICENSE V3

COPYRIGHT AND PERMISSION NOTICE

Copyright © 2016-2023 Unicode, Inc.
Copyright © 2016-2024 Unicode, Inc.

NOTICE TO USER: Carefully read the following legal agreement. BY
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
Expand Down Expand Up @@ -38,6 +38,8 @@ not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.

SPDX-License-Identifier: Unicode-3.0

----------------------------------------------------------------------

Third-Party Software Licenses
Expand Down
6 changes: 2 additions & 4 deletions thirdparty/icu4c/common/brkeng.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,11 @@ UnhandledEngine::handleCharacter(UChar32 c) {
*/

ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
fEngines = 0;
fEngines = nullptr;
}

ICULanguageBreakFactory::~ICULanguageBreakFactory() {
if (fEngines != 0) {
delete fEngines;
}
delete fEngines;
}

void ICULanguageBreakFactory::ensureEngines(UErrorCode& status) {
Expand Down
9 changes: 3 additions & 6 deletions thirdparty/icu4c/common/brkiter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,17 +438,14 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
uprv_strcpy(lb_lw, "line");
UErrorCode kvStatus = U_ZERO_ERROR;
CharString value;
CharStringByteSink valueSink(&value);
loc.getKeywordValue("lb", valueSink, kvStatus);
auto value = loc.getKeywordValue<CharString>("lb", kvStatus);
if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
uprv_strcat(lb_lw, "_");
uprv_strcat(lb_lw, value.data());
}
// lw=phrase is only supported in Japanese and Korean
if (uprv_strcmp(loc.getLanguage(), "ja") == 0 || uprv_strcmp(loc.getLanguage(), "ko") == 0) {
value.clear();
loc.getKeywordValue("lw", valueSink, kvStatus);
value = loc.getKeywordValue<CharString>("lw", kvStatus);
if (U_SUCCESS(kvStatus) && value == "phrase") {
uprv_strcat(lb_lw, "_");
uprv_strcat(lb_lw, value.data());
Expand Down Expand Up @@ -500,7 +497,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
Locale
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
if (type == ULOC_REQUESTED_LOCALE) {
return Locale(requestLocale);
return {requestLocale};
}
U_LOCALE_BASED(locBased, *this);
return locBased.getLocale(type, status);
Expand Down
112 changes: 90 additions & 22 deletions thirdparty/icu4c/common/bytesinkutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,52 @@
#ifndef BYTESINKUTIL_H
#define BYTESINKUTIL_H

#include <type_traits>

#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "unicode/edits.h"
#include "charstr.h"
#include "cmemory.h"
#include "uassert.h"
#include "ustr_imp.h"

U_NAMESPACE_BEGIN

class ByteSink;
class CharString;
class Edits;

/**
 * ByteSink subclass bound to a caller-provided CharString.
 *
 * Only the declaration is visible here; the member functions are defined
 * elsewhere. Presumably bytes passed to the sink are appended to the bound
 * CharString -- confirm against the implementation file.
 */
class U_COMMON_API CharStringByteSink : public ByteSink {
public:
// Binds the sink to *dest. The pointer is stored as a reference (dest_), so
// it presumably must be non-null and outlive the sink -- TODO confirm.
CharStringByteSink(CharString* dest);
~CharStringByteSink() override;

// A sink always needs a destination, and copying is disallowed.
CharStringByteSink() = delete;
CharStringByteSink(const CharStringByteSink&) = delete;
CharStringByteSink& operator=(const CharStringByteSink&) = delete;

// ByteSink override: receives n bytes starting at `bytes`.
void Append(const char* bytes, int32_t n) override;

// ByteSink override: hands out a buffer the caller may write into before
// calling Append(); the capacity actually available is reported through
// *result_capacity.
char* GetAppendBuffer(int32_t min_capacity,
int32_t desired_capacity_hint,
char* scratch,
int32_t scratch_capacity,
int32_t* result_capacity) override;

private:
// Destination string this sink is bound to.
CharString& dest_;
};

// CharString doesn't provide the public API that StringByteSink requires a
// string class to have so this template specialization replaces the default
// implementation of StringByteSink<CharString> with CharStringByteSink.
//
// Both constructors simply forward `dest` to CharStringByteSink.
template<>
class StringByteSink<CharString> : public CharStringByteSink {
public:
// Binds the sink to *dest.
StringByteSink(CharString* dest) : CharStringByteSink(dest) { }
// The capacity argument is accepted but ignored (presumably to mirror the
// two-argument constructor of the primary template -- TODO confirm).
StringByteSink(CharString* dest, int32_t /*initialAppendCapacity*/) : CharStringByteSink(dest) { }
};

class U_COMMON_API ByteSinkUtil {
public:
ByteSinkUtil() = delete; // all static
Expand Down Expand Up @@ -57,30 +91,64 @@ class U_COMMON_API ByteSinkUtil {
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode);

private:
static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits);
};

class U_COMMON_API CharStringByteSink : public ByteSink {
public:
CharStringByteSink(CharString* dest);
~CharStringByteSink() override;

CharStringByteSink() = delete;
CharStringByteSink(const CharStringByteSink&) = delete;
CharStringByteSink& operator=(const CharStringByteSink&) = delete;

void Append(const char* bytes, int32_t n) override;
/**
 * Adapter from a ByteSink-writing callable to the classic ICU4C C API
 * convention of filling a fixed-size buffer: the callable is run against a
 * CheckedArrayByteSink wrapping the buffer and the result is then passed
 * through u_terminateChars().
 *
 * Nothing is done (and 0 is returned) if status already indicates a failure
 * on entry, or if the callable leaves a failure in status.
 *
 * @param buffer receiving buffer
 * @param capacity capacity of receiving buffer
 * @param lambda callable invoked as lambda(sink, status)
 * @param status set to U_BUFFER_OVERFLOW_ERROR on overflow
 * @return number of bytes written, or needed (in case of overflow)
 * @internal
 */
template <typename F,
          typename = std::enable_if_t<
              std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
static int32_t viaByteSinkToTerminatedChars(char* buffer, int32_t capacity,
                                            F&& lambda,
                                            UErrorCode& status) {
    if (U_FAILURE(status)) {
        return 0;
    }

    CheckedArrayByteSink arraySink(buffer, capacity);
    lambda(arraySink, status);
    if (U_FAILURE(status)) {
        return 0;
    }

    const int32_t appended = arraySink.NumberOfBytesAppended();
    if (!arraySink.Overflowed()) {
        // Everything fit: let u_terminateChars() finish the buffer and set status.
        return u_terminateChars(buffer, capacity, appended, &status);
    }

    // The sink ran out of room: report the overflow together with the count.
    status = U_BUFFER_OVERFLOW_ERROR;
    return appended;
}

char* GetAppendBuffer(int32_t min_capacity,
int32_t desired_capacity_hint,
char* scratch,
int32_t scratch_capacity,
int32_t* result_capacity) override;
/**
 * Adapter from a ByteSink-writing callable to a value-returning C++ API:
 * the callable is run against a CharStringByteSink bound to a local
 * CharString, which is then returned.
 *
 * If status already indicates a failure on entry the callable is not
 * invoked and an empty string is returned. Otherwise the collected string
 * is returned as-is; callers should check status afterwards.
 *
 * @param lambda callable invoked as lambda(sink, status)
 * @param status to check and report
 * @return the resulting string, or an empty string (in case of error)
 * @internal
 */
template <typename F,
          typename = std::enable_if_t<
              std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
static CharString viaByteSinkToCharString(F&& lambda, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return CharString();
    }

    CharString collected;
    CharStringByteSink collector(&collected);
    lambda(collector, status);
    return collected;
}

private:
CharString& dest_;
static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits);
};

U_NAMESPACE_END
Expand Down
Loading

0 comments on commit 78cce19

Please sign in to comment.