Update HarfBuzz, ICU and FreeType.

HarfBuzz: Update to version 4.2.1. FreeType: Update to version 2.12.1. ICU: Update to version 71.1.
2022-05-17 18:14:19 +03:00 · 2022-05-17 18:14:19 +03:00 · 93fba7ead3
commit 93fba7ead3
parent 7ea8cde983
497 changed files with 21141 additions and 3961 deletions
--- a/thirdparty/icu4c/common/brkeng.cpp
+++ b/thirdparty/icu4c/common/brkeng.cpp
@ -79,6 +79,7 @@ UnhandledEngine::findBreaks( UText *text,
                             int32_t /* startPos */,
                             int32_t endPos,
                             UVector32 &/*foundBreaks*/,
+                             UBool /* isPhraseBreaking */,
                             UErrorCode &status) const {
    if (U_FAILURE(status)) return 0;
    UChar32 c = utext_current32(text); 
--- a/thirdparty/icu4c/common/brkeng.h
+++ b/thirdparty/icu4c/common/brkeng.h
@ -75,6 +75,7 @@ class LanguageBreakEngine : public UMemory {
                              int32_t startPos,
                              int32_t endPos,
                              UVector32 &foundBreaks,
+                              UBool isPhraseBreaking,
                              UErrorCode &status) const = 0;

 };
@ -194,6 +195,7 @@ class UnhandledEngine : public LanguageBreakEngine {
                              int32_t startPos,
                              int32_t endPos,
                              UVector32 &foundBreaks,
+                              UBool isPhraseBreaking,
                              UErrorCode &status) const override;

 /**
--- a/thirdparty/icu4c/common/brkiter.cpp
+++ b/thirdparty/icu4c/common/brkiter.cpp
@ -30,6 +30,7 @@
 #include "unicode/ures.h"
 #include "unicode/ustring.h"
 #include "unicode/filteredbrk.h"
+#include "bytesinkutil.h"
 #include "ucln_cmn.h"
 #include "cstring.h"
 #include "umutex.h"
@ -115,7 +116,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
    }

    // Create a RuleBasedBreakIterator
-    result = new RuleBasedBreakIterator(file, status);
+    result = new RuleBasedBreakIterator(file, uprv_strstr(type, "phrase") != NULL, status);

    // If there is a result, set the valid locale and actual locale, and the kind
    if (U_SUCCESS(status) && result != NULL) {
@ -408,7 +409,6 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
    if (U_FAILURE(status)) {
        return NULL;
    }
-    char lbType[kKeyValueLenMax];

    BreakIterator *result = NULL;
    switch (kind) {
@ -428,18 +428,29 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
        break;
    case UBRK_LINE:
        {
+            char lb_lw[kKeyValueLenMax];
            UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
-            uprv_strcpy(lbType, "line");
-            char lbKeyValue[kKeyValueLenMax] = {0};
+            uprv_strcpy(lb_lw, "line");
            UErrorCode kvStatus = U_ZERO_ERROR;
-            int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus);
-            if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) {
-                uprv_strcat(lbType, "_");
-                uprv_strcat(lbType, lbKeyValue);
+            CharString value;
+            CharStringByteSink valueSink(&value);
+            loc.getKeywordValue("lb", valueSink, kvStatus);
+            if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
+                uprv_strcat(lb_lw, "_");
+                uprv_strcat(lb_lw, value.data());
            }
-            result = BreakIterator::buildInstance(loc, lbType, status);
+            // lw=phrase is only supported in Japanese.
+            if (uprv_strcmp(loc.getLanguage(), "ja") == 0) {
+                value.clear();
+                loc.getKeywordValue("lw", valueSink, kvStatus);
+                if (U_SUCCESS(kvStatus) && value == "phrase") {
+                    uprv_strcat(lb_lw, "_");
+                    uprv_strcat(lb_lw, value.data());
+                }
+            }
+            result = BreakIterator::buildInstance(loc, lb_lw, status);

-            UTRACE_DATA1(UTRACE_INFO, "lb=%s", lbKeyValue);
+            UTRACE_DATA1(UTRACE_INFO, "lb_lw=%s", lb_lw);
            UTRACE_EXIT_STATUS(status);
        }
        break;
--- a/thirdparty/icu4c/common/dictbe.cpp
+++ b/thirdparty/icu4c/common/dictbe.cpp
@ -17,7 +17,10 @@
 #include "dictbe.h"
 #include "unicode/uniset.h"
 #include "unicode/chariter.h"
+#include "unicode/resbund.h"
 #include "unicode/ubrk.h"
+#include "unicode/usetiter.h"
+#include "ubrkimpl.h"
 #include "utracimp.h"
 #include "uvectr32.h"
 #include "uvector.h"
@ -48,6 +51,7 @@ DictionaryBreakEngine::findBreaks( UText *text,
                                 int32_t startPos,
                                 int32_t endPos,
                                 UVector32 &foundBreaks,
+                                 UBool isPhraseBreaking,
                                 UErrorCode& status) const {
    if (U_FAILURE(status)) return 0;
    (void)startPos;            // TODO: remove this param?
@ -68,7 +72,7 @@ DictionaryBreakEngine::findBreaks( UText *text,
    }
    rangeStart = start;
    rangeEnd = current;
-    result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks, status);
+    result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks, isPhraseBreaking, status);
    utext_setNativeIndex(text, current);
    
    return result;
@ -199,13 +203,13 @@ ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode
 {
    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Thai");
-    fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
+    UnicodeSet thaiWordSet(UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]]"), status);
    if (U_SUCCESS(status)) {
-        setCharacters(fThaiWordSet);
+        setCharacters(thaiWordSet);
    }
-    fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
+    fMarkSet.applyPattern(UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
    fMarkSet.add(0x0020);
-    fEndWordSet = fThaiWordSet;
+    fEndWordSet = thaiWordSet;
    fEndWordSet.remove(0x0E31);             // MAI HAN-AKAT
    fEndWordSet.remove(0x0E40, 0x0E44);     // SARA E through SARA AI MAIMALAI
    fBeginWordSet.add(0x0E01, 0x0E2E);      // KO KAI through HO NOKHUK
@ -230,6 +234,7 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
                                                int32_t rangeStart,
                                                int32_t rangeEnd,
                                                UVector32 &foundBreaks,
+                                                UBool /* isPhraseBreaking */,
                                                UErrorCode& status) const {
    if (U_FAILURE(status)) return 0;
    utext_setNativeIndex(text, rangeStart);
@ -441,13 +446,13 @@ LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &s
 {
    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Laoo");
-    fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
+    UnicodeSet laoWordSet(UnicodeString(u"[[:Laoo:]&[:LineBreak=SA:]]"), status);
    if (U_SUCCESS(status)) {
-        setCharacters(fLaoWordSet);
+        setCharacters(laoWordSet);
    }
-    fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status);
+    fMarkSet.applyPattern(UnicodeString(u"[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status);
    fMarkSet.add(0x0020);
-    fEndWordSet = fLaoWordSet;
+    fEndWordSet = laoWordSet;
    fEndWordSet.remove(0x0EC0, 0x0EC4);     // prefix vowels
    fBeginWordSet.add(0x0E81, 0x0EAE);      // basic consonants (including holes for corresponding Thai characters)
    fBeginWordSet.add(0x0EDC, 0x0EDD);      // digraph consonants (no Thai equivalent)
@ -469,6 +474,7 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
                                                int32_t rangeStart,
                                                int32_t rangeEnd,
                                                UVector32 &foundBreaks,
+                                                UBool /* isPhraseBreaking */,
                                                UErrorCode& status) const {
    if (U_FAILURE(status)) return 0;
    if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) {
@ -637,14 +643,13 @@ BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErro
 {
    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Mymr");
-    fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
-    if (U_SUCCESS(status)) {
-        setCharacters(fBurmeseWordSet);
-    }
-    fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status);
-    fMarkSet.add(0x0020);
-    fEndWordSet = fBurmeseWordSet;
    fBeginWordSet.add(0x1000, 0x102A);      // basic consonants and independent vowels
+    fEndWordSet.applyPattern(UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]]"), status);
+    fMarkSet.applyPattern(UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status);
+    fMarkSet.add(0x0020);
+    if (U_SUCCESS(status)) {
+        setCharacters(fEndWordSet);
+    }

    // Compact for caching.
    fMarkSet.compact();
@ -662,6 +667,7 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
                                                int32_t rangeStart,
                                                int32_t rangeEnd,
                                                UVector32 &foundBreaks,
+                                                UBool /* isPhraseBreaking */,
                                                UErrorCode& status ) const {
    if (U_FAILURE(status)) return 0;
    if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) {
@ -830,13 +836,13 @@ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod
 {
    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
-    fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
+    UnicodeSet khmerWordSet(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]]"), status);
    if (U_SUCCESS(status)) {
-        setCharacters(fKhmerWordSet);
+        setCharacters(khmerWordSet);
    }
-    fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
+    fMarkSet.applyPattern(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
    fMarkSet.add(0x0020);
-    fEndWordSet = fKhmerWordSet;
+    fEndWordSet = khmerWordSet;
    fBeginWordSet.add(0x1780, 0x17B3);
    //fBeginWordSet.add(0x17A3, 0x17A4);      // deprecated vowels
    //fEndWordSet.remove(0x17A5, 0x17A9);     // Khmer independent vowels that can't end a word
@ -867,6 +873,7 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
                                                int32_t rangeStart,
                                                int32_t rangeEnd,
                                                UVector32 &foundBreaks,
+                                                UBool /* isPhraseBreaking */,
                                                UErrorCode& status ) const {
    if (U_FAILURE(status)) return 0;
    if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
@ -1050,25 +1057,27 @@ CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType
 : DictionaryBreakEngine(), fDictionary(adoptDictionary) {
    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Hani");
-    // Korean dictionary only includes Hangul syllables
-    fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
-    fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
-    fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status);
-    fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status);
    nfkcNorm2 = Normalizer2::getNFKCInstance(status);
+    // Korean dictionary only includes Hangul syllables
+    fHangulWordSet.applyPattern(UnicodeString(u"[\\uac00-\\ud7a3]"), status);
+    fHangulWordSet.compact();
+    // Digits, open punctuation and Alphabetic characters.
+    fDigitOrOpenPunctuationOrAlphabetSet.applyPattern(
+        UnicodeString(u"[[:Nd:][:Pi:][:Ps:][:Alphabetic:]]"), status);
+    fDigitOrOpenPunctuationOrAlphabetSet.compact();
+    fClosePunctuationSet.applyPattern(UnicodeString(u"[[:Pc:][:Pd:][:Pe:][:Pf:][:Po:]]"), status);
+    fClosePunctuationSet.compact();

-    if (U_SUCCESS(status)) {
-        // handle Korean and Japanese/Chinese using different dictionaries
-        if (type == kKorean) {
+    // handle Korean and Japanese/Chinese using different dictionaries
+    if (type == kKorean) {
+        if (U_SUCCESS(status)) {
            setCharacters(fHangulWordSet);
-        } else { //Chinese and Japanese
-            UnicodeSet cjSet;
-            cjSet.addAll(fHanWordSet);
-            cjSet.addAll(fKatakanaWordSet);
-            cjSet.addAll(fHiraganaWordSet);
-            cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
-            cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK
+        }
+    } else { //Chinese and Japanese
+        UnicodeSet cjSet(UnicodeString(u"[[:Han:][:Hiragana:][:Katakana:]\\u30fc\\uff70\\uff9e\\uff9f]"), status);
+        if (U_SUCCESS(status)) {
            setCharacters(cjSet);
+            initJapanesePhraseParameter(status);
        }
    }
    UTRACE_EXIT_STATUS(status);
@ -1096,14 +1105,12 @@ static inline bool isKatakana(UChar32 value) {
            (value >= 0xFF66 && value <= 0xFF9f);
 }

-
 // Function for accessing internal utext flags.
 //   Replicates an internal UText function.

 static inline int32_t utext_i32_flag(int32_t bitIndex) {
    return (int32_t)1 << bitIndex;
 }
-
       
 /*
 * @param text A UText representing the text
@ -1117,6 +1124,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
        int32_t rangeStart,
        int32_t rangeEnd,
        UVector32 &foundBreaks,
+        UBool isPhraseBreaking,
        UErrorCode& status) const {
    if (U_FAILURE(status)) return 0;
    if (rangeStart >= rangeEnd) {
@ -1347,6 +1355,31 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
    if ((uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) {
        t_boundary.addElement(numCodePts, status);
        numBreaks++;
+    } else if (isPhraseBreaking) {
+        t_boundary.addElement(numCodePts, status);
+        if(U_SUCCESS(status)) {
+            numBreaks++;
+            int32_t prevIdx = numCodePts;
+
+            int32_t codeUnitIdx = -1;
+            int32_t prevCodeUnitIdx = -1;
+            int32_t length = -1;
+            for (int32_t i = prev.elementAti(numCodePts); i > 0; i = prev.elementAti(i)) {
+                codeUnitIdx = inString.moveIndex32(0, i);
+                prevCodeUnitIdx = inString.moveIndex32(0, prevIdx);
+                // Calculate the length by using the code unit.
+                length = prevCodeUnitIdx - codeUnitIdx;
+                prevIdx = i;
+                // Keep the breakpoint if the pattern is not in the fSkipSet and continuous Katakana
+                // characters don't occur.
+                if (!fSkipSet.containsKey(inString.tempSubString(codeUnitIdx, length))
+                    && (!isKatakana(inString.char32At(inString.moveIndex32(codeUnitIdx, -1)))
+                           || !isKatakana(inString.char32At(codeUnitIdx)))) {
+                    t_boundary.addElement(i, status);
+                    numBreaks++;
+                }
+            }
+        }
    } else {
        for (int32_t i = numCodePts; i > 0; i = prev.elementAti(i)) {
            t_boundary.addElement(i, status);
@ -1367,7 +1400,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
    // while reversing t_boundary and pushing values to foundBreaks.
    int32_t prevCPPos = -1;
    int32_t prevUTextPos = -1;
-    for (int32_t i = numBreaks-1; i >= 0; i--) {
+    int32_t correctedNumBreaks = 0;
+    for (int32_t i = numBreaks - 1; i >= 0; i--) {
        int32_t cpPos = t_boundary.elementAti(i);
        U_ASSERT(cpPos > prevCPPos);
        int32_t utextPos =  inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart;
@ -1375,7 +1409,15 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
        if (utextPos > prevUTextPos) {
            // Boundaries are added to foundBreaks output in ascending order.
            U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos);
-            foundBreaks.push(utextPos, status);
+            // In phrase breaking, there has to be a breakpoint between Cj character and close
+            // punctuation.
+            // E.g.［携帯電話］正しい選択 -> ［携帯▁電話］▁正しい▁選択 -> breakpoint between ］ and 正
+            if (utextPos != rangeStart
+                || (isPhraseBreaking && utextPos > 0
+                       && fClosePunctuationSet.contains(utext_char32At(inText, utextPos - 1)))) {
+                foundBreaks.push(utextPos, status);
+                correctedNumBreaks++;
+            }
        } else {
            // Normalization expanded the input text, the dictionary found a boundary
            // within the expansion, giving two boundaries with the same index in the
@ -1387,9 +1429,52 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
    }
    (void)prevCPPos; // suppress compiler warnings about unused variable

+    UChar32 nextChar = utext_char32At(inText, rangeEnd);
+    if (!foundBreaks.isEmpty() && foundBreaks.peeki() == rangeEnd) {
+        // In phrase breaking, there has to be a breakpoint between Cj character and
+        // the number/open punctuation.
+        // E.g. る文字「そうだ、京都」->る▁文字▁「そうだ、▁京都」-> breakpoint between 字 and「
+        // E.g. 乗車率９０％程度だろうか -> 乗車▁率▁９０％▁程度だろうか -> breakpoint between 率 and ９
+        // E.g. しかもロゴがＵｎｉｃｏｄｅ！ -> しかも▁ロゴが▁Ｕｎｉｃｏｄｅ！-> breakpoint between が and Ｕ
+        if (isPhraseBreaking) {
+            if (!fDigitOrOpenPunctuationOrAlphabetSet.contains(nextChar)) {
+                foundBreaks.popi();
+                correctedNumBreaks--;
+            }
+        } else {
+            foundBreaks.popi();
+            correctedNumBreaks--;
+        }
+    }
+
    // inString goes out of scope
    // inputMap goes out of scope
-    return numBreaks;
+    return correctedNumBreaks;
+}
+
+void CjkBreakEngine::initJapanesePhraseParameter(UErrorCode& error) {
+    loadJapaneseExtensions(error);
+    loadHiragana(error);
+}
+
+void CjkBreakEngine::loadJapaneseExtensions(UErrorCode& error) {
+    const char* tag = "extensions";
+    ResourceBundle ja(U_ICUDATA_BRKITR, "ja", error);
+    if (U_SUCCESS(error)) {
+        ResourceBundle bundle = ja.get(tag, error);
+        while (U_SUCCESS(error) && bundle.hasNext()) {
+            fSkipSet.puti(bundle.getNextString(error), 1, error);
+        }
+    }
+}
+
+void CjkBreakEngine::loadHiragana(UErrorCode& error) {
+    UnicodeSet hiraganaWordSet(UnicodeString(u"[:Hiragana:]"), error);
+    hiraganaWordSet.compact();
+    UnicodeSetIterator iterator(hiraganaWordSet);
+    while (iterator.next()) {
+        fSkipSet.puti(UnicodeString(iterator.getCodepoint()), 1, error);
+    }
 }
 #endif

--- a/thirdparty/icu4c/common/dictbe.h
+++ b/thirdparty/icu4c/common/dictbe.h
@ -15,6 +15,7 @@
 #include "unicode/utext.h"

 #include "brkeng.h"
+#include "hash.h"
 #include "uvectr32.h"

 U_NAMESPACE_BEGIN
@ -80,6 +81,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
                              int32_t startPos,
                              int32_t endPos,
                              UVector32 &foundBreaks,
+                              UBool isPhraseBreaking,
                              UErrorCode& status ) const override;

 protected:
@ -105,6 +107,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
                                           int32_t rangeStart,
                                           int32_t rangeEnd,
                                           UVector32 &foundBreaks,
+                                           UBool isPhraseBreaking,
                                           UErrorCode& status) const = 0;

 };
@ -127,7 +130,6 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
     * @internal
     */

-  UnicodeSet                fThaiWordSet;
  UnicodeSet                fEndWordSet;
  UnicodeSet                fBeginWordSet;
  UnicodeSet                fSuffixSet;
@ -164,6 +166,7 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
                                           int32_t rangeStart,
                                           int32_t rangeEnd,
                                           UVector32 &foundBreaks,
+                                           UBool isPhraseBreaking,
                                           UErrorCode& status) const override;

 };
@ -186,7 +189,6 @@ class LaoBreakEngine : public DictionaryBreakEngine {
     * @internal
     */

-  UnicodeSet                fLaoWordSet;
  UnicodeSet                fEndWordSet;
  UnicodeSet                fBeginWordSet;
  UnicodeSet                fMarkSet;
@ -222,6 +224,7 @@ class LaoBreakEngine : public DictionaryBreakEngine {
                                           int32_t rangeStart,
                                           int32_t rangeEnd,
                                           UVector32 &foundBreaks,
+                                           UBool isPhraseBreaking,
                                           UErrorCode& status) const override;

 };
@ -244,7 +247,6 @@ class BurmeseBreakEngine : public DictionaryBreakEngine {
     * @internal
     */

-  UnicodeSet                fBurmeseWordSet;
  UnicodeSet                fEndWordSet;
  UnicodeSet                fBeginWordSet;
  UnicodeSet                fMarkSet;
@ -280,6 +282,7 @@ class BurmeseBreakEngine : public DictionaryBreakEngine {
                                           int32_t rangeStart,
                                           int32_t rangeEnd,
                                           UVector32 &foundBreaks,
+                                           UBool isPhraseBreaking,
                                           UErrorCode& status) const override;

 };
@ -302,7 +305,6 @@ class KhmerBreakEngine : public DictionaryBreakEngine {
     * @internal
     */

-  UnicodeSet                fKhmerWordSet;
  UnicodeSet                fEndWordSet;
  UnicodeSet                fBeginWordSet;
  UnicodeSet                fMarkSet;
@ -338,6 +340,7 @@ class KhmerBreakEngine : public DictionaryBreakEngine {
                                           int32_t rangeStart,
                                           int32_t rangeEnd,
                                           UVector32 &foundBreaks,
+                                           UBool isPhraseBreaking,
                                           UErrorCode& status) const override;

 };
@ -366,13 +369,22 @@ class CjkBreakEngine : public DictionaryBreakEngine {
     * @internal
     */
  UnicodeSet                fHangulWordSet;
-  UnicodeSet                fHanWordSet;
-  UnicodeSet                fKatakanaWordSet;
-  UnicodeSet                fHiraganaWordSet;
+  UnicodeSet                fDigitOrOpenPunctuationOrAlphabetSet;
+  UnicodeSet                fClosePunctuationSet;

  DictionaryMatcher        *fDictionary;
  const Normalizer2        *nfkcNorm2;

+ private:
+  // Load Japanese extensions.
+  void loadJapaneseExtensions(UErrorCode& error);
+  // Load Japanese Hiragana.
+  void loadHiragana(UErrorCode& error);
+  // Initialize fSkipSet by loading Japanese Hiragana and extensions.
+  void initJapanesePhraseParameter(UErrorCode& error);
+
+  Hashtable fSkipSet;
+
 public:

    /**
@ -404,6 +416,7 @@ class CjkBreakEngine : public DictionaryBreakEngine {
          int32_t rangeStart,
          int32_t rangeEnd,
          UVector32 &foundBreaks,
+          UBool isPhraseBreaking,
          UErrorCode& status) const override;

 };
--- a/thirdparty/icu4c/common/localematcher.cpp
+++ b/thirdparty/icu4c/common/localematcher.cpp
@ -168,12 +168,9 @@ void LocaleMatcher::Builder::clearSupportedLocales() {
 bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
    if (U_FAILURE(errorCode_)) { return false; }
    if (supportedLocales_ != nullptr) { return true; }
-    supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_);
+    LocalPointer<UVector> lpSupportedLocales(new UVector(uprv_deleteUObject, nullptr, errorCode_), errorCode_);
    if (U_FAILURE(errorCode_)) { return false; }
-    if (supportedLocales_ == nullptr) {
-        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
-        return false;
-    }
+    supportedLocales_ = lpSupportedLocales.orphan();
    return true;
 }

@ -187,9 +184,8 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListStrin
    for (int32_t i = 0; i < length; ++i) {
        Locale *locale = list.orphanLocaleAt(i);
        if (locale == nullptr) { continue; }
-        supportedLocales_->addElementX(locale, errorCode_);
+        supportedLocales_->adoptElement(locale, errorCode_);
        if (U_FAILURE(errorCode_)) {
-            delete locale;
            break;
        }
    }
@ -197,35 +193,21 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListStrin
 }

 LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
-    if (U_FAILURE(errorCode_)) { return *this; }
-    clearSupportedLocales();
-    if (!ensureSupportedLocaleVector()) { return *this; }
-    while (locales.hasNext()) {
-        const Locale &locale = locales.next();
-        Locale *clone = locale.clone();
-        if (clone == nullptr) {
-            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
-            break;
-        }
-        supportedLocales_->addElementX(clone, errorCode_);
-        if (U_FAILURE(errorCode_)) {
-            delete clone;
-            break;
+    if (ensureSupportedLocaleVector()) {
+        clearSupportedLocales();
+        while (locales.hasNext() && U_SUCCESS(errorCode_)) {
+            const Locale &locale = locales.next();
+            LocalPointer<Locale> clone (locale.clone(), errorCode_);
+            supportedLocales_->adoptElement(clone.orphan(), errorCode_);
        }
    }
    return *this;
 }

 LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
-    if (!ensureSupportedLocaleVector()) { return *this; }
-    Locale *clone = locale.clone();
-    if (clone == nullptr) {
-        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
-        return *this;
-    }
-    supportedLocales_->addElementX(clone, errorCode_);
-    if (U_FAILURE(errorCode_)) {
-        delete clone;
+    if (ensureSupportedLocaleVector()) {
+        LocalPointer<Locale> clone(locale.clone(), errorCode_);
+        supportedLocales_->adoptElement(clone.orphan(), errorCode_);
    }
    return *this;
 }
--- a/thirdparty/icu4c/common/locid.cpp
+++ b/thirdparty/icu4c/common/locid.cpp
@ -1204,14 +1204,11 @@ AliasReplacer::parseLanguageReplacement(
    // We have multiple field so we have to allocate and parse
    CharString* str = new CharString(
        replacement, (int32_t)uprv_strlen(replacement), status);
+    LocalPointer<CharString> lpStr(str, status);
+    toBeFreed.adoptElement(lpStr.orphan(), status);
    if (U_FAILURE(status)) {
        return;
    }
-    if (str == nullptr) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-        return;
-    }
-    toBeFreed.addElementX(str, status);
    char* data = str->data();
    replacedLanguage = (const char*) data;
    char* endOfField = uprv_strchr(data, '_');
@ -1420,12 +1417,9 @@ AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
                               (int32_t)(firstSpace - replacement), status), status);
        }
        if (U_FAILURE(status)) { return false; }
-        if (item.isNull()) {
-            status = U_MEMORY_ALLOCATION_ERROR;
-            return false;
-        }
        replacedRegion = item->data();
-        toBeFreed.addElementX(item.orphan(), status);
+        toBeFreed.adoptElement(item.orphan(), status);
+        if (U_FAILURE(status)) { return false; }
    }
    U_ASSERT(!same(region, replacedRegion));
    region = replacedRegion;
@ -1659,10 +1653,10 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
        while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr &&
               U_SUCCESS(status)) {
            *end = NULL_CHAR;  // null terminate inside variantsBuff
-            variants.addElementX(start, status);
+            variants.addElement(start, status);
            start = end + 1;
        }
-        variants.addElementX(start, status);
+        variants.addElement(start, status);
    }
    if (U_FAILURE(status)) { return false; }

--- a/thirdparty/icu4c/common/lstmbe.cpp
+++ b/thirdparty/icu4c/common/lstmbe.cpp
@ -1,8 +1,8 @@
 // © 2021 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html

+#include <complex>
 #include <utility>
-#include <ctgmath>

 #include "unicode/utypes.h"

@ -639,6 +639,7 @@ LSTMBreakEngine::divideUpDictionaryRange( UText *text,
                                                int32_t startPos,
                                                int32_t endPos,
                                                UVector32 &foundBreaks,
+                                                UBool /* isPhraseBreaking */,
                                                UErrorCode& status) const {
    if (U_FAILURE(status)) return 0;
    int32_t beginFoundBreakSize = foundBreaks.size();
--- a/thirdparty/icu4c/common/lstmbe.h
+++ b/thirdparty/icu4c/common/lstmbe.h
@ -62,6 +62,7 @@ protected:
                                             int32_t rangeStart,
                                             int32_t rangeEnd,
                                             UVector32 &foundBreaks,
+                                             UBool isPhraseBreaking,
                                             UErrorCode& status) const override;
 private:
    const LSTMData* fData;
--- a/thirdparty/icu4c/common/normalizer2impl.cpp
+++ b/thirdparty/icu4c/common/normalizer2impl.cpp
@ -2496,15 +2496,18 @@ void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode
        // origin is not the first character, or it is U+0000.
        UnicodeSet *set;
        if((canonValue&CANON_HAS_SET)==0) {
-            set=new UnicodeSet;
-            if(set==NULL) {
-                errorCode=U_MEMORY_ALLOCATION_ERROR;
+            LocalPointer<UnicodeSet> lpSet(new UnicodeSet, errorCode);
+            set=lpSet.getAlias();
+            if(U_FAILURE(errorCode)) {
                return;
            }
            UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK);
            canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size();
            umutablecptrie_set(mutableTrie, decompLead, canonValue, &errorCode);
-            canonStartSets.addElementX(set, errorCode);
+            canonStartSets.adoptElement(lpSet.orphan(), errorCode);
+            if (U_FAILURE(errorCode)) {
+                return;
+            }
            if(firstOrigin!=0) {
                set->add(firstOrigin);
            }
--- a/thirdparty/icu4c/common/rbbi.cpp
+++ b/thirdparty/icu4c/common/rbbi.cpp
@ -82,6 +82,19 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode
    }
 }

+//-------------------------------------------------------------------------------
+//
+//   Constructor   from a UDataMemory handle to precompiled break rules
+//                 stored in an ICU data file. This constructor is private API,
+//                 only for internal use.
+//                 Delegates to the (udm, status) constructor, then records isPhraseBreaking.
+//-------------------------------------------------------------------------------
+RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UBool isPhraseBreaking,
+        UErrorCode &status) : RuleBasedBreakIterator(udm, status)
+{
+    fIsPhraseBreaking = isPhraseBreaking;  // assigned after the delegated constructor has returned; status is not re-checked here
+}
+
 //
 //  Construct from precompiled binary rules (tables).  This constructor is public API,
 //  taking the rules as a (const uint8_t *) to match the type produced by getBinaryRules().
@ -322,6 +335,7 @@ void RuleBasedBreakIterator::init(UErrorCode &status) {
    fBreakCache           = nullptr;
    fDictionaryCache      = nullptr;
    fLookAheadMatches     = nullptr;
+    fIsPhraseBreaking     = false;

    // Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER.
    // fText                 = UTEXT_INITIALIZER;
--- a/thirdparty/icu4c/common/rbbi_cache.cpp
+++ b/thirdparty/icu4c/common/rbbi_cache.cpp
@ -163,7 +163,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
        // Ask the language object if there are any breaks. It will add them to the cache and
        // leave the text pointer on the other side of its range, ready to search for the next one.
        if (lbe != NULL) {
-            foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks, status);
+            foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks, fBI->fIsPhraseBreaking, status);
        }

        // Reload the loop variables for the next go-round
--- a/thirdparty/icu4c/common/serv.cpp
+++ b/thirdparty/icu4c/common/serv.cpp
@ -625,10 +625,7 @@ ICUService::getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorC
                    }
                }

-                LocalPointer<UnicodeString> idClone(new UnicodeString(*id), status);
-                if (U_SUCCESS(status) && idClone->isBogus()) {
-                    status = U_MEMORY_ALLOCATION_ERROR;
-                }
+                LocalPointer<UnicodeString> idClone(id->clone(), status);
                result.adoptElement(idClone.orphan(), status);
            }
            delete fallbackKey;
--- a/thirdparty/icu4c/common/servls.cpp
+++ b/thirdparty/icu4c/common/servls.cpp
@ -179,7 +179,8 @@ private:

            length = other._ids.size();
            for(i = 0; i < length; ++i) {
-                _ids.addElementX(((UnicodeString *)other._ids.elementAt(i))->clone(), status);
+                LocalPointer<UnicodeString> clonedId(((UnicodeString *)other._ids.elementAt(i))->clone(), status);
+                _ids.adoptElement(clonedId.orphan(), status);
            }

            if(U_SUCCESS(status)) {
--- a/thirdparty/icu4c/common/servnotf.cpp
+++ b/thirdparty/icu4c/common/servnotf.cpp
@ -49,7 +49,11 @@ ICUNotifier::addListener(const EventListener* l, UErrorCode& status)
        if (acceptsListener(*l)) {
            Mutex lmx(&notifyLock);
            if (listeners == NULL) {
-                listeners = new UVector(5, status);
+                LocalPointer<UVector> lpListeners(new UVector(5, status), status);
+                if (U_FAILURE(status)) {
+                    return;
+                }
+                listeners = lpListeners.orphan();
            } else {
                for (int i = 0, e = listeners->size(); i < e; ++i) {
                    const EventListener* el = (const EventListener*)(listeners->elementAt(i));
@ -59,7 +63,7 @@ ICUNotifier::addListener(const EventListener* l, UErrorCode& status)
                }
            }

-            listeners->addElementX((void*)l, status); // cast away const
+            listeners->addElement((void*)l, status); // cast away const
        }
 #ifdef NOTIFIER_DEBUG
        else {
@ -102,13 +106,11 @@ ICUNotifier::removeListener(const EventListener *l, UErrorCode& status)
 void 
 ICUNotifier::notifyChanged(void) 
 {
+    Mutex lmx(&notifyLock);
    if (listeners != NULL) {
-        Mutex lmx(&notifyLock);
-        if (listeners != NULL) {
-            for (int i = 0, e = listeners->size(); i < e; ++i) {
-                EventListener* el = (EventListener*)listeners->elementAt(i);
-                notifyListener(*el);
-            }
+        for (int i = 0, e = listeners->size(); i < e; ++i) {
+            EventListener* el = (EventListener*)listeners->elementAt(i);
+            notifyListener(*el);
        }
    }
 }
--- a/thirdparty/icu4c/common/ubrk.cpp
+++ b/thirdparty/icu4c/common/ubrk.cpp
@ -168,7 +168,7 @@ ubrk_safeClone(
    BreakIterator *newBI = ((BreakIterator *)bi)->clone();
    if (newBI == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
-    } else {
+    } else if (pBufferSize != NULL) {
        *status = U_SAFECLONE_ALLOCATED_WARNING;
    }
    return (UBreakIterator *)newBI;
@ -176,15 +176,7 @@ ubrk_safeClone(

 U_CAPI UBreakIterator * U_EXPORT2
 ubrk_clone(const UBreakIterator *bi, UErrorCode *status) {
-    if (U_FAILURE(*status)) {
-        return nullptr;
-    }
-    BreakIterator *newBI = ((BreakIterator *)bi)->clone();
-    if (newBI == nullptr) {
-        *status = U_MEMORY_ALLOCATION_ERROR;
-        return nullptr;
-    }
-    return (UBreakIterator *)newBI;
+    return ubrk_safeClone(bi, nullptr, nullptr, status);
 }


--- a/thirdparty/icu4c/common/ucase.cpp
+++ b/thirdparty/icu4c/common/ucase.cpp
@ -22,27 +22,14 @@
 #include "unicode/utypes.h"
 #include "unicode/unistr.h"
 #include "unicode/uset.h"
-#include "unicode/udata.h" /* UDataInfo */
 #include "unicode/utf16.h"
-#include "ucmndata.h" /* DataHeader */
-#include "udatamem.h"
-#include "umutex.h"
-#include "uassert.h"
 #include "cmemory.h"
-#include "utrie2.h"
+#include "uassert.h"
 #include "ucase.h"
+#include "umutex.h"
+#include "utrie2.h"

-struct UCaseProps {
-    UDataMemory *mem;
-    const int32_t *indexes;
-    const uint16_t *exceptions;
-    const uint16_t *unfold;
-
-    UTrie2 trie;
-    uint8_t formatVersion[4];
-};
-
-/* ucase_props_data.h is machine-generated by gencase --csource */
+/* ucase_props_data.h is machine-generated by genprops/casepropsbuilder.cpp */
 #define INCLUDED_FROM_UCASE_CPP
 #include "ucase_props_data.h"

@ -77,6 +64,13 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {

 /* data access primitives --------------------------------------------------- */

+U_CAPI const struct UCaseProps * U_EXPORT2
+ucase_getSingleton(int32_t *pExceptionsLength, int32_t *pUnfoldLength) {  // both out-params are dereferenced unconditionally, so callers must pass non-null pointers
+    *pExceptionsLength = UPRV_LENGTHOF(ucase_props_exceptions);  // length of the ucase_props_exceptions array
+    *pUnfoldLength = UPRV_LENGTHOF(ucase_props_unfold);  // length of the ucase_props_unfold array
+    return &ucase_props_singleton;  // address of the file-level singleton, not a copy
+}
+
 U_CFUNC const UTrie2 * U_EXPORT2
 ucase_getTrie() {
    return &ucase_props_singleton.trie;
@ -690,7 +684,7 @@ ucase_isCaseSensitive(UChar32 c) {
 *   - The general category of C is
 *     Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
 *     Letter Modifier (Lm), or Symbol Modifier (Sk)
- *   - C is one of the following characters 
+ *   - C is one of the following characters
 *     U+0027 APOSTROPHE
 *     U+00AD SOFT HYPHEN (SHY)
 *     U+2019 RIGHT SINGLE QUOTATION MARK
@ -1064,6 +1058,8 @@ ucase_toFullLower(UChar32 c,
    // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
    U_ASSERT(c >= 0);
    UChar32 result=c;
+    // Reset the output pointer in case it was uninitialized.
+    *pString=nullptr;
    uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
    if(!UCASE_HAS_EXCEPTION(props)) {
        if(UCASE_IS_UPPER_OR_TITLE(props)) {
@ -1148,7 +1144,6 @@ ucase_toFullLower(UChar32 c,
                    0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
                    0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
                 */
-                *pString=nullptr;
                return 0; /* remove the dot (continue without output) */
            } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) {
                /*
@ -1215,6 +1210,8 @@ toUpperOrTitle(UChar32 c,
    // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
    U_ASSERT(c >= 0);
    UChar32 result=c;
+    // Reset the output pointer in case it was uninitialized.
+    *pString=nullptr;
    uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
    if(!UCASE_HAS_EXCEPTION(props)) {
        if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
@ -1252,7 +1249,6 @@ toUpperOrTitle(UChar32 c,

                    0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
                 */
-                *pString=nullptr;
                return 0; /* remove the dot (continue without output) */
            } else if(c==0x0587) {
                // See ICU-13416:
@ -1449,6 +1445,8 @@ ucase_toFullFolding(UChar32 c,
    // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
    U_ASSERT(c >= 0);
    UChar32 result=c;
+    // Reset the output pointer in case it was uninitialized.
+    *pString=nullptr;
    uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
    if(!UCASE_HAS_EXCEPTION(props)) {
        if(UCASE_IS_UPPER_OR_TITLE(props)) {
@ -1542,7 +1540,7 @@ U_CAPI UChar32 U_EXPORT2
 u_tolower(UChar32 c) {
    return ucase_tolower(c);
 }
-    
+
 /* Transforms the Unicode character to its upper case equivalent.*/
 U_CAPI UChar32 U_EXPORT2
 u_toupper(UChar32 c) {
--- a/thirdparty/icu4c/common/ucase.h
+++ b/thirdparty/icu4c/common/ucase.h
@ -312,6 +312,21 @@ UCaseMapFull(UChar32 c,

 U_CDECL_END

+/* for icuexportdata -------------------------------------------------------- */
+
+struct UCaseProps {  // return type of ucase_getSingleton(), declared right after this struct
+    void *mem;  // TODO: was unused, and type UDataMemory -- remove
+    const int32_t *indexes;
+    const uint16_t *exceptions;  // NOTE(review): presumably the array whose length ucase_getSingleton() reports via pExceptionsLength -- confirm
+    const uint16_t *unfold;  // NOTE(review): presumably the array whose length ucase_getSingleton() reports via pUnfoldLength -- confirm

+    UTrie2 trie;
+    uint8_t formatVersion[4];
+};
+
+U_CAPI const struct UCaseProps * U_EXPORT2
+ucase_getSingleton(int32_t *pExceptionsLength, int32_t *pUnfoldLength);
+
 /* file definitions --------------------------------------------------------- */

 #define UCASE_DATA_NAME "ucase"
--- a/thirdparty/icu4c/common/ucasemap.cpp
+++ b/thirdparty/icu4c/common/ucasemap.cpp
@ -112,8 +112,7 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
    if(length==sizeof(csm->locale)) {
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    }
-    if(U_SUCCESS(*pErrorCode)) {
-        csm->caseLocale=UCASE_LOC_UNKNOWN;
+    if(U_SUCCESS(*pErrorCode)) {     
        csm->caseLocale = ucase_getCaseLocale(csm->locale);
    } else {
        csm->locale[0]=0;
@ -420,6 +419,97 @@ void toUpper(int32_t caseLocale, uint32_t options,

 #if !UCONFIG_NO_BREAK_ITERATION

+namespace {
+
+constexpr uint8_t ACUTE_BYTE0 = u8"\u0301"[0];
+
+constexpr uint8_t ACUTE_BYTE1 = u8"\u0301"[1];
+
+/**
+ * Input: c is a letter I with or without acute accent; src is UTF-8 and is read byte by byte.
+ * start is the index in src after c, and is less than segmentLimit.
+ * If a plain i/I is followed by a plain j/J,
+ * or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute,
+ * then we output accordingly: the bytes after c go to sink unchanged, except that a lowercase j becomes 'J'.
+ * The combining acute (U+0301) is matched as the two bytes ACUTE_BYTE0, ACUTE_BYTE1. Nothing is written to sink when start is returned.
+ * @return the src index after the titlecased sequence, or the start index if no Dutch IJ
+ */
+int32_t maybeTitleDutchIJ(const uint8_t *src, UChar32 c, int32_t start, int32_t segmentLimit,
+                          ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) {
+    U_ASSERT(start < segmentLimit);

+    int32_t index = start;
+    bool withAcute = false;

+    // If the conditions are met, then the following variables tell us what to output.
+    int32_t unchanged1 = 0;  // bytes to copy unchanged before the j, or the whole sequence (0..5 in UTF-8)
+    bool doTitleJ = false;  // true if the j needs to be titlecased
+    int32_t unchanged2 = 0;  // bytes to copy unchanged after the j (0 or 2 in UTF-8)

+    // next byte after the first letter (a single UTF-8 code unit, not a decoded code point)
+    UChar32 c2;
+    c2 = src[index++];

+    // Is the first letter an i/I with accent?
+    if (c == u'I') {
+        if (c2 == ACUTE_BYTE0 && index < segmentLimit && src[index++] == ACUTE_BYTE1) {
+            withAcute = true;
+            unchanged1 = 2;  // ACUTE is 2 code units in UTF-8
+            if (index == segmentLimit) { return start; }
+            c2 = src[index++];
+        }
+    } else {  // Í
+        withAcute = true;
+    }

+    // Is the next character a j/J?
+    if (c2 == u'j') {
+        doTitleJ = true;
+    } else if (c2 == u'J') {
+        ++unchanged1;
+    } else {
+        return start;
+    }

+    // A plain i/I must be followed by a plain j/J.
+    // An i/I with acute must be followed by a j/J with acute.
+    if (withAcute) {
+        if ((index + 1) >= segmentLimit || src[index++] != ACUTE_BYTE0 || src[index++] != ACUTE_BYTE1) {  // both acute bytes (index, index+1) must lie inside the segment
+            return start;
+        }
+        if (doTitleJ) {
+            unchanged2 = 2;  // ACUTE is 2 code units in UTF-8
+        } else {
+            unchanged1 = unchanged1 + 2;    // ACUTE is 2 code units in UTF-8
+        }
+    }

+    // There must not be another combining mark.
+    if (index < segmentLimit) {
+        int32_t cp;
+        int32_t i = index;
+        U8_NEXT(src, i, segmentLimit, cp);  // peeks at the next code point through the copy i; index itself is not advanced
+        uint32_t typeMask = U_GET_GC_MASK(cp);
+        if ((typeMask & U_GC_M_MASK) != 0) {
+            return start;
+        }
+    }

+    // Output the rest of the Dutch IJ.
+    ByteSinkUtil::appendUnchanged(src + start, unchanged1, sink, options, edits, errorCode);  // NOTE(review): result is not checked; any failure is only visible to the caller through errorCode -- confirm this is intended
+    start += unchanged1;
+    if (doTitleJ) {
+        ByteSinkUtil::appendCodePoint(1, u'J', sink, edits);  // emits 'J' in place of the 'j'; presumably 1 is the length of the replaced source text -- confirm
+        ++start;  // step over the one-byte 'j' in src
+    }
+    ByteSinkUtil::appendUnchanged(src + start, unchanged2, sink, options, edits, errorCode);

+    U_ASSERT(start + unchanged2 == index);
+    return index;
+}
+
+}  // namespace
+
 U_CFUNC void U_CALLCONV
 ucasemap_internalUTF8ToTitle(
        int32_t caseLocale, uint32_t options, BreakIterator *iter,
@ -504,19 +594,14 @@ ucasemap_internalUTF8ToTitle(
                }

                /* Special case Dutch IJ titlecasing */
-                if (titleStart+1 < index &&
-                        caseLocale == UCASE_LOC_DUTCH &&
-                        (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
-                    if (src[titleStart+1] == 0x006A) {
-                        ByteSinkUtil::appendCodePoint(1, 0x004A, sink, edits);
-                        titleLimit++;
-                    } else if (src[titleStart+1] == 0x004A) {
-                        // Keep the capital J from getting lowercased.
-                        if (!ByteSinkUtil::appendUnchanged(src+titleStart+1, 1,
-                                                           sink, options, edits, errorCode)) {
-                            return;
-                        }
-                        titleLimit++;
+                if (titleLimit < index &&
+                    caseLocale == UCASE_LOC_DUTCH) {
+                    if (c < 0) {
+                        c = ~c;
+                    }
+
+                    if (c == u'I' || c == u'Í') {
+                        titleLimit = maybeTitleDutchIJ(src, c, titleLimit, index, sink, options, edits, errorCode);
                    }
                }

--- a/thirdparty/icu4c/common/ucnv.cpp
+++ b/thirdparty/icu4c/common/ucnv.cpp
@ -252,7 +252,10 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
            UTRACE_EXIT_STATUS(*status);
            return NULL;
        }
-        *status = U_SAFECLONE_ALLOCATED_WARNING;
+        // If pBufferSize was NULL as the input, pBufferSize is set to &stackBufferSize in this function.
+        if (pBufferSize != &stackBufferSize) {
+            *status = U_SAFECLONE_ALLOCATED_WARNING;
+        }

        /* record the fact that memory was allocated */
        *pBufferSize = bufferSizeNeeded;
@ -317,7 +320,11 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
    return localConverter;
 }

-
+U_CAPI UConverter* U_EXPORT2
+ucnv_clone(const UConverter* cnv, UErrorCode *status)
+{
+    return ucnv_safeClone(cnv, nullptr, nullptr, status);  // null pBufferSize: ucnv_safeClone() skips U_SAFECLONE_ALLOCATED_WARNING in that case (see its hunk above)
+}

 /*Decreases the reference counter in the shared immutable section of the object
 *and frees the mutable part*/
--- a/thirdparty/icu4c/common/ucurr.cpp
+++ b/thirdparty/icu4c/common/ucurr.cpp
@ -254,7 +254,7 @@ currSymbolsEquiv_cleanup(void)
 }

 /**
- * Deleter for OlsonToMetaMappingEntry
+ * Deleter for IsoCodeEntry
 */
 static void U_CALLCONV
 deleteIsoCodeEntry(void *obj) {
--- a/thirdparty/icu4c/common/uloc.cpp
+++ b/thirdparty/icu4c/common/uloc.cpp
@ -186,10 +186,10 @@ NULL
 };

 static const char* const DEPRECATED_LANGUAGES[]={
-    "in", "iw", "ji", "jw", NULL, NULL
+    "in", "iw", "ji", "jw", "mo", NULL, NULL
 };
 static const char* const REPLACEMENT_LANGUAGES[]={
-    "id", "he", "yi", "jv", NULL, NULL
+    "id", "he", "yi", "jv", "ro", NULL, NULL
 };

 /**
@ -444,7 +444,7 @@ static const char * const COUNTRIES_3[] = {
 /*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
 /*  "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
-    "WSM", "XXK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
+    "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
 NULL,
 /*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
--- a/thirdparty/icu4c/common/unicode/localematcher.h
+++ b/thirdparty/icu4c/common/unicode/localematcher.h
@ -461,13 +461,13 @@ public:
         * Option for whether to include or ignore one-way (fallback) match data.
         * By default, they are included.
         *
-         * @param direction the match direction to set.
+         * @param matchDirection the match direction to set.
         * @return this Builder object
         * @stable ICU 67
         */
-        Builder &setDirection(ULocMatchDirection direction) {
+        Builder &setDirection(ULocMatchDirection matchDirection) {
            if (U_SUCCESS(errorCode_)) {
-                direction_ = direction;
+                direction_ = matchDirection;
            }
            return *this;
        }
--- a/thirdparty/icu4c/common/unicode/rbbi.h
+++ b/thirdparty/icu4c/common/unicode/rbbi.h
@ -147,6 +147,11 @@ private:
     */
    int32_t *fLookAheadMatches;

+    /**
+     *  A flag to indicate if phrase based breaking is enabled.
+     */
+    UBool fIsPhraseBreaking;
+
    //=======================================================================
    // constructors
    //=======================================================================
@ -163,6 +168,21 @@ private:
     */
    RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);

+    /**
+     * This constructor uses the udata interface to create a BreakIterator
+     * whose internal tables live in a memory-mapped file.  "image" is an
+     * ICU UDataMemory handle for the pre-compiled break iterator tables.
+     * @param image handle to the memory image for the break iterator data.
+     *        Ownership of the UDataMemory handle passes to the Break Iterator,
+     *        which will be responsible for closing it when it is no longer needed.
+     * @param isPhraseBreaking true if phrase based breaking is required, otherwise false.
+     * @param status Information on any errors encountered.
+     * @see udata_open
+     * @see #getBinaryRules
+     * @internal (private)
+     */
+    RuleBasedBreakIterator(UDataMemory* image, UBool isPhraseBreaking, UErrorCode &status);
+
    /** @internal */
    friend class RBBIRuleBuilder;
    /** @internal */
--- a/thirdparty/icu4c/common/unicode/ubrk.h
+++ b/thirdparty/icu4c/common/unicode/ubrk.h
@ -312,11 +312,12 @@ ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
 *  If *pBufferSize is not enough for a stack-based safe clone,
 *  new memory will be allocated.
 * @param status to indicate whether the operation went on smoothly or there were errors
- *  An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
+ *  An informational status value, U_SAFECLONE_ALLOCATED_WARNING, is used
+ *  if pBufferSize != NULL and any allocations were necessary.
 * @return pointer to the new clone
 * @deprecated ICU 69 Use ubrk_clone() instead.
 */
-U_CAPI UBreakIterator * U_EXPORT2
+U_DEPRECATED UBreakIterator * U_EXPORT2
 ubrk_safeClone(
          const UBreakIterator *bi,
          void *stackBuffer,
@ -325,21 +326,17 @@ ubrk_safeClone(

 #endif /* U_HIDE_DEPRECATED_API */

-#ifndef U_HIDE_DRAFT_API
-
 /**
 * Thread safe cloning operation.
 * @param bi iterator to be cloned
 * @param status to indicate whether the operation went on smoothly or there were errors
 * @return pointer to the new clone
- * @draft ICU 69
+ * @stable ICU 69
 */
 U_CAPI UBreakIterator * U_EXPORT2
 ubrk_clone(const UBreakIterator *bi,
           UErrorCode *status);

-#endif  // U_HIDE_DRAFT_API
-
 #ifndef U_HIDE_DEPRECATED_API

 /**
--- a/thirdparty/icu4c/common/unicode/ucnv.h
+++ b/thirdparty/icu4c/common/unicode/ucnv.h
@ -477,7 +477,7 @@ ucnv_openCCSID(int32_t codepage,
 *
 * <p>The name will NOT be looked up in the alias mechanism, nor will the converter be
 * stored in the converter cache or the alias table. The only way to open further converters
- * is call this function multiple times, or use the ucnv_safeClone() function to clone a
+ * is to call this function multiple times, or use the ucnv_clone() function to clone a
 * 'primary' converter.</p>
 *
 * <p>A future version of ICU may add alias table lookups and/or caching
@ -493,13 +493,27 @@ ucnv_openCCSID(int32_t codepage,
 * @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
 * @see udata_open
 * @see ucnv_open
- * @see ucnv_safeClone
+ * @see ucnv_clone
 * @see ucnv_close
 * @stable ICU 2.2
 */
 U_CAPI UConverter* U_EXPORT2
 ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err);

+/**
+ * Thread safe converter cloning operation.
+ *
+ * You must ucnv_close() the clone.
+ *
+ * @param cnv converter to be cloned
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * @return pointer to the new clone
+ * @stable ICU 71
+ */
+U_CAPI UConverter* U_EXPORT2 ucnv_clone(const UConverter *cnv, UErrorCode *status);
+
+#ifndef U_HIDE_DEPRECATED_API
+
 /**
 * Thread safe converter cloning operation.
 * For most efficient operation, pass in a stackBuffer (and a *pBufferSize)
@ -532,21 +546,19 @@ ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode
 *  pointer to size of allocated space.
 * @param status to indicate whether the operation went on smoothly or there were errors
 *  An informational status value, U_SAFECLONE_ALLOCATED_WARNING,
- *  is used if any allocations were necessary.
+ *  is used if pBufferSize != NULL and any allocations were necessary.
 *  However, it is better to check if *pBufferSize grew for checking for
 *  allocations because warning codes can be overridden by subsequent
 *  function calls.
 * @return pointer to the new clone
- * @stable ICU 2.0
+ * @deprecated ICU 71 Use ucnv_clone() instead.
 */
-U_CAPI UConverter * U_EXPORT2
+U_DEPRECATED UConverter * U_EXPORT2
 ucnv_safeClone(const UConverter *cnv,
               void             *stackBuffer,
               int32_t          *pBufferSize,
               UErrorCode       *status);

-#ifndef U_HIDE_DEPRECATED_API
-
 /**
 * \def U_CNV_SAFECLONE_BUFFERSIZE
 * Definition of a buffer size that is designed to be large enough for
--- a/thirdparty/icu4c/common/unicode/uniset.h
+++ b/thirdparty/icu4c/common/unicode/uniset.h
@ -1229,7 +1229,6 @@ public:
     */
    UnicodeSet& retain(UChar32 c);

-#ifndef U_HIDE_DRAFT_API
    /**
     * Retains only the specified string from this set if it is present.
     * Upon return this set will be empty if it did not contain s, or
@ -1238,10 +1237,9 @@ public:
     *
     * @param s the source string
     * @return this object, for chaining
-     * @draft ICU 69
+     * @stable ICU 69
     */
    UnicodeSet& retain(const UnicodeString &s);
-#endif  // U_HIDE_DRAFT_API

    /**
     * Removes the specified range from this set if it is present.
--- a/thirdparty/icu4c/common/unicode/urename.h
+++ b/thirdparty/icu4c/common/unicode/urename.h
@ -567,6 +567,7 @@
 #define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure)
 #define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold)
 #define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale)
+#define ucase_getSingleton U_ICU_ENTRY_POINT_RENAME(ucase_getSingleton)
 #define ucase_getTrie U_ICU_ENTRY_POINT_RENAME(ucase_getTrie)
 #define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType)
 #define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable)
@ -630,6 +631,7 @@
 #define ucnv_cbFromUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteUChars)
 #define ucnv_cbToUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteSub)
 #define ucnv_cbToUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteUChars)
+#define ucnv_clone U_ICU_ENTRY_POINT_RENAME(ucnv_clone)
 #define ucnv_close U_ICU_ENTRY_POINT_RENAME(ucnv_close)
 #define ucnv_compareNames U_ICU_ENTRY_POINT_RENAME(ucnv_compareNames)
 #define ucnv_convert U_ICU_ENTRY_POINT_RENAME(ucnv_convert)
@ -725,6 +727,7 @@
 #define ucnvsel_selectForString U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForString)
 #define ucnvsel_selectForUTF8 U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForUTF8)
 #define ucnvsel_serialize U_ICU_ENTRY_POINT_RENAME(ucnvsel_serialize)
+#define ucol_clone U_ICU_ENTRY_POINT_RENAME(ucol_clone)
 #define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary)
 #define ucol_close U_ICU_ENTRY_POINT_RENAME(ucol_close)
 #define ucol_closeElements U_ICU_ENTRY_POINT_RENAME(ucol_closeElements)
@ -904,6 +907,7 @@
 #define udatpg_getBestPattern U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPattern)
 #define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions)
 #define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat)
+#define udatpg_getDateTimeFormatForStyle U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormatForStyle)
 #define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal)
 #define udatpg_getDefaultHourCycle U_ICU_ENTRY_POINT_RENAME(udatpg_getDefaultHourCycle)
 #define udatpg_getFieldDisplayName U_ICU_ENTRY_POINT_RENAME(udatpg_getFieldDisplayName)
@ -918,6 +922,7 @@
 #define udatpg_setAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemFormat)
 #define udatpg_setAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemName)
 #define udatpg_setDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormat)
+#define udatpg_setDateTimeFormatForStyle U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormatForStyle)
 #define udatpg_setDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_setDecimal)
 #define udict_swap U_ICU_ENTRY_POINT_RENAME(udict_swap)
 #define udtitvfmt_close U_ICU_ENTRY_POINT_RENAME(udtitvfmt_close)
--- a/thirdparty/icu4c/common/unicode/uset.h
+++ b/thirdparty/icu4c/common/unicode/uset.h
@ -628,7 +628,6 @@ uset_removeRange(USet* set, UChar32 start, UChar32 end);
 U_CAPI void U_EXPORT2
 uset_removeString(USet* set, const UChar* str, int32_t strLen);

-#ifndef U_HIDE_DRAFT_API
 /**
 * Removes EACH of the characters in this string. Note: "ch" == {"c", "h"}
 * A frozen set will not be modified.
@ -636,11 +635,10 @@ uset_removeString(USet* set, const UChar* str, int32_t strLen);
 * @param set the object to be modified
 * @param str the string
 * @param length the length of the string, or -1 if NUL-terminated
- * @draft ICU 69
+ * @stable ICU 69
 */
 U_CAPI void U_EXPORT2
 uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
-#endif  // U_HIDE_DRAFT_API

 /**
 * Removes from this set all of its elements that are contained in the
@ -671,7 +669,6 @@ uset_removeAll(USet* set, const USet* removeSet);
 U_CAPI void U_EXPORT2
 uset_retain(USet* set, UChar32 start, UChar32 end);

-#ifndef U_HIDE_DRAFT_API
 /**
 * Retains only the specified string from this set if it is present.
 * Upon return this set will be empty if it did not contain s, or
@ -681,7 +678,7 @@ uset_retain(USet* set, UChar32 start, UChar32 end);
 * @param set the object to be modified
 * @param str the string
 * @param length the length of the string, or -1 if NUL-terminated
- * @draft ICU 69
+ * @stable ICU 69
 */
 U_CAPI void U_EXPORT2
 uset_retainString(USet *set, const UChar *str, int32_t length);
@ -693,11 +690,10 @@ uset_retainString(USet *set, const UChar *str, int32_t length);
 * @param set the object to be modified
 * @param str the string
 * @param length the length of the string, or -1 if NUL-terminated
- * @draft ICU 69
+ * @stable ICU 69
 */
 U_CAPI void U_EXPORT2
 uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
-#endif  // U_HIDE_DRAFT_API

 /**
 * Retains only the elements in this set that are contained in the
@ -741,7 +737,6 @@ uset_compact(USet* set);
 U_CAPI void U_EXPORT2
 uset_complement(USet* set);

-#ifndef U_HIDE_DRAFT_API
 /**
 * Complements the specified range in this set.  Any character in
 * the range will be removed if it is in this set, or will be
@ -753,7 +748,7 @@ uset_complement(USet* set);
 * @param set the object to be modified
 * @param start first character, inclusive, of range
 * @param end last character, inclusive, of range
- * @draft ICU 69
+ * @stable ICU 69
 */
 U_CAPI void U_EXPORT2
 uset_complementRange(USet *set, UChar32 start, UChar32 end);
@ -766,7 +761,7 @@ uset_complementRange(USet *set, UChar32 start, UChar32 end);
 * @param set the object to be modified
 * @param str the string
 * @param length the length of the string, or -1 if NUL-terminated
- * @draft ICU 69
+ * @stable ICU 69
 */
 U_CAPI void U_EXPORT2
 uset_complementString(USet *set, const UChar *str, int32_t length);
@ -778,11 +773,10 @@ uset_complementString(USet *set, const UChar *str, int32_t length);
 * @param set the object to be modified
 * @param str the string
 * @param length the length of the string, or -1 if NUL-terminated
- * @draft ICU 69
+ * @stable ICU 69
 */
 U_CAPI void U_EXPORT2
 uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
-#endif  // U_HIDE_DRAFT_API

 /**
 * Complements in this set all elements contained in the specified
--- a/thirdparty/icu4c/common/unicode/uvernum.h
+++ b/thirdparty/icu4c/common/unicode/uvernum.h
@ -60,7 +60,7 @@
 *  This value will change in the subsequent releases of ICU
 *  @stable ICU 2.4
 */
-#define U_ICU_VERSION_MAJOR_NUM 70
+#define U_ICU_VERSION_MAJOR_NUM 71

 /** The current ICU minor version as an integer.
 *  This value will change in the subsequent releases of ICU
@ -86,7 +86,7 @@
 *  This value will change in the subsequent releases of ICU
 *  @stable ICU 2.6
 */
-#define U_ICU_VERSION_SUFFIX _70
+#define U_ICU_VERSION_SUFFIX _71

 /**
 * \def U_DEF2_ICU_ENTRY_POINT_RENAME
@ -139,7 +139,7 @@
 *  This value will change in the subsequent releases of ICU
 *  @stable ICU 2.4
 */
-#define U_ICU_VERSION "70.1"
+#define U_ICU_VERSION "71.1"

 /**
 * The current ICU library major version number as a string, for library name suffixes.
@ -152,13 +152,13 @@
 *
 * @stable ICU 2.6
 */
-#define U_ICU_VERSION_SHORT "70"
+#define U_ICU_VERSION_SHORT "71"

 #ifndef U_HIDE_INTERNAL_API
 /** Data version in ICU4C.
 * @internal ICU 4.4 Internal Use Only
 **/
-#define U_ICU_DATA_VERSION "70.1"
+#define U_ICU_DATA_VERSION "71.1"
 #endif  /* U_HIDE_INTERNAL_API */

 /*===========================================================================
--- a/thirdparty/icu4c/common/unistr.cpp
+++ b/thirdparty/icu4c/common/unistr.cpp
@ -334,7 +334,8 @@ Replaceable::clone() const {
 // UnicodeString overrides clone() with a real implementation
 UnicodeString *
 UnicodeString::clone() const {
-  return new UnicodeString(*this);
+  LocalPointer<UnicodeString> clonedString(new UnicodeString(*this));  // presumably LocalPointer releases the copy unless it is orphan()ed - confirm
+  return clonedString.isValid() && !clonedString->isBogus() ? clonedString.orphan() : nullptr;  // nullptr unless the pointer is valid and the copy is not bogus
 }

 //========================================
@ -1976,7 +1977,12 @@ The vector deleting destructor is already a part of UObject,
 but defining it here makes sure that it is included with this object file.
 This makes sure that static library dependencies are kept to a minimum.
 */
+#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 1100
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
 static void uprv_UnicodeStringDummy(void) {
     delete [] (new UnicodeString[2]);  // allocates a two-element array and deletes it right away
 }
+#pragma GCC diagnostic pop
+#endif  // defined(__clang__) || U_GCC_MAJOR_MINOR >= 1100 -- NOTE(review): this guard encloses the function definition as well as the pragmas; confirm that is intended
 #endif
--- a/thirdparty/icu4c/common/ustrcase.cpp
+++ b/thirdparty/icu4c/common/ustrcase.cpp
@ -36,6 +36,12 @@
 #include "ustr_imp.h"
 #include "uassert.h"

+/**
+ * Code point for COMBINING ACUTE ACCENT
+ * Compared against UTF-16 code units of the source text by maybeTitleDutchIJ()
+ * to recognize an acute accent that follows a base letter.
+ * @internal
+ */
+#define ACUTE u'\u0301'
+
 U_NAMESPACE_BEGIN

 namespace {
@ -396,6 +402,94 @@ U_NAMESPACE_USE

 #if !UCONFIG_NO_BREAK_ITERATION

+namespace {
+
+/**
+ * Input: c is a letter I with or without acute accent.
+ * start is the index in src after c, and is less than segmentLimit.
+ * If a plain i/I is followed by a plain j/J,
+ * or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute,
+ * then we output accordingly.
+ *
+ * Nothing is appended to dest on the paths that return start: every early return
+ * happens before the first append call.
+ *
+ * @param src          source text; read only at indexes in [start, segmentLimit)
+ * @param c            the first letter; any value other than u'I' is treated as I with acute
+ * @param start        index in src just after c; asserted to be less than segmentLimit
+ * @param segmentLimit exclusive limit for the lookahead in src
+ * @param dest         output buffer, passed through to appendUnchanged()/appendUChar()
+ * @param destIndex    in/out: current output index; overwritten with the append helpers' results
+ * @param destCapacity capacity of dest, passed through to the append helpers
+ * @param options      passed through to appendUnchanged()
+ * @param edits        may be nullptr; otherwise receives addReplace(1, 1) when a j is titlecased,
+ *                     and is also passed through to appendUnchanged()
+ *
+ * NOTE(review): the append helpers' results are not checked for a negative value here,
+ * whereas the inline code this helper replaced returned U_INDEX_OUTOFBOUNDS_ERROR when
+ * destIndex became negative. Presumably the caller checks destIndex afterwards; confirm.
+ *
+ * @return the src index after the titlecased sequence, or the start index if no Dutch IJ
+ */
+int32_t maybeTitleDutchIJ(const UChar *src, UChar32 c, int32_t start, int32_t segmentLimit,
+                          UChar *dest, int32_t &destIndex, int32_t destCapacity, uint32_t options,
+                          icu::Edits *edits) {
+    U_ASSERT(start < segmentLimit);
+
+    int32_t index = start;
+    bool withAcute = false;
+
+    // If the conditions are met, then the following variables tell us what to output.
+    int32_t unchanged1 = 0;  // code units before the j, or the whole sequence (0..3)
+    bool doTitleJ = false;  // true if the j needs to be titlecased
+    int32_t unchanged2 = 0;  // after the j (0 or 1)
+
+    // next character after the first letter
+    UChar c2 = src[index++];
+
+    // Is the first letter an i/I with accent?
+    if (c == u'I') {
+        if (c2 == ACUTE) {
+            withAcute = true;
+            unchanged1 = 1;  // the decomposed accent is copied through unchanged
+            if (index == segmentLimit) { return start; }
+            c2 = src[index++];
+        }
+    } else {  // Í
+        withAcute = true;
+    }
+
+    // Is the next character a j/J?
+    if (c2 == u'j') {
+        doTitleJ = true;
+    } else if (c2 == u'J') {
+        ++unchanged1;  // an already-uppercase J is copied through unchanged
+    } else {
+        return start;
+    }
+
+    // A plain i/I must be followed by a plain j/J.
+    // An i/I with acute must be followed by a j/J with acute.
+    if (withAcute) {
+        if (index == segmentLimit || src[index++] != ACUTE) { return start; }
+        if (doTitleJ) {
+            unchanged2 = 1;  // accent follows the replaced j, so it is counted separately
+        } else {
+            ++unchanged1;  // accent extends the unchanged run that already includes the J
+        }
+    }
+
+    // There must not be another combining mark.
+    if (index < segmentLimit) {
+        int32_t cp;
+        int32_t i = index;  // scratch copy so that the lookahead does not advance index
+        U16_NEXT(src, i, segmentLimit, cp);
+        uint32_t typeMask = U_GET_GC_MASK(cp);
+        if ((typeMask & U_GC_M_MASK) != 0) {
+            return start;
+        }
+    }
+
+    // Output the rest of the Dutch IJ.
+    destIndex = appendUnchanged(dest, destIndex, destCapacity, src + start, unchanged1, options, edits);
+    start += unchanged1;
+    if (doTitleJ) {
+        destIndex = appendUChar(dest, destIndex, destCapacity, u'J');
+        if (edits != nullptr) {
+            edits->addReplace(1, 1);
+        }
+        ++start;  // step over the source j that was replaced
+    }
+    destIndex = appendUnchanged(dest, destIndex, destCapacity, src + start, unchanged2, options, edits);
+
+    U_ASSERT(start + unchanged2 == index);
+    return index;
+}
+
+}  // namespace
+
 U_CFUNC int32_t U_CALLCONV
 ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter,
                         UChar *dest, int32_t destCapacity,
@ -412,14 +506,14 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
    csc.limit=srcLength;
    int32_t destIndex=0;
    int32_t prev=0;
-    UBool isFirstIndex=TRUE;
+    bool isFirstIndex=true;

    /* titlecasing loop */
    while(prev<srcLength) {
        /* find next index where to titlecase */
        int32_t index;
        if(isFirstIndex) {
-            isFirstIndex=FALSE;
+            isFirstIndex=false;
            index=iter->first();
        } else {
            index=iter->next();
@ -446,7 +540,7 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
                // Stop with titleStart<titleLimit<=index
                // if there is a character to be titlecased,
                // or else stop with titleStart==titleLimit==index.
-                UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
+                bool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
                while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
                    titleStart=titleLimit;
                    if(titleLimit==index) {
@ -479,27 +573,15 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it

                /* Special case Dutch IJ titlecasing */
                if (titleStart+1 < index &&
-                        caseLocale == UCASE_LOC_DUTCH &&
-                        (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
-                    if (src[titleStart+1] == 0x006A) {
-                        destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
-                        if(destIndex<0) {
-                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                            return 0;
-                        }
-                        if(edits!=NULL) {
-                            edits->addReplace(1, 1);
-                        }
-                        titleLimit++;
-                    } else if (src[titleStart+1] == 0x004A) {
-                        // Keep the capital J from getting lowercased.
-                        destIndex=appendUnchanged(dest, destIndex, destCapacity,
-                                                  src+titleStart+1, 1, options, edits);
-                        if(destIndex<0) {
-                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                            return 0;
-                        }
-                        titleLimit++;
+                        caseLocale == UCASE_LOC_DUTCH) {
+                    if (c < 0) {
+                        c = ~c;
+                    }
+
+                    if (c == u'I' || c == u'Í') {
+                        titleLimit = maybeTitleDutchIJ(src, c, titleStart + 1, index, 
+                                                       dest, destIndex, destCapacity, options, 
+                                                       edits);
                    }
                }

--- a/thirdparty/icu4c/common/uvector.cpp
+++ b/thirdparty/icu4c/common/uvector.cpp
@ -99,14 +99,6 @@ bool UVector::operator==(const UVector& other) const {
    return true;
 }

-// TODO: delete this function once all call sites have been migrated to the
-//       new addElement().
-void UVector::addElementX(void* obj, UErrorCode &status) {
-    if (ensureCapacityX(count + 1, status)) {
-        elements[count++].pointer = obj;
-    }
-}
-
 void UVector::addElement(void* obj, UErrorCode &status) {
    U_ASSERT(deleter == nullptr);
    if (ensureCapacity(count + 1, status)) {
@ -331,38 +323,6 @@ int32_t UVector::indexOf(UElement key, int32_t startIndex, int8_t hint) const {
    return -1;
 }

-UBool UVector::ensureCapacityX(int32_t minimumCapacity, UErrorCode &status) {
-    if (minimumCapacity < 0) {
-        status = U_ILLEGAL_ARGUMENT_ERROR;
-        return FALSE;
-	}
-    if (capacity < minimumCapacity) {
-        if (capacity > (INT32_MAX - 1) / 2) {        	// integer overflow check
-        	status = U_ILLEGAL_ARGUMENT_ERROR;
-        	return FALSE;
-        }
-        int32_t newCap = capacity * 2;
-        if (newCap < minimumCapacity) {
-            newCap = minimumCapacity;
-        }
-        if (newCap > (int32_t)(INT32_MAX / sizeof(UElement))) {	// integer overflow check
-        	// We keep the original memory contents on bad minimumCapacity.
-        	status = U_ILLEGAL_ARGUMENT_ERROR;
-        	return FALSE;
-        }
-        UElement* newElems = (UElement *)uprv_realloc(elements, sizeof(UElement)*newCap);
-        if (newElems == nullptr) {
-            // We keep the original contents on the memory failure on realloc or bad minimumCapacity.
-            status = U_MEMORY_ALLOCATION_ERROR;
-            return FALSE;
-        }
-        elements = newElems;
-        capacity = newCap;
-    }
-    return TRUE;
-}
-
-
 UBool UVector::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return false;
@ -370,7 +330,7 @@ UBool UVector::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
    if (minimumCapacity < 0) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return false;
-	}
+    }
    if (capacity < minimumCapacity) {
        if (capacity > (INT32_MAX - 1) / 2) {        	// integer overflow check
            status = U_ILLEGAL_ARGUMENT_ERROR;
@ -396,6 +356,7 @@ UBool UVector::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
    }
    return true;
 }
+
 /**
 * Change the size of this vector as follows: If newSize is smaller,
 * then truncate the array, possibly deleting held elements for i >=
--- a/thirdparty/icu4c/common/uvector.h
+++ b/thirdparty/icu4c/common/uvector.h
@ -123,12 +123,6 @@ public:
    // java.util.Vector API
    //------------------------------------------------------------

-    /*
-     * Old version of addElement, with non-standard error handling.
-     * Will be removed once all uses have been switched to the new addElement().
-     */
-    void addElementX(void* obj, UErrorCode &status);
-
    /**
     * Add an element at the end of the vector.
     * For use only with vectors that do not adopt their elements, which is to say,
@ -197,12 +191,6 @@ public:

    inline UBool isEmpty(void) const {return count == 0;}

-    /*
-     * Old version of ensureCapacity, with non-standard error handling.
-     * Will be removed once all uses have been switched to the new ensureCapacity().
-     */
-    UBool ensureCapacityX(int32_t minimumCapacity, UErrorCode &status);
-
    UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status);

    /**
--- a/thirdparty/icu4c/common/uvectr32.cpp
+++ b/thirdparty/icu4c/common/uvectr32.cpp
@ -83,7 +83,7 @@ void UVector32::assign(const UVector32& other, UErrorCode &ec) {
 }


-bool UVector32::operator==(const UVector32& other) {
+bool UVector32::operator==(const UVector32& other) const {
    int32_t i;
    if (count != other.count) return false;
    for (i=0; i<count; ++i) {
--- a/thirdparty/icu4c/common/uvectr32.h
+++ b/thirdparty/icu4c/common/uvectr32.h
@ -86,12 +86,12 @@ public:
     * equal if they are of the same size and all elements are equal,
     * as compared using this object's comparer.
     */
-    bool operator==(const UVector32& other);
+    bool operator==(const UVector32& other) const;

    /**
     * Equivalent to !operator==()
     */
-    inline bool operator!=(const UVector32& other);
+    inline bool operator!=(const UVector32& other) const;

    //------------------------------------------------------------
    // java.util.Vector API
@ -268,7 +268,7 @@ inline int32_t UVector32::lastElementi(void) const {
    return elementAti(count-1);
 }

-inline bool UVector32::operator!=(const UVector32& other) {
+inline bool UVector32::operator!=(const UVector32& other) const {
     return !operator==(other);  // inequality is defined as the negation of operator==
 }