normalizer2impl.cpp - OpenGrok cross reference for /third_party/icu/icu4c/source/common/normalizer2impl.cpp

Lines Matching +full:p +full:- +full:limit
6 *   Copyright (C) 2009-2014, International Business Machines
11 *   encoding:   UTF-8
50  * UTF-8 lead byte for minNoMaybeCP.
66  * Returns the code point from one single well-formed UTF-8 byte sequence
69  * Trie UTF-8 macros do not assemble whole code points (for efficiency).
71  * We should not need it for normalization-inert data (norm16==0).
72  * Illegal sequences yield the error value norm16==0 just like real normalization-inert code points.
78     switch(cpLimit-cpStart) {  in codePointFromValidUTF8()
94  * Returns the last code point in [start, p[ if it is valid and in U+1000..U+D7FF.
97 UChar32 previousHangulOrJamo(const uint8_t *start, const uint8_t *p) {  in previousHangulOrJamo()  argument
98     if ((p - start) >= 3) {  in previousHangulOrJamo()
99         p -= 3;  in previousHangulOrJamo()
100         uint8_t l = *p;  in previousHangulOrJamo()
103                 (t1 = (uint8_t)(p[1] - 0x80)) <= 0x3f &&  in previousHangulOrJamo()
104                 (t2 = (uint8_t)(p[2] - 0x80)) <= 0x3f &&  in previousHangulOrJamo()
113  * Returns the offset from the Jamo T base if [src, limit[ starts with a single Jamo T code point.
116 int32_t getJamoTMinusBase(const uint8_t *src, const uint8_t *limit) {  in getJamoTMinusBase()  argument
118     if ((limit - src) >= 3 && *src == 0xe1) {  in getJamoTMinusBase()
124                 return t - 0xa7;  in getJamoTMinusBase()
129                 return t - (0xa7 - 0x40);  in getJamoTMinusBase()
133     return -1;  in getJamoTMinusBase()
141     int32_t cpLength = (int32_t)(cpLimit - cpStart);  in appendCodePointDelta()
147         int32_t trail = *(cpLimit-1) + delta;  in appendCodePointDelta()
150             --cpLimit;  in appendCodePointDelta()
155             // Decode the code point, add the delta, re-encode.  in appendCodePointDelta()
162         edits->addReplace(cpLength, length);  in appendCodePointDelta()
169 // ReorderingBuffer -------------------------------------------------------- ***
174         start(str.getBuffer(8)), reorderStart(start), limit(start),  in ReorderingBuffer()
190     limit=start+length;  in init()
191     remainingCapacity=str.getCapacity()-length;  in init()
193     if(start==limit) {  in init()
208     int32_t length=(int32_t)(limit-start);  in equals()
210         length==(int32_t)(otherLimit-otherStart) &&  in equals()
215     U_ASSERT((otherLimit - otherStart) <= INT32_MAX);  // ensured by caller  in equals()
216     int32_t length = (int32_t)(limit - start);  in equals()
217     int32_t otherLength = (int32_t)(otherLimit - otherStart);  in equals()
218     // For equal strings, UTF-8 is at least as long as UTF-16, and at most three times as long.  in equals()
223     // (Invalid sequences are normalization-inert.)  in equals()
245         limit[0]=U16_LEAD(c);  in appendSupplementary()
246         limit[1]=U16_TRAIL(c);  in appendSupplementary()
247         limit+=2;  in appendSupplementary()
250             reorderStart=limit;  in appendSupplementary()
255     remainingCapacity-=2;  in appendSupplementary()
268     remainingCapacity-=length;  in append()
271             reorderStart=limit+length;  in append()
273             reorderStart=limit+1;  // Ok if not a code point boundary.  in append()
276         do { *limit++=*s++; } while(s!=sLimit);  in append()
305     remainingCapacity-=cpLength;  in appendZeroCC()
307         *limit++=(UChar)c;  in appendZeroCC()
309         limit[0]=U16_LEAD(c);  in appendZeroCC()
310         limit[1]=U16_TRAIL(c);  in appendZeroCC()
311         limit+=2;  in appendZeroCC()
314     reorderStart=limit;  in appendZeroCC()
322     int32_t length=(int32_t)(sLimit-s);  in appendZeroCC()
326     u_memcpy(limit, s, length);  in appendZeroCC()
327     limit+=length;  in appendZeroCC()
328     remainingCapacity-=length;  in appendZeroCC()
330     reorderStart=limit;  in appendZeroCC()
335     reorderStart=limit=start;  in remove()
341     if(suffixLength<(limit-start)) {  in removeSuffix()
342         limit-=suffixLength;  in removeSuffix()
345         limit=start;  in removeSuffix()
349     reorderStart=limit;  in removeSuffix()
353     int32_t reorderStartIndex=(int32_t)(reorderStart-start);  in resize()
354     int32_t length=(int32_t)(limit-start);  in resize()
371     limit=start+length;  in resize()
372     remainingCapacity=str.getCapacity()-length;  in resize()
378     UChar c=*--codePointStart;  in skipPrevious()
379     if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(*(codePointStart-1))) {  in skipPrevious()
380         --codePointStart;  in skipPrevious()
389     UChar32 c=*--codePointStart;  in previousCC()
391     if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(c2=*(codePointStart-1))) {  in previousCC()
392         --codePointStart;  in previousCC()
399 // Requires 0<cc<lastCC which implies reorderStart<limit.
403     UChar *q=limit;  in insert()
404     UChar *r=limit+=U16_LENGTH(c);  in insert()
406         *--r=*--q;  in insert()
414 // Normalizer2Impl --------------------------------------------------------- ***
444     U_ASSERT((minMaybeYes & 7) == 0);  // 8-aligned for noNoDelta bit fields  in init()
445     centerNoNoDelta = (minMaybeYes >> DELTA_SHIFT) - MAX_DELTA - 1;  in init()
450     extraData=maybeYesCompositions+((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT);  in init()
483     // Add the start code point of each same-value range of the trie.  in addPropertyStarts()
488         sa->add(sa->set, start);  in addPropertyStarts()
491             // Range of code points with same-norm16-value algorithmic decompositions.  in addPropertyStarts()
492             // They might have different non-zero FCD16 values.  in addPropertyStarts()
497                     sa->add(sa->set, start);  in addPropertyStarts()
507         sa->add(sa->set, c);  in addPropertyStarts()
508         sa->add(sa->set, c+1);  in addPropertyStarts()
510     sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */  in addPropertyStarts()
515     // Add the start code point of each same-value range of the canonical iterator data trie.  in addCanonIterPropertyStarts()
520     while ((end = ucptrie_getRange(fCanonIterData->trie, start, UCPMAP_RANGE_NORMAL, 0,  in addCanonIterPropertyStarts()
522         sa->add(sa->set, start);  in addCanonIterPropertyStarts()
532     // Make some effort to support NUL-terminated strings reasonably.  in copyLowPrefixFromNulTerminated()
542     if(--src!=prevSrc) {  in copyLowPrefixFromNulTerminated()
544             buffer->appendZeroCC(prevSrc, src, errorCode);  in copyLowPrefixFromNulTerminated()
568 Normalizer2Impl::decompose(const UChar *src, const UChar *limit,  in decompose()  argument
572     if(destLengthEstimate<0 && limit!=NULL) {  in decompose()
573         destLengthEstimate=(int32_t)(limit-src);  in decompose()
578         decompose(src, limit, &buffer, errorCode);  in decompose()
586 Normalizer2Impl::decompose(const UChar *src, const UChar *limit,  in decompose()  argument
590     if(limit==NULL) {  in decompose()
595         limit=u_strchr(src, 0);  in decompose()
608         for(prevSrc=src; src!=limit;) {  in decompose()
617                 if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {  in decompose()
633                 if(!buffer->appendZeroCC(prevSrc, src, errorCode)) {  in decompose()
641         if(src==limit) {  in decompose()
645         // Check one above-minimum, relevant code point.  in decompose()
673 Normalizer2Impl::decomposeShort(const UChar *src, const UChar *limit,  in decomposeShort()  argument
679     while(src<limit) {  in decomposeShort()
686         UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16);  in decomposeShort()
720     // c decomposes, get everything from the variable-length extra data  in decompose()
727         leadCC=(uint8_t)(*(mapping-1)>>8);  in decompose()
739                                const uint8_t *src, const uint8_t *limit,  in decomposeUTF8()  argument
741     U_ASSERT(limit != nullptr);  in decomposeUTF8()
757             if (src == limit) {  in decomposeUTF8()
758                 if (prevBoundary != limit && sink != nullptr) {  in decomposeUTF8()
759                     ByteSinkUtil::appendUnchanged(prevBoundary, limit,  in decomposeUTF8()
768                 UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);  in decomposeUTF8()
789         // Medium-fast path: Quick check.  in decomposeUTF8()
827             src = decomposeShort(src, limit, STOP_AT_DECOMP_BOUNDARY, false /* onlyContiguous */,  in decomposeUTF8()
833         if ((src - prevSrc) > INT32_MAX) {  // guard before buffer.equals()  in decomposeUTF8()
857 Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,  in decomposeShort()  argument
863     while (src < limit) {  in decomposeShort()
866         UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);  in decomposeShort()
895         // norm16!=INERT guarantees that [prevSrc, src[ is valid UTF-8.  in decomposeShort()
896         // We do not see invalid UTF-8 here because  in decomposeShort()
897         // its norm16==INERT is normalization-inert,  in decomposeShort()
899         // and we stop the slow path where invalid UTF-8 begins.  in decomposeShort()
920             // The character decomposes, get everything from the variable-length extra data.  in decomposeShort()
927                 leadCC = (uint8_t)(*(mapping-1) >> 8);  in decomposeShort()
970     // c decomposes, get everything from the variable-length extra data  in getDecomposition()
976 // The capacity of the buffer must be 30=MAPPING_LENGTH_MASK-1
997     // c decomposes, get everything from the variable-length extra data  in getRawDecomposition()
1004         const uint16_t *rawMapping=mapping-((firstUnit>>7)&1)-1;  in getRawDecomposition()
1008             return (const UChar *)rawMapping-rm0;  in getRawDecomposition()
1012             u_memcpy(buffer+1, (const UChar *)mapping+1+2, mLength-2);  in getRawDecomposition()
1013             length=mLength-1;  in getRawDecomposition()
1022 void Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit,  in decomposeAndAppend()  argument
1029         decompose(src, limit, &buffer, errorCode);  in decomposeAndAppend()
1035     const UChar *p = src;  in decomposeAndAppend()  local
1036     while (p != limit) {  in decomposeAndAppend()
1037         const UChar *codePointStart = p;  in decomposeAndAppend()
1040         UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);  in decomposeAndAppend()
1042             p = codePointStart;  in decomposeAndAppend()
1051     if(limit==NULL) {  // appendZeroCC() needs limit!=NULL  in decomposeAndAppend()
1052         limit=u_strchr(p, 0);  in decomposeAndAppend()
1055     if (buffer.append(src, (int32_t)(p - src), false, firstCC, prevCC, errorCode)) {  in decomposeAndAppend()
1056         buffer.appendZeroCC(p, limit, errorCode);  in decomposeAndAppend()
1072     // c decomposes, get everything from the variable-length extra data  in norm16HasDecompBoundaryBefore()
1076     return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0;  in norm16HasDecompBoundaryBefore()
1100     // c decomposes, get everything from the variable-length extra data  in norm16HasDecompBoundaryAfter()
1103     // decomp after-boundary: same as hasFCDBoundaryAfter(),  in norm16HasDecompBoundaryAfter()
1111     // if(trailCC==1) test leadCC==0, same as checking for before-boundary  in norm16HasDecompBoundaryAfter()
1113     return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0;  in norm16HasDecompBoundaryAfter()
1118  * a forward-combining "lead" character,
1120  * and a backward-combining "trail" character.
1125  * Bit      0  set if the composite is a forward-combining starter
1126  * otherwise it returns -1.
1129  * encoded as either pairs or triples of 16-bit units.
1182     return -1;  in combine()
1211  * (which is in NFD - decomposed and canonically ordered),
1221     UChar *p=buffer.getStart()+recomposeStartIndex;  in recompose()  local
1222     UChar *limit=buffer.getLimit();  in recompose()  local
1223     if(p==limit) {  in recompose()
1234     // Some of the following variables are not used until we have a forward-combining starter  in recompose()
1236     compositionsList=NULL;  // used as indicator for whether we have a forward-combining starter  in recompose()
1242         UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);  in recompose()
1248             // the backward-combining character is not blocked  in recompose()
1255                     UChar prev=(UChar)(*starter-Hangul::JAMO_L_BASE);  in recompose()
1257                         pRemove=p-1;  in recompose()
1260                              (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))*  in recompose()
1263                         if(p!=limit && (t=(UChar)(*p-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) {  in recompose()
1264                             ++p;  in recompose()
1270                         r=p;  in recompose()
1271                         while(r<limit) {  in recompose()
1274                         limit=q;  in recompose()
1275                         p=pRemove;  in recompose()
1284                 if(p==limit) {  in recompose()
1294                 pRemove=p-U16_LENGTH(c);  // pRemove & p: start & limit of the combining mark  in recompose()
1310                         --pRemove;  in recompose()
1320                         *--r=*--q;  in recompose()
1323                     *--starter=U16_LEAD(composite);  // undo the temporary increment  in recompose()
1330                 if(pRemove<p) {  in recompose()
1332                     r=p;  in recompose()
1333                     while(r<limit) {  in recompose()
1336                     limit=q;  in recompose()
1337                     p=pRemove;  in recompose()
1341                 if(p==limit) {  in recompose()
1359         if(p==limit) {  in recompose()
1370                     starter=p-1;  in recompose()
1373                     starter=p-2;  in recompose()
1381     buffer.setReorderingLimit(limit);  in recompose()
1386     uint16_t norm16=getNorm16(a);  // maps an out-of-range 'a' to inert norm16  in composePair()
1393             b-=Hangul::JAMO_V_BASE;  in composePair()
1397                      ((a-Hangul::JAMO_L_BASE)*Hangul::JAMO_V_COUNT+b)*  in composePair()
1403             b-=Hangul::JAMO_T_BASE;  in composePair()
1438 Normalizer2Impl::compose(const UChar *src, const UChar *limit,  in compose()  argument
1445     if(limit==NULL) {  in compose()
1452         limit=u_strchr(src, 0);  in compose()
1454             if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) {  in compose()
1458                 prevBoundary = --src;  in compose()
1470             if (src == limit) {  in compose()
1471                 if (prevBoundary != limit && doCompose) {  in compose()
1472                     buffer.appendZeroCC(prevBoundary, limit, errorCode);  in compose()
1486                     if(src!=limit && U16_IS_TRAIL(c2=*src)) {  in compose()
1503         // Medium-fast path: Handle cases that do not require full decomposition and recomposition.  in compose()
1514                         hasCompBoundaryBefore(src, limit)) {  in compose()
1525                 // The mapping is comp-normalized which also implies hasCompBoundaryBefore.  in compose()
1527                         hasCompBoundaryBefore(src, limit)) {  in compose()
1543                 if (hasCompBoundaryBefore(src, limit) ||  in compose()
1555             UChar prev=*(prevSrc-1);  in compose()
1559                 UChar l = (UChar)(prev-Hangul::JAMO_L_BASE);  in compose()
1565                     if (src != limit &&  in compose()
1566                             0 < (t = ((int32_t)*src - Hangul::JAMO_T_BASE)) &&  in compose()
1570                     } else if (hasCompBoundaryBefore(src, limit)) {  in compose()
1574                         t = -1;  in compose()
1578                             (l*Hangul::JAMO_V_COUNT + (c-Hangul::JAMO_V_BASE)) *  in compose()
1580                         --prevSrc;  // Replace the Jamo L as well.  in compose()
1604                 UChar32 syllable = prev + c - Hangul::JAMO_T_BASE;  in compose()
1605                 --prevSrc;  // Replace the Hangul LV as well.  in compose()
1618             // One or more combining marks that do not combine-back:  in compose()
1620             // if followed by a character with a boundary-before.  in compose()
1633                     if (src == limit) {  in compose()
1635                             buffer.appendZeroCC(prevBoundary, limit, errorCode);  in compose()
1641                     UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, n16);  in compose()
1655                 // src is after the last in-order combining mark.  in compose()
1670             const UChar *p = prevSrc;  in compose()  local
1671             UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, norm16);  in compose()
1673                 prevSrc = p;  in compose()
1684         src = decomposeShort(src, limit, true /* stopAtCompBoundary */, onlyContiguous,  in compose()
1689         if ((src - prevSrc) > INT32_MAX) {  // guard before buffer.equals()  in compose()
1709 Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,  in composeQuickCheck()  argument
1714     if(limit==NULL) {  in composeQuickCheck()
1717         limit=u_strchr(src, 0);  in composeQuickCheck()
1719             if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) {  in composeQuickCheck()
1722                 prevBoundary = --src;  in composeQuickCheck()
1734             if(src==limit) {  in composeQuickCheck()
1747                     if(src!=limit && U16_IS_TRAIL(c2=*src)) {  in composeQuickCheck()
1769                 const UChar *p = prevSrc;  in composeQuickCheck()  local
1771                 UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, n16);  in composeQuickCheck()
1775                     prevBoundary = p;  in composeQuickCheck()
1800                     if (src == limit) {  in composeQuickCheck()
1805                     UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, norm16);  in composeQuickCheck()
1816                 // src is after the last in-order combining mark.  in composeQuickCheck()
1831 void Normalizer2Impl::composeAndAppend(const UChar *src, const UChar *limit,  in composeAndAppend()  argument
1838         const UChar *firstStarterInSrc=findNextCompBoundary(src, limit, onlyContiguous);  in composeAndAppend()
1842             int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastStarterInDest);  in composeAndAppend()
1846             middle.append(src, (int32_t)(firstStarterInSrc-src));  in composeAndAppend()
1857         compose(src, limit, onlyContiguous, true, buffer, errorCode);  in composeAndAppend()
1859         if(limit==NULL) {  // appendZeroCC() needs limit!=NULL  in composeAndAppend()
1860             limit=u_strchr(src, 0);  in composeAndAppend()
1862         buffer.appendZeroCC(src, limit, errorCode);  in composeAndAppend()
1868                              const uint8_t *src, const uint8_t *limit,  in composeUTF8()  argument
1870     U_ASSERT(limit != nullptr);  in composeUTF8()
1881             if (src == limit) {  in composeUTF8()
1882                 if (prevBoundary != limit && sink != nullptr) {  in composeUTF8()
1883                     ByteSinkUtil::appendUnchanged(prevBoundary, limit,  in composeUTF8()
1892                 UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);  in composeUTF8()
1904         // Medium-fast path: Handle cases that do not require full decomposition and recomposition.  in composeUTF8()
1915                         hasCompBoundaryBefore(src, limit)) {  in composeUTF8()
1926                 // The mapping is comp-normalized which also implies hasCompBoundaryBefore.  in composeUTF8()
1928                         hasCompBoundaryBefore(src, limit)) {  in composeUTF8()
1947                 if (hasCompBoundaryBefore(src, limit) ||  in composeUTF8()
1955                         edits->addReplace((int32_t)(src - prevSrc), 0);  in composeUTF8()
1967             U_ASSERT((src - prevSrc) == 3 && *prevSrc == 0xe1);  in composeUTF8()
1972                 UChar32 l = prev - Hangul::JAMO_L_BASE;  in composeUTF8()
1977                     int32_t t = getJamoTMinusBase(src, limit);  in composeUTF8()
1981                     } else if (hasCompBoundaryBefore(src, limit)) {  in composeUTF8()
1987                             (l*Hangul::JAMO_V_COUNT + (prevSrc[2]-0xa1)) *  in composeUTF8()
1989                         prevSrc -= 3;  // Replace the Jamo L as well.  in composeUTF8()
2014                 prevSrc -= 3;  // Replace the Hangul LV as well.  in composeUTF8()
2027             // One or more combining marks that do not combine-back:  in composeUTF8()
2029             // if followed by a character with a boundary-before.  in composeUTF8()
2042                     if (src == limit) {  in composeUTF8()
2044                             ByteSinkUtil::appendUnchanged(prevBoundary, limit,  in composeUTF8()
2051                     UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, n16);  in composeUTF8()
2065                 // src is after the last in-order combining mark.  in composeUTF8()
2080             const uint8_t *p = prevSrc;  in composeUTF8()  local
2081             UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, prevBoundary, p, norm16);  in composeUTF8()
2083                 prevSrc = p;  in composeUTF8()
2094         src = decomposeShort(src, limit, STOP_AT_COMP_BOUNDARY, onlyContiguous,  in composeUTF8()
2099         if ((src - prevSrc) > INT32_MAX) {  // guard before buffer.equals()  in composeUTF8()
2123 UBool Normalizer2Impl::hasCompBoundaryBefore(const UChar *src, const UChar *limit) const {  in hasCompBoundaryBefore()
2124     if (src == limit || *src < minCompNoMaybeCP) {  in hasCompBoundaryBefore()
2129     UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16);  in hasCompBoundaryBefore()
2133 UBool Normalizer2Impl::hasCompBoundaryBefore(const uint8_t *src, const uint8_t *limit) const {  in hasCompBoundaryBefore()
2134     if (src == limit) {  in hasCompBoundaryBefore()
2138     UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);  in hasCompBoundaryBefore()
2142 UBool Normalizer2Impl::hasCompBoundaryAfter(const UChar *start, const UChar *p,  in hasCompBoundaryAfter()  argument
2144     if (start == p) {  in hasCompBoundaryAfter()
2149     UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);  in hasCompBoundaryAfter()
2153 UBool Normalizer2Impl::hasCompBoundaryAfter(const uint8_t *start, const uint8_t *p,  in hasCompBoundaryAfter()  argument
2155     if (start == p) {  in hasCompBoundaryAfter()
2159     UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, start, p, norm16);  in hasCompBoundaryAfter()
2163 const UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p,  in findPreviousCompBoundary()  argument
2165     while (p != start) {  in findPreviousCompBoundary()
2166         const UChar *codePointLimit = p;  in findPreviousCompBoundary()
2169         UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);  in findPreviousCompBoundary()
2174             return p;  in findPreviousCompBoundary()
2177     return p;  in findPreviousCompBoundary()
2180 const UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit,  in findNextCompBoundary()  argument
2182     while (p != limit) {  in findNextCompBoundary()
2183         const UChar *codePointStart = p;  in findNextCompBoundary()
2186         UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);  in findNextCompBoundary()
2191             return p;  in findNextCompBoundary()
2194     return p;  in findNextCompBoundary()
2197 uint8_t Normalizer2Impl::getPreviousTrailCC(const UChar *start, const UChar *p) const {  in getPreviousTrailCC()
2198     if (start == p) {  in getPreviousTrailCC()
2201     int32_t i = (int32_t)(p - start);  in getPreviousTrailCC()
2207 uint8_t Normalizer2Impl::getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const {  in getPreviousTrailCC()
2208     if (start == p) {  in getPreviousTrailCC()
2211     int32_t i = (int32_t)(p - start);  in getPreviousTrailCC()
2217 // Note: normalizer2impl.cpp r30982 (2011-nov-27)
2225 // Ticket 20907 - The optimizer in MSVC/Visual Studio versions below 16.4 has trouble with this
2226 // function on Windows ARM64. As a work-around, we disable optimizations for this function.
2227 // This work-around could/should be removed once the following versions of Visual Studio are no
2256     // c decomposes, get everything from the variable-length extra data  in getFCD16FromNormData()
2261         norm16|=*(mapping-1)&0xff00;  // lccc  in getFCD16FromNormData()
2273 Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit,  in makeFCD()  argument
2276     // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.  in makeFCD()
2280     if(limit==NULL) {  in makeFCD()
2288             // Fetching the fcd16 value was deferred for this below-U+0300 code point.  in makeFCD()
2289             prevFCD16=getFCD16(*(src-1));  in makeFCD()
2291                 --prevBoundary;  in makeFCD()
2294         limit=u_strchr(src, 0);  in makeFCD()
2297     // Note: In this function we use buffer->appendZeroCC() because we track  in makeFCD()
2309         for(prevSrc=src; src!=limit;) {  in makeFCD()
2319                     if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {  in makeFCD()
2333             if(buffer!=NULL && !buffer->appendZeroCC(prevSrc, src, errorCode)) {  in makeFCD()
2336             if(src==limit) {  in makeFCD()
2342                 // Fetching the fcd16 value was deferred for this below-minLcccCP code point.  in makeFCD()
2349                         --prevBoundary;  in makeFCD()
2353                 const UChar *p=src-1;  in makeFCD()  local
2354                 if(U16_IS_TRAIL(*p) && prevSrc<p && U16_IS_LEAD(*(p-1))) {  in makeFCD()
2355                     --p;  in makeFCD()
2358                     prevFCD16=getFCD16FromNormData(U16_GET_SUPPLEMENTARY(p[0], p[1]));  in makeFCD()
2362                     prevBoundary=p;  in makeFCD()
2367         } else if(src==limit) {  in makeFCD()
2372         // The current character (c) at [prevSrc..src[ has a non-zero lead combining class.  in makeFCD()
2379             if(buffer!=NULL && !buffer->appendZeroCC(c, errorCode)) {  in makeFCD()
2392             buffer->removeSuffix((int32_t)(prevSrc-prevBoundary));  in makeFCD()
2397             src=findNextFCDBoundary(src, limit);  in makeFCD()
2413 void Normalizer2Impl::makeFCDAndAppend(const UChar *src, const UChar *limit,  in makeFCDAndAppend()  argument
2419         const UChar *firstBoundaryInSrc=findNextFCDBoundary(src, limit);  in makeFCDAndAppend()
2423             int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest);  in makeFCDAndAppend()
2427             middle.append(src, (int32_t)(firstBoundaryInSrc-src));  in makeFCDAndAppend()
2437         makeFCD(src, limit, &buffer, errorCode);  in makeFCDAndAppend()
2439         if(limit==NULL) {  // appendZeroCC() needs limit!=NULL  in makeFCDAndAppend()
2440             limit=u_strchr(src, 0);  in makeFCDAndAppend()
2442         buffer.appendZeroCC(src, limit, errorCode);  in makeFCDAndAppend()
2446 const UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const UChar *p) const {  in findPreviousFCDBoundary()
2447     while(start<p) {  in findPreviousFCDBoundary()
2448         const UChar *codePointLimit = p;  in findPreviousFCDBoundary()
2451         UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);  in findPreviousFCDBoundary()
2456             return p;  in findPreviousFCDBoundary()
2459     return p;  in findPreviousFCDBoundary()
2462 const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *limit) const {  in findNextFCDBoundary()  argument
2463     while(p<limit) {  in findNextFCDBoundary()
2464         const UChar *codePointStart=p;  in findNextFCDBoundary()
2467         UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);  in findNextFCDBoundary()
2472             return p;  in findNextFCDBoundary()
2475     return p;  in findNextFCDBoundary()
2478 // CanonicalIterator data -------------------------------------------------- ***
2512                 set->add(firstOrigin);  in addToStartSet()
2517         set->add(origin);  in addToStartSet()
2538     U_ASSERT(impl->fCanonIterData == NULL);  in doInit()
2539     impl->fCanonIterData = new CanonIterData(errorCode);  in doInit()
2540     if (impl->fCanonIterData == NULL) {  in doInit()
2546         while ((end = ucptrie_getRange(impl->normTrie, start,  in doInit()
2549 …      // Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.  in doInit()
2551 …            impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode);  in doInit()
2556         umutablecptrie_setName(impl->fCanonIterData->mutableTrie, "CanonIterData");  in doInit()
2558         impl->fCanonIterData->trie = umutablecptrie_buildImmutable(  in doInit()
2559 …         impl->fCanonIterData->mutableTrie, UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_32, &errorCode);  in doInit()
2560         umutablecptrie_close(impl->fCanonIterData->mutableTrie);  in doInit()
2561         impl->fCanonIterData->mutableTrie = nullptr;  in doInit()
2564         delete impl->fCanonIterData;  in doInit()
2565         impl->fCanonIterData = NULL;  in doInit()
2573         // Inert, or 2-way mapping (including Hangul syllable).  in makeCanonIterDataFromNorm16()
2575         // Composites from 2-way mappings are added at runtime from the  in makeCanonIterDataFromNorm16()
2577         // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are  in makeCanonIterDataFromNorm16()
2593             // c has a one-way decomposition  in makeCanonIterDataFromNorm16()
2595             // Do not modify the whole-range norm16 value.  in makeCanonIterDataFromNorm16()
2605                 // c decomposes, get everything from the variable-length extra data  in makeCanonIterDataFromNorm16()
2610                     if(c==c2 && (*(mapping-1)&0xff)!=0) {  in makeCanonIterDataFromNorm16()
2622                     // one-way mapping. A 2-way mapping is possible here after  in makeCanonIterDataFromNorm16()
2649     umtx_initOnce(me->fCanonIterDataInitOnce, &initCanonIterData, me, errorCode);  in ensureCanonIterData()
2654     return (int32_t)ucptrie_get(fCanonIterData->trie, c);  in getCanonValue()
2658     return *(const UnicodeSet *)fCanonIterData->canonStartSets[n];  in getCanonStartSet()
2681                 (UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT);  in getCanonStartSet()
2682             set.add(syllable, syllable+Hangul::JAMO_VT_COUNT-1);  in getCanonStartSet()
2692 // Normalizer2 data swapping ----------------------------------------------- ***
2719     uint8_t formatVersion0=pInfo->formatVersion[0];  in unorm2_swap()
2721         pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Nrm2" */  in unorm2_swap()
2722         pInfo->dataFormat[1]==0x72 &&  in unorm2_swap()
2723         pInfo->dataFormat[2]==0x6d &&  in unorm2_swap()
2724         pInfo->dataFormat[3]==0x32 &&  in unorm2_swap()
2728                          pInfo->dataFormat[0], pInfo->dataFormat[1],  in unorm2_swap()
2729                          pInfo->dataFormat[2], pInfo->dataFormat[3],  in unorm2_swap()
2730                          pInfo->formatVersion[0]);  in unorm2_swap()
2749         length-=headerSize;  in unorm2_swap()
2783         ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode);  in unorm2_swap()
2788         utrie_swapAnyVersion(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);  in unorm2_swap()
2793         ds->swapArray16(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);  in unorm2_swap()