Lines Matching +full:p +full:- +full:limit
6 * Copyright (C) 2009-2014, International Business Machines
11 * encoding: UTF-8
50 * UTF-8 lead byte for minNoMaybeCP.
66 * Returns the code point from one single well-formed UTF-8 byte sequence
69 * Trie UTF-8 macros do not assemble whole code points (for efficiency).
71 * We should not need it for normalization-inert data (norm16==0).
72 * Illegal sequences yield the error value norm16==0 just like real normalization-inert code points.
78 switch(cpLimit-cpStart) { in codePointFromValidUTF8()
94 * Returns the last code point in [start, p[ if it is valid and in U+1000..U+D7FF.
97 UChar32 previousHangulOrJamo(const uint8_t *start, const uint8_t *p) { in previousHangulOrJamo() argument
98 if ((p - start) >= 3) { in previousHangulOrJamo()
99 p -= 3; in previousHangulOrJamo()
100 uint8_t l = *p; in previousHangulOrJamo()
103 (t1 = (uint8_t)(p[1] - 0x80)) <= 0x3f && in previousHangulOrJamo()
104 (t2 = (uint8_t)(p[2] - 0x80)) <= 0x3f && in previousHangulOrJamo()
113 * Returns the offset from the Jamo T base if [src, limit[ starts with a single Jamo T code point.
116 int32_t getJamoTMinusBase(const uint8_t *src, const uint8_t *limit) { in getJamoTMinusBase() argument
118 if ((limit - src) >= 3 && *src == 0xe1) { in getJamoTMinusBase()
124 return t - 0xa7; in getJamoTMinusBase()
129 return t - (0xa7 - 0x40); in getJamoTMinusBase()
133 return -1; in getJamoTMinusBase()
141 int32_t cpLength = (int32_t)(cpLimit - cpStart); in appendCodePointDelta()
147 int32_t trail = *(cpLimit-1) + delta; in appendCodePointDelta()
150 --cpLimit; in appendCodePointDelta()
155 // Decode the code point, add the delta, re-encode. in appendCodePointDelta()
162 edits->addReplace(cpLength, length); in appendCodePointDelta()
169 // ReorderingBuffer -------------------------------------------------------- ***
174 start(str.getBuffer(8)), reorderStart(start), limit(start), in ReorderingBuffer()
190 limit=start+length; in init()
191 remainingCapacity=str.getCapacity()-length; in init()
193 if(start==limit) { in init()
208 int32_t length=(int32_t)(limit-start); in equals()
210 length==(int32_t)(otherLimit-otherStart) && in equals()
215 U_ASSERT((otherLimit - otherStart) <= INT32_MAX); // ensured by caller in equals()
216 int32_t length = (int32_t)(limit - start); in equals()
217 int32_t otherLength = (int32_t)(otherLimit - otherStart); in equals()
218 // For equal strings, UTF-8 is at least as long as UTF-16, and at most three times as long. in equals()
223 // (Invalid sequences are normalization-inert.) in equals()
245 limit[0]=U16_LEAD(c); in appendSupplementary()
246 limit[1]=U16_TRAIL(c); in appendSupplementary()
247 limit+=2; in appendSupplementary()
250 reorderStart=limit; in appendSupplementary()
255 remainingCapacity-=2; in appendSupplementary()
268 remainingCapacity-=length; in append()
271 reorderStart=limit+length; in append()
273 reorderStart=limit+1; // Ok if not a code point boundary. in append()
276 do { *limit++=*s++; } while(s!=sLimit); in append()
305 remainingCapacity-=cpLength; in appendZeroCC()
307 *limit++=(UChar)c; in appendZeroCC()
309 limit[0]=U16_LEAD(c); in appendZeroCC()
310 limit[1]=U16_TRAIL(c); in appendZeroCC()
311 limit+=2; in appendZeroCC()
314 reorderStart=limit; in appendZeroCC()
322 int32_t length=(int32_t)(sLimit-s); in appendZeroCC()
326 u_memcpy(limit, s, length); in appendZeroCC()
327 limit+=length; in appendZeroCC()
328 remainingCapacity-=length; in appendZeroCC()
330 reorderStart=limit; in appendZeroCC()
335 reorderStart=limit=start; in remove()
341 if(suffixLength<(limit-start)) { in removeSuffix()
342 limit-=suffixLength; in removeSuffix()
345 limit=start; in removeSuffix()
349 reorderStart=limit; in removeSuffix()
353 int32_t reorderStartIndex=(int32_t)(reorderStart-start); in resize()
354 int32_t length=(int32_t)(limit-start); in resize()
371 limit=start+length; in resize()
372 remainingCapacity=str.getCapacity()-length; in resize()
378 UChar c=*--codePointStart; in skipPrevious()
379 if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(*(codePointStart-1))) { in skipPrevious()
380 --codePointStart; in skipPrevious()
389 UChar32 c=*--codePointStart; in previousCC()
391 if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(c2=*(codePointStart-1))) { in previousCC()
392 --codePointStart; in previousCC()
399 // Requires 0<cc<lastCC which implies reorderStart<limit.
403 UChar *q=limit; in insert()
404 UChar *r=limit+=U16_LENGTH(c); in insert()
406 *--r=*--q; in insert()
414 // Normalizer2Impl --------------------------------------------------------- ***
444 U_ASSERT((minMaybeYes & 7) == 0); // 8-aligned for noNoDelta bit fields in init()
445 centerNoNoDelta = (minMaybeYes >> DELTA_SHIFT) - MAX_DELTA - 1; in init()
450 extraData=maybeYesCompositions+((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT); in init()
483 // Add the start code point of each same-value range of the trie. in addPropertyStarts()
488 sa->add(sa->set, start); in addPropertyStarts()
491 // Range of code points with same-norm16-value algorithmic decompositions. in addPropertyStarts()
492 // They might have different non-zero FCD16 values. in addPropertyStarts()
497 sa->add(sa->set, start); in addPropertyStarts()
507 sa->add(sa->set, c); in addPropertyStarts()
508 sa->add(sa->set, c+1); in addPropertyStarts()
510 sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */ in addPropertyStarts()
515 // Add the start code point of each same-value range of the canonical iterator data trie. in addCanonIterPropertyStarts()
520 while ((end = ucptrie_getRange(fCanonIterData->trie, start, UCPMAP_RANGE_NORMAL, 0, in addCanonIterPropertyStarts()
522 sa->add(sa->set, start); in addCanonIterPropertyStarts()
532 // Make some effort to support NUL-terminated strings reasonably. in copyLowPrefixFromNulTerminated()
542 if(--src!=prevSrc) { in copyLowPrefixFromNulTerminated()
544 buffer->appendZeroCC(prevSrc, src, errorCode); in copyLowPrefixFromNulTerminated()
568 Normalizer2Impl::decompose(const UChar *src, const UChar *limit, in decompose() argument
572 if(destLengthEstimate<0 && limit!=NULL) { in decompose()
573 destLengthEstimate=(int32_t)(limit-src); in decompose()
578 decompose(src, limit, &buffer, errorCode); in decompose()
586 Normalizer2Impl::decompose(const UChar *src, const UChar *limit, in decompose() argument
590 if(limit==NULL) { in decompose()
595 limit=u_strchr(src, 0); in decompose()
608 for(prevSrc=src; src!=limit;) { in decompose()
617 if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) { in decompose()
633 if(!buffer->appendZeroCC(prevSrc, src, errorCode)) { in decompose()
641 if(src==limit) { in decompose()
645 // Check one above-minimum, relevant code point. in decompose()
673 Normalizer2Impl::decomposeShort(const UChar *src, const UChar *limit, in decomposeShort() argument
679 while(src<limit) { in decomposeShort()
686 UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16); in decomposeShort()
720 // c decomposes, get everything from the variable-length extra data in decompose()
727 leadCC=(uint8_t)(*(mapping-1)>>8); in decompose()
739 const uint8_t *src, const uint8_t *limit, in decomposeUTF8() argument
741 U_ASSERT(limit != nullptr); in decomposeUTF8()
757 if (src == limit) { in decomposeUTF8()
758 if (prevBoundary != limit && sink != nullptr) { in decomposeUTF8()
759 ByteSinkUtil::appendUnchanged(prevBoundary, limit, in decomposeUTF8()
768 UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); in decomposeUTF8()
789 // Medium-fast path: Quick check. in decomposeUTF8()
827 src = decomposeShort(src, limit, STOP_AT_DECOMP_BOUNDARY, false /* onlyContiguous */, in decomposeUTF8()
833 if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals() in decomposeUTF8()
857 Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit, in decomposeShort() argument
863 while (src < limit) { in decomposeShort()
866 UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); in decomposeShort()
895 // norm16!=INERT guarantees that [prevSrc, src[ is valid UTF-8. in decomposeShort()
896 // We do not see invalid UTF-8 here because in decomposeShort()
897 // its norm16==INERT is normalization-inert, in decomposeShort()
899 // and we stop the slow path where invalid UTF-8 begins. in decomposeShort()
920 // The character decomposes, get everything from the variable-length extra data. in decomposeShort()
927 leadCC = (uint8_t)(*(mapping-1) >> 8); in decomposeShort()
970 // c decomposes, get everything from the variable-length extra data in getDecomposition()
976 // The capacity of the buffer must be 30=MAPPING_LENGTH_MASK-1
997 // c decomposes, get everything from the variable-length extra data in getRawDecomposition()
1004 const uint16_t *rawMapping=mapping-((firstUnit>>7)&1)-1; in getRawDecomposition()
1008 return (const UChar *)rawMapping-rm0; in getRawDecomposition()
1012 u_memcpy(buffer+1, (const UChar *)mapping+1+2, mLength-2); in getRawDecomposition()
1013 length=mLength-1; in getRawDecomposition()
1022 void Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit, in decomposeAndAppend() argument
1029 decompose(src, limit, &buffer, errorCode); in decomposeAndAppend()
1035 const UChar *p = src; in decomposeAndAppend() local
1036 while (p != limit) { in decomposeAndAppend()
1037 const UChar *codePointStart = p; in decomposeAndAppend()
1040 UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); in decomposeAndAppend()
1042 p = codePointStart; in decomposeAndAppend()
1051 if(limit==NULL) { // appendZeroCC() needs limit!=NULL in decomposeAndAppend()
1052 limit=u_strchr(p, 0); in decomposeAndAppend()
1055 if (buffer.append(src, (int32_t)(p - src), false, firstCC, prevCC, errorCode)) { in decomposeAndAppend()
1056 buffer.appendZeroCC(p, limit, errorCode); in decomposeAndAppend()
1072 // c decomposes, get everything from the variable-length extra data in norm16HasDecompBoundaryBefore()
1076 return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0; in norm16HasDecompBoundaryBefore()
1100 // c decomposes, get everything from the variable-length extra data in norm16HasDecompBoundaryAfter()
1103 // decomp after-boundary: same as hasFCDBoundaryAfter(), in norm16HasDecompBoundaryAfter()
1111 // if(trailCC==1) test leadCC==0, same as checking for before-boundary in norm16HasDecompBoundaryAfter()
1113 return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0; in norm16HasDecompBoundaryAfter()
1118 * a forward-combining "lead" character,
1120 * and a backward-combining "trail" character.
1125 * Bit 0 set if the composite is a forward-combining starter
1126 * otherwise it returns -1.
1129 * encoded as either pairs or triples of 16-bit units.
1182 return -1; in combine()
1211 * (which is in NFD - decomposed and canonically ordered),
1221 UChar *p=buffer.getStart()+recomposeStartIndex; in recompose() local
1222 UChar *limit=buffer.getLimit(); in recompose() local
1223 if(p==limit) { in recompose()
1234 // Some of the following variables are not used until we have a forward-combining starter in recompose()
1236 compositionsList=NULL; // used as indicator for whether we have a forward-combining starter in recompose()
1242 UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); in recompose()
1248 // the backward-combining character is not blocked in recompose()
1255 UChar prev=(UChar)(*starter-Hangul::JAMO_L_BASE); in recompose()
1257 pRemove=p-1; in recompose()
1260 (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))* in recompose()
1263 if(p!=limit && (t=(UChar)(*p-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) { in recompose()
1264 ++p; in recompose()
1270 r=p; in recompose()
1271 while(r<limit) { in recompose()
1274 limit=q; in recompose()
1275 p=pRemove; in recompose()
1284 if(p==limit) { in recompose()
1294 pRemove=p-U16_LENGTH(c); // pRemove & p: start & limit of the combining mark in recompose()
1310 --pRemove; in recompose()
1320 *--r=*--q; in recompose()
1323 *--starter=U16_LEAD(composite); // undo the temporary increment in recompose()
1330 if(pRemove<p) { in recompose()
1332 r=p; in recompose()
1333 while(r<limit) { in recompose()
1336 limit=q; in recompose()
1337 p=pRemove; in recompose()
1341 if(p==limit) { in recompose()
1359 if(p==limit) { in recompose()
1370 starter=p-1; in recompose()
1373 starter=p-2; in recompose()
1381 buffer.setReorderingLimit(limit); in recompose()
1386 uint16_t norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16 in composePair()
1393 b-=Hangul::JAMO_V_BASE; in composePair()
1397 ((a-Hangul::JAMO_L_BASE)*Hangul::JAMO_V_COUNT+b)* in composePair()
1403 b-=Hangul::JAMO_T_BASE; in composePair()
1438 Normalizer2Impl::compose(const UChar *src, const UChar *limit, in compose() argument
1445 if(limit==NULL) { in compose()
1452 limit=u_strchr(src, 0); in compose()
1454 if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) { in compose()
1458 prevBoundary = --src; in compose()
1470 if (src == limit) { in compose()
1471 if (prevBoundary != limit && doCompose) { in compose()
1472 buffer.appendZeroCC(prevBoundary, limit, errorCode); in compose()
1486 if(src!=limit && U16_IS_TRAIL(c2=*src)) { in compose()
1503 // Medium-fast path: Handle cases that do not require full decomposition and recomposition. in compose()
1514 hasCompBoundaryBefore(src, limit)) { in compose()
1525 // The mapping is comp-normalized which also implies hasCompBoundaryBefore. in compose()
1527 hasCompBoundaryBefore(src, limit)) { in compose()
1543 if (hasCompBoundaryBefore(src, limit) || in compose()
1555 UChar prev=*(prevSrc-1); in compose()
1559 UChar l = (UChar)(prev-Hangul::JAMO_L_BASE); in compose()
1565 if (src != limit && in compose()
1566 0 < (t = ((int32_t)*src - Hangul::JAMO_T_BASE)) && in compose()
1570 } else if (hasCompBoundaryBefore(src, limit)) { in compose()
1574 t = -1; in compose()
1578 (l*Hangul::JAMO_V_COUNT + (c-Hangul::JAMO_V_BASE)) * in compose()
1580 --prevSrc; // Replace the Jamo L as well. in compose()
1604 UChar32 syllable = prev + c - Hangul::JAMO_T_BASE; in compose()
1605 --prevSrc; // Replace the Hangul LV as well. in compose()
1618 // One or more combining marks that do not combine-back: in compose()
1620 // if followed by a character with a boundary-before. in compose()
1633 if (src == limit) { in compose()
1635 buffer.appendZeroCC(prevBoundary, limit, errorCode); in compose()
1641 UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, n16); in compose()
1655 // src is after the last in-order combining mark. in compose()
1670 const UChar *p = prevSrc; in compose() local
1671 UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, norm16); in compose()
1673 prevSrc = p; in compose()
1684 src = decomposeShort(src, limit, true /* stopAtCompBoundary */, onlyContiguous, in compose()
1689 if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals() in compose()
1709 Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit, in composeQuickCheck() argument
1714 if(limit==NULL) { in composeQuickCheck()
1717 limit=u_strchr(src, 0); in composeQuickCheck()
1719 if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) { in composeQuickCheck()
1722 prevBoundary = --src; in composeQuickCheck()
1734 if(src==limit) { in composeQuickCheck()
1747 if(src!=limit && U16_IS_TRAIL(c2=*src)) { in composeQuickCheck()
1769 const UChar *p = prevSrc; in composeQuickCheck() local
1771 UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, n16); in composeQuickCheck()
1775 prevBoundary = p; in composeQuickCheck()
1800 if (src == limit) { in composeQuickCheck()
1805 UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, norm16); in composeQuickCheck()
1816 // src is after the last in-order combining mark. in composeQuickCheck()
1831 void Normalizer2Impl::composeAndAppend(const UChar *src, const UChar *limit, in composeAndAppend() argument
1838 const UChar *firstStarterInSrc=findNextCompBoundary(src, limit, onlyContiguous); in composeAndAppend()
1842 int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastStarterInDest); in composeAndAppend()
1846 middle.append(src, (int32_t)(firstStarterInSrc-src)); in composeAndAppend()
1857 compose(src, limit, onlyContiguous, true, buffer, errorCode); in composeAndAppend()
1859 if(limit==NULL) { // appendZeroCC() needs limit!=NULL in composeAndAppend()
1860 limit=u_strchr(src, 0); in composeAndAppend()
1862 buffer.appendZeroCC(src, limit, errorCode); in composeAndAppend()
1868 const uint8_t *src, const uint8_t *limit, in composeUTF8() argument
1870 U_ASSERT(limit != nullptr); in composeUTF8()
1881 if (src == limit) { in composeUTF8()
1882 if (prevBoundary != limit && sink != nullptr) { in composeUTF8()
1883 ByteSinkUtil::appendUnchanged(prevBoundary, limit, in composeUTF8()
1892 UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); in composeUTF8()
1904 // Medium-fast path: Handle cases that do not require full decomposition and recomposition. in composeUTF8()
1915 hasCompBoundaryBefore(src, limit)) { in composeUTF8()
1926 // The mapping is comp-normalized which also implies hasCompBoundaryBefore. in composeUTF8()
1928 hasCompBoundaryBefore(src, limit)) { in composeUTF8()
1947 if (hasCompBoundaryBefore(src, limit) || in composeUTF8()
1955 edits->addReplace((int32_t)(src - prevSrc), 0); in composeUTF8()
1967 U_ASSERT((src - prevSrc) == 3 && *prevSrc == 0xe1); in composeUTF8()
1972 UChar32 l = prev - Hangul::JAMO_L_BASE; in composeUTF8()
1977 int32_t t = getJamoTMinusBase(src, limit); in composeUTF8()
1981 } else if (hasCompBoundaryBefore(src, limit)) { in composeUTF8()
1987 (l*Hangul::JAMO_V_COUNT + (prevSrc[2]-0xa1)) * in composeUTF8()
1989 prevSrc -= 3; // Replace the Jamo L as well. in composeUTF8()
2014 prevSrc -= 3; // Replace the Hangul LV as well. in composeUTF8()
2027 // One or more combining marks that do not combine-back: in composeUTF8()
2029 // if followed by a character with a boundary-before. in composeUTF8()
2042 if (src == limit) { in composeUTF8()
2044 ByteSinkUtil::appendUnchanged(prevBoundary, limit, in composeUTF8()
2051 UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, n16); in composeUTF8()
2065 // src is after the last in-order combining mark. in composeUTF8()
2080 const uint8_t *p = prevSrc; in composeUTF8() local
2081 UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, prevBoundary, p, norm16); in composeUTF8()
2083 prevSrc = p; in composeUTF8()
2094 src = decomposeShort(src, limit, STOP_AT_COMP_BOUNDARY, onlyContiguous, in composeUTF8()
2099 if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals() in composeUTF8()
2123 UBool Normalizer2Impl::hasCompBoundaryBefore(const UChar *src, const UChar *limit) const { in hasCompBoundaryBefore()
2124 if (src == limit || *src < minCompNoMaybeCP) { in hasCompBoundaryBefore()
2129 UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16); in hasCompBoundaryBefore()
2133 UBool Normalizer2Impl::hasCompBoundaryBefore(const uint8_t *src, const uint8_t *limit) const { in hasCompBoundaryBefore()
2134 if (src == limit) { in hasCompBoundaryBefore()
2138 UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); in hasCompBoundaryBefore()
2142 UBool Normalizer2Impl::hasCompBoundaryAfter(const UChar *start, const UChar *p, in hasCompBoundaryAfter() argument
2144 if (start == p) { in hasCompBoundaryAfter()
2149 UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16); in hasCompBoundaryAfter()
2153 UBool Normalizer2Impl::hasCompBoundaryAfter(const uint8_t *start, const uint8_t *p, in hasCompBoundaryAfter() argument
2155 if (start == p) { in hasCompBoundaryAfter()
2159 UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, start, p, norm16); in hasCompBoundaryAfter()
2163 const UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p, in findPreviousCompBoundary() argument
2165 while (p != start) { in findPreviousCompBoundary()
2166 const UChar *codePointLimit = p; in findPreviousCompBoundary()
2169 UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16); in findPreviousCompBoundary()
2174 return p; in findPreviousCompBoundary()
2177 return p; in findPreviousCompBoundary()
2180 const UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit, in findNextCompBoundary() argument
2182 while (p != limit) { in findNextCompBoundary()
2183 const UChar *codePointStart = p; in findNextCompBoundary()
2186 UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); in findNextCompBoundary()
2191 return p; in findNextCompBoundary()
2194 return p; in findNextCompBoundary()
2197 uint8_t Normalizer2Impl::getPreviousTrailCC(const UChar *start, const UChar *p) const { in getPreviousTrailCC()
2198 if (start == p) { in getPreviousTrailCC()
2201 int32_t i = (int32_t)(p - start); in getPreviousTrailCC()
2207 uint8_t Normalizer2Impl::getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const { in getPreviousTrailCC()
2208 if (start == p) { in getPreviousTrailCC()
2211 int32_t i = (int32_t)(p - start); in getPreviousTrailCC()
2217 // Note: normalizer2impl.cpp r30982 (2011-nov-27)
2225 // Ticket 20907 - The optimizer in MSVC/Visual Studio versions below 16.4 has trouble with this
2226 // function on Windows ARM64. As a work-around, we disable optimizations for this function.
2227 // This work-around could/should be removed once the following versions of Visual Studio are no
2256 // c decomposes, get everything from the variable-length extra data in getFCD16FromNormData()
2261 norm16|=*(mapping-1)&0xff00; // lccc in getFCD16FromNormData()
2273 Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit, in makeFCD() argument
2276 // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1. in makeFCD()
2280 if(limit==NULL) { in makeFCD()
2288 // Fetching the fcd16 value was deferred for this below-U+0300 code point. in makeFCD()
2289 prevFCD16=getFCD16(*(src-1)); in makeFCD()
2291 --prevBoundary; in makeFCD()
2294 limit=u_strchr(src, 0); in makeFCD()
2297 // Note: In this function we use buffer->appendZeroCC() because we track in makeFCD()
2309 for(prevSrc=src; src!=limit;) { in makeFCD()
2319 if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) { in makeFCD()
2333 if(buffer!=NULL && !buffer->appendZeroCC(prevSrc, src, errorCode)) { in makeFCD()
2336 if(src==limit) { in makeFCD()
2342 // Fetching the fcd16 value was deferred for this below-minLcccCP code point. in makeFCD()
2349 --prevBoundary; in makeFCD()
2353 const UChar *p=src-1; in makeFCD() local
2354 if(U16_IS_TRAIL(*p) && prevSrc<p && U16_IS_LEAD(*(p-1))) { in makeFCD()
2355 --p; in makeFCD()
2358 prevFCD16=getFCD16FromNormData(U16_GET_SUPPLEMENTARY(p[0], p[1])); in makeFCD()
2362 prevBoundary=p; in makeFCD()
2367 } else if(src==limit) { in makeFCD()
2372 // The current character (c) at [prevSrc..src[ has a non-zero lead combining class. in makeFCD()
2379 if(buffer!=NULL && !buffer->appendZeroCC(c, errorCode)) { in makeFCD()
2392 buffer->removeSuffix((int32_t)(prevSrc-prevBoundary)); in makeFCD()
2397 src=findNextFCDBoundary(src, limit); in makeFCD()
2413 void Normalizer2Impl::makeFCDAndAppend(const UChar *src, const UChar *limit, in makeFCDAndAppend() argument
2419 const UChar *firstBoundaryInSrc=findNextFCDBoundary(src, limit); in makeFCDAndAppend()
2423 int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest); in makeFCDAndAppend()
2427 middle.append(src, (int32_t)(firstBoundaryInSrc-src)); in makeFCDAndAppend()
2437 makeFCD(src, limit, &buffer, errorCode); in makeFCDAndAppend()
2439 if(limit==NULL) { // appendZeroCC() needs limit!=NULL in makeFCDAndAppend()
2440 limit=u_strchr(src, 0); in makeFCDAndAppend()
2442 buffer.appendZeroCC(src, limit, errorCode); in makeFCDAndAppend()
2446 const UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const UChar *p) const { in findPreviousFCDBoundary()
2447 while(start<p) { in findPreviousFCDBoundary()
2448 const UChar *codePointLimit = p; in findPreviousFCDBoundary()
2451 UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16); in findPreviousFCDBoundary()
2456 return p; in findPreviousFCDBoundary()
2459 return p; in findPreviousFCDBoundary()
2462 const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *limit) const { in findNextFCDBoundary() argument
2463 while(p<limit) { in findNextFCDBoundary()
2464 const UChar *codePointStart=p; in findNextFCDBoundary()
2467 UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); in findNextFCDBoundary()
2472 return p; in findNextFCDBoundary()
2475 return p; in findNextFCDBoundary()
2478 // CanonicalIterator data -------------------------------------------------- ***
2512 set->add(firstOrigin); in addToStartSet()
2517 set->add(origin); in addToStartSet()
2538 U_ASSERT(impl->fCanonIterData == NULL); in doInit()
2539 impl->fCanonIterData = new CanonIterData(errorCode); in doInit()
2540 if (impl->fCanonIterData == NULL) { in doInit()
2546 while ((end = ucptrie_getRange(impl->normTrie, start, in doInit()
2549 … // Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters. in doInit()
2551 … impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode); in doInit()
2556 umutablecptrie_setName(impl->fCanonIterData->mutableTrie, "CanonIterData"); in doInit()
2558 impl->fCanonIterData->trie = umutablecptrie_buildImmutable( in doInit()
2559 … impl->fCanonIterData->mutableTrie, UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_32, &errorCode); in doInit()
2560 umutablecptrie_close(impl->fCanonIterData->mutableTrie); in doInit()
2561 impl->fCanonIterData->mutableTrie = nullptr; in doInit()
2564 delete impl->fCanonIterData; in doInit()
2565 impl->fCanonIterData = NULL; in doInit()
2573 // Inert, or 2-way mapping (including Hangul syllable). in makeCanonIterDataFromNorm16()
2575 // Composites from 2-way mappings are added at runtime from the in makeCanonIterDataFromNorm16()
2577 // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are in makeCanonIterDataFromNorm16()
2593 // c has a one-way decomposition in makeCanonIterDataFromNorm16()
2595 // Do not modify the whole-range norm16 value. in makeCanonIterDataFromNorm16()
2605 // c decomposes, get everything from the variable-length extra data in makeCanonIterDataFromNorm16()
2610 if(c==c2 && (*(mapping-1)&0xff)!=0) { in makeCanonIterDataFromNorm16()
2622 // one-way mapping. A 2-way mapping is possible here after in makeCanonIterDataFromNorm16()
2649 umtx_initOnce(me->fCanonIterDataInitOnce, &initCanonIterData, me, errorCode); in ensureCanonIterData()
2654 return (int32_t)ucptrie_get(fCanonIterData->trie, c); in getCanonValue()
2658 return *(const UnicodeSet *)fCanonIterData->canonStartSets[n]; in getCanonStartSet()
2681 (UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT); in getCanonStartSet()
2682 set.add(syllable, syllable+Hangul::JAMO_VT_COUNT-1); in getCanonStartSet()
2692 // Normalizer2 data swapping ----------------------------------------------- ***
2719 uint8_t formatVersion0=pInfo->formatVersion[0]; in unorm2_swap()
2721 pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */ in unorm2_swap()
2722 pInfo->dataFormat[1]==0x72 && in unorm2_swap()
2723 pInfo->dataFormat[2]==0x6d && in unorm2_swap()
2724 pInfo->dataFormat[3]==0x32 && in unorm2_swap()
2728 pInfo->dataFormat[0], pInfo->dataFormat[1], in unorm2_swap()
2729 pInfo->dataFormat[2], pInfo->dataFormat[3], in unorm2_swap()
2730 pInfo->formatVersion[0]); in unorm2_swap()
2749 length-=headerSize; in unorm2_swap()
2783 ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode); in unorm2_swap()
2788 utrie_swapAnyVersion(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); in unorm2_swap()
2793 ds->swapArray16(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); in unorm2_swap()