// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//
//   Copyright (C) 2012 International Business Machines Corporation
//   and others. All rights reserved.
//
//   file:  regeximp.cpp
//
//           ICU Regular Expressions,
//             miscellaneous implementation functions.
//
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
16 #include "regeximp.h"
17 #include "unicode/utf16.h"
18 
19 U_NAMESPACE_BEGIN
20 
CaseFoldingUTextIterator(UText & text)21 CaseFoldingUTextIterator::CaseFoldingUTextIterator(UText &text) :
22    fUText(text), fFoldChars(nullptr), fFoldLength(0) {
23 }
24 
~CaseFoldingUTextIterator()25 CaseFoldingUTextIterator::~CaseFoldingUTextIterator() {}
26 
next()27 UChar32 CaseFoldingUTextIterator::next() {
28     UChar32  foldedC;
29     UChar32  originalC;
30     if (fFoldChars == nullptr) {
31         // We are not in a string folding of an earlier character.
32         // Start handling the next char from the input UText.
33         originalC = UTEXT_NEXT32(&fUText);
34         if (originalC == U_SENTINEL) {
35             return originalC;
36         }
37         fFoldLength = ucase_toFullFolding(originalC, &fFoldChars, U_FOLD_CASE_DEFAULT);
38         if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) {
39             // input code point folds to a single code point, possibly itself.
40             // See comment in ucase.h for explanation of return values from ucase_toFullFoldings.
41             if (fFoldLength < 0) {
42                 fFoldLength = ~fFoldLength;
43             }
44             foldedC = (UChar32)fFoldLength;
45             fFoldChars = nullptr;
46             return foldedC;
47         }
48         // String foldings fall through here.
49         fFoldIndex = 0;
50     }
51 
52     U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC);
53     if (fFoldIndex >= fFoldLength) {
54         fFoldChars = nullptr;
55     }
56     return foldedC;
57 }
58 
59 
inExpansion()60 UBool CaseFoldingUTextIterator::inExpansion() {
61     return fFoldChars != nullptr;
62 }
63 
64 
65 
CaseFoldingUCharIterator(const char16_t * chars,int64_t start,int64_t limit)66 CaseFoldingUCharIterator::CaseFoldingUCharIterator(const char16_t *chars, int64_t start, int64_t limit) :
67    fChars(chars), fIndex(start), fLimit(limit), fFoldChars(nullptr), fFoldLength(0) {
68 }
69 
70 
~CaseFoldingUCharIterator()71 CaseFoldingUCharIterator::~CaseFoldingUCharIterator() {}
72 
73 
next()74 UChar32 CaseFoldingUCharIterator::next() {
75     UChar32  foldedC;
76     UChar32  originalC;
77     if (fFoldChars == nullptr) {
78         // We are not in a string folding of an earlier character.
79         // Start handling the next char from the input UText.
80         if (fIndex >= fLimit) {
81             return U_SENTINEL;
82         }
83         U16_NEXT(fChars, fIndex, fLimit, originalC);
84 
85         fFoldLength = ucase_toFullFolding(originalC, &fFoldChars, U_FOLD_CASE_DEFAULT);
86         if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) {
87             // input code point folds to a single code point, possibly itself.
88             // See comment in ucase.h for explanation of return values from ucase_toFullFoldings.
89             if (fFoldLength < 0) {
90                 fFoldLength = ~fFoldLength;
91             }
92             foldedC = (UChar32)fFoldLength;
93             fFoldChars = nullptr;
94             return foldedC;
95         }
96         // String foldings fall through here.
97         fFoldIndex = 0;
98     }
99 
100     U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC);
101     if (fFoldIndex >= fFoldLength) {
102         fFoldChars = nullptr;
103     }
104     return foldedC;
105 }
106 
107 
inExpansion()108 UBool CaseFoldingUCharIterator::inExpansion() {
109     return fFoldChars != nullptr;
110 }
111 
getIndex()112 int64_t CaseFoldingUCharIterator::getIndex() {
113     return fIndex;
114 }
115 
116 
117 U_NAMESPACE_END
118 
119 #endif
120 
121