1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2001-2011, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: casetrn.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2004sep03
16 * created by: Markus W. Scherer
17 *
18 * Implementation class for lower-/upper-/title-casing transliterators.
19 */
20
21 #include "unicode/utypes.h"
22
23 #if !UCONFIG_NO_TRANSLITERATION
24
25 #include "unicode/uchar.h"
26 #include "unicode/ustring.h"
27 #include "unicode/utf.h"
28 #include "unicode/utf16.h"
29 #include "tolowtrn.h"
30 #include "ucase.h"
31 #include "cpputils.h"
32
33 /* case context iterator using a Replaceable */
34 U_CFUNC UChar32 U_CALLCONV
utrans_rep_caseContextIterator(void * context,int8_t dir)35 utrans_rep_caseContextIterator(void *context, int8_t dir)
36 {
37 U_NAMESPACE_USE
38
39 UCaseContext *csc=(UCaseContext *)context;
40 Replaceable *rep=(Replaceable *)csc->p;
41 UChar32 c;
42
43 if(dir<0) {
44 /* reset for backward iteration */
45 csc->index=csc->cpStart;
46 csc->dir=dir;
47 } else if(dir>0) {
48 /* reset for forward iteration */
49 csc->index=csc->cpLimit;
50 csc->dir=dir;
51 } else {
52 /* continue current iteration direction */
53 dir=csc->dir;
54 }
55
56 // automatically adjust start and limit if the Replaceable disagrees
57 // with the original values
58 if(dir<0) {
59 if(csc->start<csc->index) {
60 c=rep->char32At(csc->index-1);
61 if(c<0) {
62 csc->start=csc->index;
63 } else {
64 csc->index-=U16_LENGTH(c);
65 return c;
66 }
67 }
68 } else {
69 // detect, and store in csc->b1, if we hit the limit
70 if(csc->index<csc->limit) {
71 c=rep->char32At(csc->index);
72 if(c<0) {
73 csc->limit=csc->index;
74 csc->b1=true;
75 } else {
76 csc->index+=U16_LENGTH(c);
77 return c;
78 }
79 } else {
80 csc->b1=true;
81 }
82 }
83 return U_SENTINEL;
84 }
85
86 U_NAMESPACE_BEGIN
87
UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(CaseMapTransliterator)

/**
 * Constructs a case-mapping transliterator.
 *
 * @param id   the transliterator ID, passed on to the Transliterator base class
 * @param map  the full case mapping function that handleTransliterate()
 *             applies to each code point
 */
CaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map)
    : Transliterator(id, nullptr),  // second base-class argument left null
      fMap(map)
{
    // TODO test incremental mode with context-sensitive text (e.g. greek sigma)
    // TODO need to call setMaximumContextLength()?!
}
100
101 /**
102 * Destructor.
103 */
~CaseMapTransliterator()104 CaseMapTransliterator::~CaseMapTransliterator() {
105 }
106
107 /**
108 * Copy constructor.
109 */
CaseMapTransliterator(const CaseMapTransliterator & o)110 CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) :
111 Transliterator(o),
112 fMap(o.fMap)
113 {
114 }
115
116 /**
117 * Assignment operator.
118 */
119 /*CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) {
120 Transliterator::operator=(o);
121 fMap = o.fMap;
122 return *this;
123 }*/
124
125 /**
126 * Transliterator API.
127 */
128 /*CaseMapTransliterator* CaseMapTransliterator::clone() const {
129 return new CaseMapTransliterator(*this);
130 }*/
131
132 /**
133 * Implements {@link Transliterator#handleTransliterate}.
134 */
handleTransliterate(Replaceable & text,UTransPosition & offsets,UBool isIncremental) const135 void CaseMapTransliterator::handleTransliterate(Replaceable& text,
136 UTransPosition& offsets,
137 UBool isIncremental) const
138 {
139 if (offsets.start >= offsets.limit) {
140 return;
141 }
142
143 UCaseContext csc;
144 uprv_memset(&csc, 0, sizeof(csc));
145 csc.p = &text;
146 csc.start = offsets.contextStart;
147 csc.limit = offsets.contextLimit;
148
149 UnicodeString tmp;
150 const char16_t *s;
151 UChar32 c;
152 int32_t textPos, delta, result;
153
154 for(textPos=offsets.start; textPos<offsets.limit;) {
155 csc.cpStart=textPos;
156 c=text.char32At(textPos);
157 csc.cpLimit=textPos+=U16_LENGTH(c);
158
159 result=fMap(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
160
161 if(csc.b1 && isIncremental) {
162 // fMap() tried to look beyond the context limit
163 // wait for more input
164 offsets.start=csc.cpStart;
165 return;
166 }
167
168 if(result>=0) {
169 // replace the current code point with its full case mapping result
170 // see UCASE_MAX_STRING_LENGTH
171 if(result<=UCASE_MAX_STRING_LENGTH) {
172 // string s[result]
173 tmp.setTo(false, s, result);
174 delta=result-U16_LENGTH(c);
175 } else {
176 // single code point
177 tmp.setTo(result);
178 delta=tmp.length()-U16_LENGTH(c);
179 }
180 text.handleReplaceBetween(csc.cpStart, textPos, tmp);
181 if(delta!=0) {
182 textPos+=delta;
183 csc.limit=offsets.contextLimit+=delta;
184 offsets.limit+=delta;
185 }
186 }
187 }
188 offsets.start=textPos;
189 }
190
191 U_NAMESPACE_END
192
193 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
194