1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2001-2011, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: casetrn.cpp
11 * encoding: US-ASCII
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2004sep03
16 * created by: Markus W. Scherer
17 *
18 * Implementation class for lower-/upper-/title-casing transliterators.
19 */
20
21 #include "unicode/utypes.h"
22
23 #if !UCONFIG_NO_TRANSLITERATION
24
25 #include "unicode/uchar.h"
26 #include "unicode/ustring.h"
27 #include "unicode/utf.h"
28 #include "unicode/utf16.h"
29 #include "tolowtrn.h"
30 #include "ucase.h"
31 #include "cpputils.h"
32
33 /* case context iterator using a Replaceable */
34 U_CFUNC UChar32 U_CALLCONV
utrans_rep_caseContextIterator(void * context,int8_t dir)35 utrans_rep_caseContextIterator(void *context, int8_t dir)
36 {
37 U_NAMESPACE_USE
38
39 UCaseContext *csc=(UCaseContext *)context;
40 Replaceable *rep=(Replaceable *)csc->p;
41 UChar32 c;
42
43 if(dir<0) {
44 /* reset for backward iteration */
45 csc->index=csc->cpStart;
46 csc->dir=dir;
47 } else if(dir>0) {
48 /* reset for forward iteration */
49 csc->index=csc->cpLimit;
50 csc->dir=dir;
51 } else {
52 /* continue current iteration direction */
53 dir=csc->dir;
54 }
55
56 // automatically adjust start and limit if the Replaceable disagrees
57 // with the original values
58 if(dir<0) {
59 if(csc->start<csc->index) {
60 c=rep->char32At(csc->index-1);
61 if(c<0) {
62 csc->start=csc->index;
63 } else {
64 csc->index-=U16_LENGTH(c);
65 return c;
66 }
67 }
68 } else {
69 // detect, and store in csc->b1, if we hit the limit
70 if(csc->index<csc->limit) {
71 c=rep->char32At(csc->index);
72 if(c<0) {
73 csc->limit=csc->index;
74 csc->b1=TRUE;
75 } else {
76 csc->index+=U16_LENGTH(c);
77 return c;
78 }
79 } else {
80 csc->b1=TRUE;
81 }
82 }
83 return U_SENTINEL;
84 }
85
86 U_NAMESPACE_BEGIN
87
// RTTI support definitions for the abstract CaseMapTransliterator class.
// NOTE(review): the exact expansion lives with the macro definition
// (presumably uobject.h) - confirm there before relying on specifics.
UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(CaseMapTransliterator)
89
90 /**
91 * Constructs a transliterator.
92 */
93 CaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map) :
94 Transliterator(id, 0),
95 fCsp(ucase_getSingleton()),
96 fMap(map)
97 {
98 // TODO test incremental mode with context-sensitive text (e.g. greek sigma)
99 // TODO need to call setMaximumContextLength()?!
100 }
101
102 /**
103 * Destructor.
104 */
~CaseMapTransliterator()105 CaseMapTransliterator::~CaseMapTransliterator() {
106 }
107
108 /**
109 * Copy constructor.
110 */
CaseMapTransliterator(const CaseMapTransliterator & o)111 CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) :
112 Transliterator(o),
113 fCsp(o.fCsp), fMap(o.fMap)
114 {
115 }
116
117 /**
118 * Assignment operator.
119 */
120 /*CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) {
121 Transliterator::operator=(o);
122 fCsp = o.fCsp;
123 fMap = o.fMap;
124 return *this;
125 }*/
126
127 /**
128 * Transliterator API.
129 */
130 /*Transliterator* CaseMapTransliterator::clone(void) const {
131 return new CaseMapTransliterator(*this);
132 }*/
133
134 /**
135 * Implements {@link Transliterator#handleTransliterate}.
136 */
handleTransliterate(Replaceable & text,UTransPosition & offsets,UBool isIncremental) const137 void CaseMapTransliterator::handleTransliterate(Replaceable& text,
138 UTransPosition& offsets,
139 UBool isIncremental) const
140 {
141 if (offsets.start >= offsets.limit) {
142 return;
143 }
144
145 UCaseContext csc;
146 uprv_memset(&csc, 0, sizeof(csc));
147 csc.p = &text;
148 csc.start = offsets.contextStart;
149 csc.limit = offsets.contextLimit;
150
151 UnicodeString tmp;
152 const UChar *s;
153 UChar32 c;
154 int32_t textPos, delta, result, locCache=0;
155
156 for(textPos=offsets.start; textPos<offsets.limit;) {
157 csc.cpStart=textPos;
158 c=text.char32At(textPos);
159 csc.cpLimit=textPos+=U16_LENGTH(c);
160
161 result=fMap(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
162
163 if(csc.b1 && isIncremental) {
164 // fMap() tried to look beyond the context limit
165 // wait for more input
166 offsets.start=csc.cpStart;
167 return;
168 }
169
170 if(result>=0) {
171 // replace the current code point with its full case mapping result
172 // see UCASE_MAX_STRING_LENGTH
173 if(result<=UCASE_MAX_STRING_LENGTH) {
174 // string s[result]
175 tmp.setTo(FALSE, s, result);
176 delta=result-U16_LENGTH(c);
177 } else {
178 // single code point
179 tmp.setTo(result);
180 delta=tmp.length()-U16_LENGTH(c);
181 }
182 text.handleReplaceBetween(csc.cpStart, textPos, tmp);
183 if(delta!=0) {
184 textPos+=delta;
185 csc.limit=offsets.contextLimit+=delta;
186 offsets.limit+=delta;
187 }
188 }
189 }
190 offsets.start=textPos;
191 }
192
193 U_NAMESPACE_END
194
195 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
196