1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2001-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: casetrn.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2004sep03
14 * created by: Markus W. Scherer
15 *
16 * Implementation class for lower-/upper-/title-casing transliterators.
17 */
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_TRANSLITERATION
22
23 #include "unicode/uchar.h"
24 #include "unicode/ustring.h"
25 #include "tolowtrn.h"
26 #include "ucase.h"
27 #include "cpputils.h"
28
29 /* case context iterator using a Replaceable */
30 U_CFUNC UChar32 U_CALLCONV
utrans_rep_caseContextIterator(void * context,int8_t dir)31 utrans_rep_caseContextIterator(void *context, int8_t dir)
32 {
33 U_NAMESPACE_USE
34
35 UCaseContext *csc=(UCaseContext *)context;
36 Replaceable *rep=(Replaceable *)csc->p;
37 UChar32 c;
38
39 if(dir<0) {
40 /* reset for backward iteration */
41 csc->index=csc->cpStart;
42 csc->dir=dir;
43 } else if(dir>0) {
44 /* reset for forward iteration */
45 csc->index=csc->cpLimit;
46 csc->dir=dir;
47 } else {
48 /* continue current iteration direction */
49 dir=csc->dir;
50 }
51
52 // automatically adjust start and limit if the Replaceable disagrees
53 // with the original values
54 if(dir<0) {
55 if(csc->start<csc->index) {
56 c=rep->char32At(csc->index-1);
57 if(c<0) {
58 csc->start=csc->index;
59 } else {
60 csc->index-=U16_LENGTH(c);
61 return c;
62 }
63 }
64 } else {
65 // detect, and store in csc->b1, if we hit the limit
66 if(csc->index<csc->limit) {
67 c=rep->char32At(csc->index);
68 if(c<0) {
69 csc->limit=csc->index;
70 csc->b1=TRUE;
71 } else {
72 csc->index+=U16_LENGTH(c);
73 return c;
74 }
75 } else {
76 csc->b1=TRUE;
77 }
78 }
79 return U_SENTINEL;
80 }
81
82 U_NAMESPACE_BEGIN
83
// Run-time type identification boilerplate for CaseMapTransliterator.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CaseMapTransliterator)

/**
 * Constructs a case-mapping transliterator.
 *
 * @param id  identifier handed to the Transliterator base class
 *            (together with 0 as the base constructor's second argument)
 * @param map full case mapping function, kept in fMap and invoked by
 *            handleTransliterate() for every code point
 */
CaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map)
    : Transliterator(id, 0),
      fCsp(NULL),
      fMap(map)
{
    // The status is intentionally not inspected:
    // fCsp is expected to end up NULL if the lookup fails.
    UErrorCode status = U_ZERO_ERROR;
    fCsp = ucase_getSingleton(&status);

    // TODO test incremental mode with context-sensitive text (e.g. greek sigma)
    // TODO need to call setMaximumContextLength()?!
}
100
101 /**
102 * Destructor.
103 */
~CaseMapTransliterator()104 CaseMapTransliterator::~CaseMapTransliterator() {
105 }
106
107 /**
108 * Copy constructor.
109 */
CaseMapTransliterator(const CaseMapTransliterator & o)110 CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) :
111 Transliterator(o),
112 fCsp(o.fCsp), fMap(o.fMap)
113 {
114 }
115
116 /**
117 * Assignment operator.
118 */
119 /*CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) {
120 Transliterator::operator=(o);
121 fCsp = o.fCsp;
122 fMap = o.fMap;
123 return *this;
124 }*/
125
126 /**
127 * Transliterator API.
128 */
clone(void) const129 Transliterator* CaseMapTransliterator::clone(void) const {
130 return new CaseMapTransliterator(*this);
131 }
132
133 /**
134 * Implements {@link Transliterator#handleTransliterate}.
135 */
handleTransliterate(Replaceable & text,UTransPosition & offsets,UBool isIncremental) const136 void CaseMapTransliterator::handleTransliterate(Replaceable& text,
137 UTransPosition& offsets,
138 UBool isIncremental) const
139 {
140 if (offsets.start >= offsets.limit) {
141 return;
142 }
143
144 UCaseContext csc;
145 uprv_memset(&csc, 0, sizeof(csc));
146 csc.p = &text;
147 csc.start = offsets.contextStart;
148 csc.limit = offsets.contextLimit;
149
150 UnicodeString tmp;
151 const UChar *s;
152 UChar32 c;
153 int32_t textPos, delta, result, locCache=0;
154
155 for(textPos=offsets.start; textPos<offsets.limit;) {
156 csc.cpStart=textPos;
157 c=text.char32At(textPos);
158 csc.cpLimit=textPos+=U16_LENGTH(c);
159
160 result=fMap(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
161
162 if(csc.b1 && isIncremental) {
163 // fMap() tried to look beyond the context limit
164 // wait for more input
165 offsets.start=csc.cpStart;
166 return;
167 }
168
169 if(result>=0) {
170 // replace the current code point with its full case mapping result
171 // see UCASE_MAX_STRING_LENGTH
172 if(result<=UCASE_MAX_STRING_LENGTH) {
173 // string s[result]
174 tmp.setTo(FALSE, s, result);
175 delta=result-U16_LENGTH(c);
176 } else {
177 // single code point
178 tmp.setTo(result);
179 delta=tmp.length()-U16_LENGTH(c);
180 }
181 text.handleReplaceBetween(csc.cpStart, textPos, tmp);
182 if(delta!=0) {
183 textPos+=delta;
184 csc.limit=offsets.contextLimit+=delta;
185 offsets.limit+=delta;
186 }
187 }
188 }
189 offsets.start=textPos;
190 }
191
192 U_NAMESPACE_END
193
194 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
195