• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2001-2011, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  casetrn.cpp
11 *   encoding:   US-ASCII
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2004sep03
16 *   created by: Markus W. Scherer
17 *
18 *   Implementation class for lower-/upper-/title-casing transliterators.
19 */
20 
21 #include "unicode/utypes.h"
22 
23 #if !UCONFIG_NO_TRANSLITERATION
24 
25 #include "unicode/uchar.h"
26 #include "unicode/ustring.h"
27 #include "unicode/utf.h"
28 #include "unicode/utf16.h"
29 #include "tolowtrn.h"
30 #include "ucase.h"
31 #include "cpputils.h"
32 
33 /* case context iterator using a Replaceable */
34 U_CFUNC UChar32 U_CALLCONV
utrans_rep_caseContextIterator(void * context,int8_t dir)35 utrans_rep_caseContextIterator(void *context, int8_t dir)
36 {
37     U_NAMESPACE_USE
38 
39     UCaseContext *csc=(UCaseContext *)context;
40     Replaceable *rep=(Replaceable *)csc->p;
41     UChar32 c;
42 
43     if(dir<0) {
44         /* reset for backward iteration */
45         csc->index=csc->cpStart;
46         csc->dir=dir;
47     } else if(dir>0) {
48         /* reset for forward iteration */
49         csc->index=csc->cpLimit;
50         csc->dir=dir;
51     } else {
52         /* continue current iteration direction */
53         dir=csc->dir;
54     }
55 
56     // automatically adjust start and limit if the Replaceable disagrees
57     // with the original values
58     if(dir<0) {
59         if(csc->start<csc->index) {
60             c=rep->char32At(csc->index-1);
61             if(c<0) {
62                 csc->start=csc->index;
63             } else {
64                 csc->index-=U16_LENGTH(c);
65                 return c;
66             }
67         }
68     } else {
69         // detect, and store in csc->b1, if we hit the limit
70         if(csc->index<csc->limit) {
71             c=rep->char32At(csc->index);
72             if(c<0) {
73                 csc->limit=csc->index;
74                 csc->b1=TRUE;
75             } else {
76                 csc->index+=U16_LENGTH(c);
77                 return c;
78             }
79         } else {
80             csc->b1=TRUE;
81         }
82     }
83     return U_SENTINEL;
84 }
85 
86 U_NAMESPACE_BEGIN
87 
// Instantiates the RTTI boilerplate for the abstract class
// CaseMapTransliterator; the macro itself is defined outside this file.
UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(CaseMapTransliterator)
89 
90 /**
91  * Constructs a transliterator.
92  */
93 CaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map) :
94     Transliterator(id, 0),
95     fCsp(ucase_getSingleton()),
96     fMap(map)
97 {
98     // TODO test incremental mode with context-sensitive text (e.g. greek sigma)
99     // TODO need to call setMaximumContextLength()?!
100 }
101 
102 /**
103  * Destructor.
104  */
~CaseMapTransliterator()105 CaseMapTransliterator::~CaseMapTransliterator() {
106 }
107 
108 /**
109  * Copy constructor.
110  */
CaseMapTransliterator(const CaseMapTransliterator & o)111 CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) :
112     Transliterator(o),
113     fCsp(o.fCsp), fMap(o.fMap)
114 {
115 }
116 
117 /**
118  * Assignment operator.
119  */
120 /*CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) {
121     Transliterator::operator=(o);
122     fCsp = o.fCsp;
123     fMap = o.fMap;
124     return *this;
125 }*/
126 
127 /**
128  * Transliterator API.
129  */
130 /*Transliterator* CaseMapTransliterator::clone(void) const {
131     return new CaseMapTransliterator(*this);
132 }*/
133 
134 /**
135  * Implements {@link Transliterator#handleTransliterate}.
136  */
handleTransliterate(Replaceable & text,UTransPosition & offsets,UBool isIncremental) const137 void CaseMapTransliterator::handleTransliterate(Replaceable& text,
138                                  UTransPosition& offsets,
139                                  UBool isIncremental) const
140 {
141     if (offsets.start >= offsets.limit) {
142         return;
143     }
144 
145     UCaseContext csc;
146     uprv_memset(&csc, 0, sizeof(csc));
147     csc.p = &text;
148     csc.start = offsets.contextStart;
149     csc.limit = offsets.contextLimit;
150 
151     UnicodeString tmp;
152     const UChar *s;
153     UChar32 c;
154     int32_t textPos, delta, result, locCache=0;
155 
156     for(textPos=offsets.start; textPos<offsets.limit;) {
157         csc.cpStart=textPos;
158         c=text.char32At(textPos);
159         csc.cpLimit=textPos+=U16_LENGTH(c);
160 
161         result=fMap(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
162 
163         if(csc.b1 && isIncremental) {
164             // fMap() tried to look beyond the context limit
165             // wait for more input
166             offsets.start=csc.cpStart;
167             return;
168         }
169 
170         if(result>=0) {
171             // replace the current code point with its full case mapping result
172             // see UCASE_MAX_STRING_LENGTH
173             if(result<=UCASE_MAX_STRING_LENGTH) {
174                 // string s[result]
175                 tmp.setTo(FALSE, s, result);
176                 delta=result-U16_LENGTH(c);
177             } else {
178                 // single code point
179                 tmp.setTo(result);
180                 delta=tmp.length()-U16_LENGTH(c);
181             }
182             text.handleReplaceBetween(csc.cpStart, textPos, tmp);
183             if(delta!=0) {
184                 textPos+=delta;
185                 csc.limit=offsets.contextLimit+=delta;
186                 offsets.limit+=delta;
187             }
188         }
189     }
190     offsets.start=textPos;
191 }
192 
193 U_NAMESPACE_END
194 
195 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
196