1 /*
2 **********************************************************************
3 * Copyright (c) 2001-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/19/2001 aliu Creation.
8 **********************************************************************
9 */
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_TRANSLITERATION
14
15 #include "unicode/utf16.h"
16 #include "esctrn.h"
17 #include "util.h"
18
19 U_NAMESPACE_BEGIN
20
21 static const UChar UNIPRE[] = {85,43,0}; // "U+"
22 static const UChar BS_u[] = {92,117,0}; // "\\u"
23 static const UChar BS_U[] = {92,85,0}; // "\\U"
24 static const UChar XMLPRE[] = {38,35,120,0}; // "&#x"
25 static const UChar XML10PRE[] = {38,35,0}; // "&#"
26 static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{"
27 static const UChar SEMI[] = {59,0}; // ";"
28 static const UChar RBRACE[] = {125,0}; // "}"
29
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator)30 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator)
31
32 /**
33 * Factory methods
34 */
35 static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) {
36 // Unicode: "U+10FFFF" hex, min=4, max=6
37 return new EscapeTransliterator(ID, UnicodeString(TRUE, UNIPRE, 2), UnicodeString(), 16, 4, TRUE, NULL);
38 }
_createEscJava(const UnicodeString & ID,Transliterator::Token)39 static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) {
40 // Java: "\\uFFFF" hex, min=4, max=4
41 return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, FALSE, NULL);
42 }
_createEscC(const UnicodeString & ID,Transliterator::Token)43 static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) {
44 // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
45 return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, TRUE,
46 new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE, BS_U, 2), UnicodeString(), 16, 8, TRUE, NULL));
47 }
_createEscXML(const UnicodeString & ID,Transliterator::Token)48 static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) {
49 // XML: "" hex, min=1, max=6
50 return new EscapeTransliterator(ID, UnicodeString(TRUE, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, TRUE, NULL);
51 }
_createEscXML10(const UnicodeString & ID,Transliterator::Token)52 static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) {
53 // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
54 return new EscapeTransliterator(ID, UnicodeString(TRUE, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, TRUE, NULL);
55 }
_createEscPerl(const UnicodeString & ID,Transliterator::Token)56 static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) {
57 // Perl: "\\x{263A}" hex, min=1, max=6
58 return new EscapeTransliterator(ID, UnicodeString(TRUE, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, TRUE, NULL);
59 }
60
61 /**
62 * Registers standard variants with the system. Called by
63 * Transliterator during initialization.
64 */
registerIDs()65 void EscapeTransliterator::registerIDs() {
66 Token t = integerToken(0);
67
68 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t);
69
70 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t);
71
72 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t);
73
74 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t);
75
76 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t);
77
78 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t);
79
80 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t);
81 }
82
83 /**
84 * Constructs an escape transliterator with the given ID and
85 * parameters. See the class member documentation for details.
86 */
EscapeTransliterator(const UnicodeString & newID,const UnicodeString & _prefix,const UnicodeString & _suffix,int32_t _radix,int32_t _minDigits,UBool _grokSupplementals,EscapeTransliterator * adoptedSupplementalHandler)87 EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID,
88 const UnicodeString& _prefix, const UnicodeString& _suffix,
89 int32_t _radix, int32_t _minDigits,
90 UBool _grokSupplementals,
91 EscapeTransliterator* adoptedSupplementalHandler) :
92 Transliterator(newID, NULL)
93 {
94 this->prefix = _prefix;
95 this->suffix = _suffix;
96 this->radix = _radix;
97 this->minDigits = _minDigits;
98 this->grokSupplementals = _grokSupplementals;
99 this->supplementalHandler = adoptedSupplementalHandler;
100 }
101
102 /**
103 * Copy constructor.
104 */
EscapeTransliterator(const EscapeTransliterator & o)105 EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) :
106 Transliterator(o),
107 prefix(o.prefix),
108 suffix(o.suffix),
109 radix(o.radix),
110 minDigits(o.minDigits),
111 grokSupplementals(o.grokSupplementals) {
112 supplementalHandler = (o.supplementalHandler != 0) ?
113 new EscapeTransliterator(*o.supplementalHandler) : NULL;
114 }
115
~EscapeTransliterator()116 EscapeTransliterator::~EscapeTransliterator() {
117 delete supplementalHandler;
118 }
119
120 /**
121 * Transliterator API.
122 */
clone() const123 Transliterator* EscapeTransliterator::clone() const {
124 return new EscapeTransliterator(*this);
125 }
126
127 /**
128 * Implements {@link Transliterator#handleTransliterate}.
129 */
handleTransliterate(Replaceable & text,UTransPosition & pos,UBool) const130 void EscapeTransliterator::handleTransliterate(Replaceable& text,
131 UTransPosition& pos,
132 UBool /*isIncremental*/) const
133 {
134 /* TODO: Verify that isIncremental can be ignored */
135 int32_t start = pos.start;
136 int32_t limit = pos.limit;
137
138 UnicodeString buf(prefix);
139 int32_t prefixLen = prefix.length();
140 UBool redoPrefix = FALSE;
141
142 while (start < limit) {
143 int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
144 int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1;
145
146 if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) {
147 buf.truncate(0);
148 buf.append(supplementalHandler->prefix);
149 ICU_Utility::appendNumber(buf, c, supplementalHandler->radix,
150 supplementalHandler->minDigits);
151 buf.append(supplementalHandler->suffix);
152 redoPrefix = TRUE;
153 } else {
154 if (redoPrefix) {
155 buf.truncate(0);
156 buf.append(prefix);
157 redoPrefix = FALSE;
158 } else {
159 buf.truncate(prefixLen);
160 }
161 ICU_Utility::appendNumber(buf, c, radix, minDigits);
162 buf.append(suffix);
163 }
164
165 text.handleReplaceBetween(start, start + charLen, buf);
166 start += buf.length();
167 limit += buf.length() - charLen;
168 }
169
170 pos.contextLimit += limit - pos.limit;
171 pos.limit = limit;
172 pos.start = start;
173 }
174
175 U_NAMESPACE_END
176
177 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
178
179 //eof
180