1 /**
2 *******************************************************************************
3 * Copyright (C) 1996-2006, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
6 *
7 *
8 *******************************************************************************
9 */
10 /*
11 * @(#) icujniinterface.c 1.2 00/10/11
12 *
13 * (C) Copyright IBM Corp. 2000 - All Rights Reserved
14 * A JNI wrapper to ICU native converter Interface
15 * @author: Ram Viswanadha
16 */
17
18 #define LOG_TAG "NativeConverter"
19
20 #include "ErrorCode.h"
21 #include "JNIHelp.h"
22 #include "JniConstants.h"
23 #include "ScopedLocalRef.h"
24 #include "ScopedPrimitiveArray.h"
25 #include "ScopedUtfChars.h"
26 #include "UniquePtr.h"
27 #include "cutils/log.h"
28 #include "unicode/ucnv.h"
29 #include "unicode/ucnv_cb.h"
30 #include "unicode/uniset.h"
31 #include "unicode/ustring.h"
32 #include "unicode/utypes.h"
33 #include <stdlib.h>
34 #include <string.h>
35
// Error-handling modes accepted by getFromUCallback() and getToUCallback().
// NOTE(review): presumably these mirror constants on the Java side — confirm against the caller.
// REPORT selects the ICU "stop" callbacks.
#define NativeConverter_REPORT 0
// IGNORE selects a callback that skips the offending input.
#define NativeConverter_IGNORE 1
// REPLACE selects a callback that writes the configured substitution.
#define NativeConverter_REPLACE 2
39
// State consulted by CHARSET_DECODER_CALLBACK when decoding hits bad input.
// Allocated by NativeConverter_setCallbackDecode and deleted by CHARSET_DECODER_CALLBACK when it
// is invoked with UCNV_CLOSE.
struct DecoderCallbackContext {
    // Number of UChars of subUChars that decoderReplaceCallback writes.
    int length;
    // Replacement text written by decoderReplaceCallback.
    UChar subUChars[256];
    // Invoked for UCNV_UNASSIGNED.
    UConverterToUCallback onUnmappableInput;
    // Invoked for UCNV_ILLEGAL and UCNV_IRREGULAR.
    UConverterToUCallback onMalformedInput;
};
46
// State consulted by CHARSET_ENCODER_CALLBACK when encoding hits bad input.
// Allocated by NativeConverter_setCallbackEncode and deleted by CHARSET_ENCODER_CALLBACK when it
// is invoked with UCNV_CLOSE.
struct EncoderCallbackContext {
    // Number of bytes of subBytes that encoderReplaceCallback writes.
    int length;
    // Replacement bytes written by encoderReplaceCallback.
    char subBytes[256];
    // Invoked for UCNV_UNASSIGNED.
    UConverterFromUCallback onUnmappableInput;
    // Invoked for UCNV_ILLEGAL and UCNV_IRREGULAR.
    UConverterFromUCallback onMalformedInput;
};
53
// Deleter functor that releases a UConverter through ucnv_close().
struct UConverterDeleter {
    void operator()(UConverter* p) const {
        ucnv_close(p);
    }
};
// UniquePtr specialised with the deleter above, used in this file for converters that are only
// needed temporarily.
typedef UniquePtr<UConverter, UConverterDeleter> UniqueUConverter;
60
toUConverter(jlong address)61 static UConverter* toUConverter(jlong address) {
62 return reinterpret_cast<UConverter*>(static_cast<uintptr_t>(address));
63 }
64
NativeConverter_openConverter(JNIEnv * env,jclass,jstring converterName)65 static jlong NativeConverter_openConverter(JNIEnv* env, jclass, jstring converterName) {
66 ScopedUtfChars converterNameChars(env, converterName);
67 if (converterNameChars.c_str() == NULL) {
68 return 0;
69 }
70 UErrorCode errorCode = U_ZERO_ERROR;
71 UConverter* cnv = ucnv_open(converterNameChars.c_str(), &errorCode);
72 icu4jni_error(env, errorCode);
73 return reinterpret_cast<uintptr_t>(cnv);
74 }
75
NativeConverter_closeConverter(JNIEnv *,jclass,jlong address)76 static void NativeConverter_closeConverter(JNIEnv*, jclass, jlong address) {
77 ucnv_close(toUConverter(address));
78 }
79
NativeConverter_encode(JNIEnv * env,jclass,jlong address,jcharArray source,jint sourceEnd,jbyteArray target,jint targetEnd,jintArray data,jboolean flush)80 static jint NativeConverter_encode(JNIEnv* env, jclass, jlong address,
81 jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd,
82 jintArray data, jboolean flush) {
83
84 UConverter* cnv = toUConverter(address);
85 if (cnv == NULL) {
86 return U_ILLEGAL_ARGUMENT_ERROR;
87 }
88 ScopedCharArrayRO uSource(env, source);
89 if (uSource.get() == NULL) {
90 return U_ILLEGAL_ARGUMENT_ERROR;
91 }
92 ScopedByteArrayRW uTarget(env, target);
93 if (uTarget.get() == NULL) {
94 return U_ILLEGAL_ARGUMENT_ERROR;
95 }
96 ScopedIntArrayRW myData(env, data);
97 if (myData.get() == NULL) {
98 return U_ILLEGAL_ARGUMENT_ERROR;
99 }
100
101 // Do the conversion.
102 jint* sourceOffset = &myData[0];
103 jint* targetOffset = &myData[1];
104 const jchar* mySource = uSource.get() + *sourceOffset;
105 const UChar* mySourceLimit= uSource.get() + sourceEnd;
106 char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset);
107 const char* cTargetLimit = reinterpret_cast<const char*>(uTarget.get() + targetEnd);
108 UErrorCode errorCode = U_ZERO_ERROR;
109 ucnv_fromUnicode(cnv , &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, (UBool) flush, &errorCode);
110 *sourceOffset = (mySource - uSource.get()) - *sourceOffset;
111 *targetOffset = (reinterpret_cast<jbyte*>(cTarget) - uTarget.get()) - *targetOffset;
112
113 // Check how much more input is necessary to complete what's in the converter's internal buffer.
114 UErrorCode minorErrorCode = U_ZERO_ERROR;
115 int32_t pending = ucnv_fromUCountPending(cnv, &minorErrorCode);
116 if (U_SUCCESS(minorErrorCode)) {
117 myData[3] = pending;
118 }
119
120 // If there was an error, count the problematic characters.
121 if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND) {
122 int8_t len = 32;
123 UChar invalidUChars[32];
124 ucnv_getInvalidUChars(cnv, invalidUChars, &len, &minorErrorCode);
125 if (U_SUCCESS(minorErrorCode)) {
126 myData[2] = len;
127 }
128 }
129 return errorCode;
130 }
131
NativeConverter_decode(JNIEnv * env,jclass,jlong address,jbyteArray source,jint sourceEnd,jcharArray target,jint targetEnd,jintArray data,jboolean flush)132 static jint NativeConverter_decode(JNIEnv* env, jclass, jlong address,
133 jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd,
134 jintArray data, jboolean flush) {
135
136 UConverter* cnv = toUConverter(address);
137 if (cnv == NULL) {
138 return U_ILLEGAL_ARGUMENT_ERROR;
139 }
140 ScopedByteArrayRO uSource(env, source);
141 if (uSource.get() == NULL) {
142 return U_ILLEGAL_ARGUMENT_ERROR;
143 }
144 ScopedCharArrayRW uTarget(env, target);
145 if (uTarget.get() == NULL) {
146 return U_ILLEGAL_ARGUMENT_ERROR;
147 }
148 ScopedIntArrayRW myData(env, data);
149 if (myData.get() == NULL) {
150 return U_ILLEGAL_ARGUMENT_ERROR;
151 }
152
153 // Do the conversion.
154 jint* sourceOffset = &myData[0];
155 jint* targetOffset = &myData[1];
156 const char* mySource = reinterpret_cast<const char*>(uSource.get() + *sourceOffset);
157 const char* mySourceLimit = reinterpret_cast<const char*>(uSource.get() + sourceEnd);
158 UChar* cTarget = uTarget.get() + *targetOffset;
159 const UChar* cTargetLimit = uTarget.get() + targetEnd;
160 UErrorCode errorCode = U_ZERO_ERROR;
161 ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, flush, &errorCode);
162 *sourceOffset = mySource - reinterpret_cast<const char*>(uSource.get()) - *sourceOffset;
163 *targetOffset = cTarget - uTarget.get() - *targetOffset;
164
165 // Check how much more input is necessary to complete what's in the converter's internal buffer.
166 UErrorCode minorErrorCode = U_ZERO_ERROR;
167 jint pending = ucnv_toUCountPending(cnv, &minorErrorCode);
168 myData[3] = pending;
169
170 // If there was an error, count the problematic bytes.
171 if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND) {
172 int8_t len = 32;
173 char invalidChars[32] = {'\0'};
174 ucnv_getInvalidChars(cnv, invalidChars, &len, &minorErrorCode);
175 if (U_SUCCESS(minorErrorCode)) {
176 myData[2] = len;
177 }
178 }
179
180 return errorCode;
181 }
182
NativeConverter_resetByteToChar(JNIEnv *,jclass,jlong address)183 static void NativeConverter_resetByteToChar(JNIEnv*, jclass, jlong address) {
184 UConverter* cnv = toUConverter(address);
185 if (cnv) {
186 ucnv_resetToUnicode(cnv);
187 }
188 }
189
NativeConverter_resetCharToByte(JNIEnv *,jclass,jlong address)190 static void NativeConverter_resetCharToByte(JNIEnv*, jclass, jlong address) {
191 UConverter* cnv = toUConverter(address);
192 if (cnv) {
193 ucnv_resetFromUnicode(cnv);
194 }
195 }
196
NativeConverter_getMaxBytesPerChar(JNIEnv *,jclass,jlong address)197 static jint NativeConverter_getMaxBytesPerChar(JNIEnv*, jclass, jlong address) {
198 UConverter* cnv = toUConverter(address);
199 return (cnv != NULL) ? ucnv_getMaxCharSize(cnv) : -1;
200 }
201
NativeConverter_getMinBytesPerChar(JNIEnv *,jclass,jlong address)202 static jint NativeConverter_getMinBytesPerChar(JNIEnv*, jclass, jlong address) {
203 UConverter* cnv = toUConverter(address);
204 return (cnv != NULL) ? ucnv_getMinCharSize(cnv) : -1;
205 }
206
NativeConverter_getAveBytesPerChar(JNIEnv *,jclass,jlong address)207 static jfloat NativeConverter_getAveBytesPerChar(JNIEnv*, jclass, jlong address) {
208 UConverter* cnv = toUConverter(address);
209 return (cnv != NULL) ? ((ucnv_getMaxCharSize(cnv) + ucnv_getMinCharSize(cnv)) / 2.0) : -1;
210 }
211
NativeConverter_flushByteToChar(JNIEnv * env,jclass,jlong address,jcharArray target,jint targetEnd,jintArray data)212 static jint NativeConverter_flushByteToChar(JNIEnv* env, jclass, jlong address,
213 jcharArray target, jint targetEnd, jintArray data) {
214 UConverter* cnv = toUConverter(address);
215 if (cnv == NULL) {
216 return U_ILLEGAL_ARGUMENT_ERROR;
217 }
218 ScopedCharArrayRW uTarget(env, target);
219 if (uTarget.get() == NULL) {
220 return U_ILLEGAL_ARGUMENT_ERROR;
221 }
222 ScopedIntArrayRW myData(env, data);
223 if (myData.get() == NULL) {
224 return U_ILLEGAL_ARGUMENT_ERROR;
225 }
226 char source = '\0';
227 jint* targetOffset = &myData[1];
228 const char* mySource = &source;
229 const char* mySourceLimit = &source;
230 UChar* cTarget = uTarget.get() + *targetOffset;
231 const UChar* cTargetLimit = uTarget.get() + targetEnd;
232 UErrorCode errorCode = U_ZERO_ERROR;
233 ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, TRUE, &errorCode);
234 *targetOffset = cTarget - uTarget.get() - *targetOffset;
235 return errorCode;
236 }
237
NativeConverter_flushCharToByte(JNIEnv * env,jclass,jlong address,jbyteArray target,jint targetEnd,jintArray data)238 static jint NativeConverter_flushCharToByte(JNIEnv* env, jclass, jlong address,
239 jbyteArray target, jint targetEnd, jintArray data) {
240 UConverter* cnv = toUConverter(address);
241 if (cnv == NULL) {
242 return U_ILLEGAL_ARGUMENT_ERROR;
243 }
244 ScopedByteArrayRW uTarget(env, target);
245 if (uTarget.get() == NULL) {
246 return U_ILLEGAL_ARGUMENT_ERROR;
247 }
248 ScopedIntArrayRW myData(env, data);
249 if (myData.get() == NULL) {
250 return U_ILLEGAL_ARGUMENT_ERROR;
251 }
252 jchar source = '\0';
253 jint* targetOffset = &myData[1];
254 const jchar* mySource = &source;
255 const UChar* mySourceLimit= &source;
256 char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset);
257 const char* cTargetLimit = reinterpret_cast<char*>(uTarget.get() + targetEnd);
258 UErrorCode errorCode = U_ZERO_ERROR;
259 ucnv_fromUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, TRUE, &errorCode);
260 *targetOffset = reinterpret_cast<jbyte*>(cTarget) - uTarget.get() - *targetOffset;
261 return errorCode;
262 }
263
NativeConverter_canEncode(JNIEnv *,jclass,jlong address,jint codeUnit)264 static jboolean NativeConverter_canEncode(JNIEnv*, jclass, jlong address, jint codeUnit) {
265 UErrorCode errorCode = U_ZERO_ERROR;
266 UConverter* cnv = toUConverter(address);
267 if (cnv == NULL) {
268 return JNI_FALSE;
269 }
270
271 UChar srcBuffer[3];
272 const UChar* src = &srcBuffer[0];
273 const UChar* srcLimit = (codeUnit < 0x10000) ? &src[1] : &src[2];
274
275 char dstBuffer[5];
276 char* dst = &dstBuffer[0];
277 const char* dstLimit = &dstBuffer[4];
278
279 int i = 0;
280 UTF_APPEND_CHAR(&srcBuffer[0], i, 2, codeUnit);
281
282 ucnv_fromUnicode(cnv, &dst, dstLimit, &src, srcLimit, NULL, TRUE, &errorCode);
283 return U_SUCCESS(errorCode);
284 }
285
286 /*
287 * If a charset listed in the IANA Charset Registry is supported by an implementation
288 * of the Java platform then its canonical name must be the name listed in the registry.
289 * Many charsets are given more than one name in the registry, in which case the registry
290 * identifies one of the names as MIME-preferred. If a charset has more than one registry
291 * name then its canonical name must be the MIME-preferred name and the other names in
292 * the registry must be valid aliases. If a supported charset is not listed in the IANA
293 * registry then its canonical name must begin with one of the strings "X-" or "x-".
294 */
getJavaCanonicalName(JNIEnv * env,const char * icuCanonicalName)295 static jstring getJavaCanonicalName(JNIEnv* env, const char* icuCanonicalName) {
296 UErrorCode status = U_ZERO_ERROR;
297
298 // Check to see if this is a well-known MIME or IANA name.
299 const char* cName = NULL;
300 if ((cName = ucnv_getStandardName(icuCanonicalName, "MIME", &status)) != NULL) {
301 return env->NewStringUTF(cName);
302 } else if ((cName = ucnv_getStandardName(icuCanonicalName, "IANA", &status)) != NULL) {
303 return env->NewStringUTF(cName);
304 }
305
306 // Check to see if an alias already exists with "x-" prefix, if yes then
307 // make that the canonical name.
308 int32_t aliasCount = ucnv_countAliases(icuCanonicalName, &status);
309 for (int i = 0; i < aliasCount; ++i) {
310 const char* name = ucnv_getAlias(icuCanonicalName, i, &status);
311 if (name != NULL && name[0] == 'x' && name[1] == '-') {
312 return env->NewStringUTF(name);
313 }
314 }
315
316 // As a last resort, prepend "x-" to any alias and make that the canonical name.
317 status = U_ZERO_ERROR;
318 const char* name = ucnv_getStandardName(icuCanonicalName, "UTR22", &status);
319 if (name == NULL && strchr(icuCanonicalName, ',') != NULL) {
320 name = ucnv_getAlias(icuCanonicalName, 1, &status);
321 }
322 // If there is no UTR22 canonical name then just return the original name.
323 if (name == NULL) {
324 name = icuCanonicalName;
325 }
326 UniquePtr<char[]> result(new char[2 + strlen(name) + 1]);
327 strcpy(&result[0], "x-");
328 strcat(&result[0], name);
329 return env->NewStringUTF(&result[0]);
330 }
331
NativeConverter_getAvailableCharsetNames(JNIEnv * env,jclass)332 static jobjectArray NativeConverter_getAvailableCharsetNames(JNIEnv* env, jclass) {
333 int32_t num = ucnv_countAvailable();
334 jobjectArray result = env->NewObjectArray(num, JniConstants::stringClass, NULL);
335 for (int i = 0; i < num; ++i) {
336 const char* name = ucnv_getAvailableName(i);
337 ScopedLocalRef<jstring> javaCanonicalName(env, getJavaCanonicalName(env, name));
338 env->SetObjectArrayElement(result, i, javaCanonicalName.get());
339 }
340 return result;
341 }
342
getAliases(JNIEnv * env,const char * icuCanonicalName)343 static jobjectArray getAliases(JNIEnv* env, const char* icuCanonicalName) {
344 // Get an upper bound on the number of aliases...
345 const char* myEncName = icuCanonicalName;
346 UErrorCode error = U_ZERO_ERROR;
347 int32_t aliasCount = ucnv_countAliases(myEncName, &error);
348 if (aliasCount == 0 && myEncName[0] == 'x' && myEncName[1] == '-') {
349 myEncName = myEncName + 2;
350 aliasCount = ucnv_countAliases(myEncName, &error);
351 }
352 if (!U_SUCCESS(error)) {
353 return NULL;
354 }
355
356 // Collect the aliases we want...
357 const char* aliasArray[aliasCount];
358 int actualAliasCount = 0;
359 for(int i = 0; i < aliasCount; ++i) {
360 const char* name = ucnv_getAlias(myEncName, (uint16_t) i, &error);
361 if (!U_SUCCESS(error)) {
362 return NULL;
363 }
364 // TODO: why do we ignore these ones?
365 if (strchr(name, '+') == 0 && strchr(name, ',') == 0) {
366 aliasArray[actualAliasCount++]= name;
367 }
368 }
369
370 // Convert our C++ char*[] into a Java String[]...
371 jobjectArray result = env->NewObjectArray(actualAliasCount, JniConstants::stringClass, NULL);
372 for (int i = 0; i < actualAliasCount; ++i) {
373 ScopedLocalRef<jstring> alias(env, env->NewStringUTF(aliasArray[i]));
374 env->SetObjectArrayElement(result, i, alias.get());
375 }
376 return result;
377 }
378
getICUCanonicalName(const char * name)379 static const char* getICUCanonicalName(const char* name) {
380 UErrorCode error = U_ZERO_ERROR;
381 const char* canonicalName = NULL;
382 if ((canonicalName = ucnv_getCanonicalName(name, "MIME", &error)) != NULL) {
383 return canonicalName;
384 } else if((canonicalName = ucnv_getCanonicalName(name, "IANA", &error)) != NULL) {
385 return canonicalName;
386 } else if((canonicalName = ucnv_getCanonicalName(name, "", &error)) != NULL) {
387 return canonicalName;
388 } else if((canonicalName = ucnv_getAlias(name, 0, &error)) != NULL) {
389 /* we have some aliases in the form x-blah .. match those first */
390 return canonicalName;
391 } else if (strstr(name, "x-") == name) {
392 /* check if the converter can be opened with the name given */
393 error = U_ZERO_ERROR;
394 UniqueUConverter cnv(ucnv_open(name + 2, &error));
395 if (cnv.get() != NULL) {
396 return name + 2;
397 }
398 }
399 return NULL;
400 }
401
CHARSET_ENCODER_CALLBACK(const void * rawContext,UConverterFromUnicodeArgs * args,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * status)402 static void CHARSET_ENCODER_CALLBACK(const void* rawContext, UConverterFromUnicodeArgs* args,
403 const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason,
404 UErrorCode* status) {
405 if (!rawContext) {
406 return;
407 }
408 const EncoderCallbackContext* ctx = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
409 switch(reason) {
410 case UCNV_UNASSIGNED:
411 ctx->onUnmappableInput(ctx, args, codeUnits, length, codePoint, reason, status);
412 return;
413 case UCNV_ILLEGAL:
414 case UCNV_IRREGULAR:
415 ctx->onMalformedInput(ctx, args, codeUnits, length, codePoint, reason, status);
416 return;
417 case UCNV_CLOSE:
418 delete ctx;
419 return;
420 default:
421 *status = U_ILLEGAL_ARGUMENT_ERROR;
422 return;
423 }
424 }
425
encoderReplaceCallback(const void * rawContext,UConverterFromUnicodeArgs * fromArgs,const UChar *,int32_t,UChar32,UConverterCallbackReason,UErrorCode * err)426 static void encoderReplaceCallback(const void* rawContext,
427 UConverterFromUnicodeArgs* fromArgs, const UChar*, int32_t, UChar32,
428 UConverterCallbackReason, UErrorCode * err) {
429 if (rawContext == NULL) {
430 return;
431 }
432 const EncoderCallbackContext* context = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
433 *err = U_ZERO_ERROR;
434 ucnv_cbFromUWriteBytes(fromArgs, context->subBytes, context->length, 0, err);
435 }
436
getFromUCallback(int32_t mode)437 static UConverterFromUCallback getFromUCallback(int32_t mode) {
438 switch(mode) {
439 case NativeConverter_REPORT:
440 return UCNV_FROM_U_CALLBACK_STOP;
441 case NativeConverter_IGNORE:
442 return UCNV_FROM_U_CALLBACK_SKIP;
443 case NativeConverter_REPLACE:
444 return encoderReplaceCallback;
445 }
446 abort();
447 }
448
NativeConverter_setCallbackEncode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jbyteArray subBytes)449 static jint NativeConverter_setCallbackEncode(JNIEnv* env, jclass, jlong address,
450 jint onMalformedInput, jint onUnmappableInput, jbyteArray subBytes) {
451 UConverter* cnv = toUConverter(address);
452 if (!cnv) {
453 return U_ILLEGAL_ARGUMENT_ERROR;
454 }
455 UConverterFromUCallback fromUOldAction = NULL;
456 const void* fromUOldContext = NULL;
457 ucnv_getFromUCallBack(cnv, &fromUOldAction, const_cast<const void**>(&fromUOldContext));
458
459 /* fromUOldContext can only be DecodeCallbackContext since
460 * the converter created is private data for the decoder
461 * and callbacks can only be set via this method!
462 */
463 EncoderCallbackContext* fromUNewContext=NULL;
464 UConverterFromUCallback fromUNewAction=NULL;
465 if (fromUOldContext == NULL) {
466 fromUNewContext = new EncoderCallbackContext;
467 fromUNewAction = CHARSET_ENCODER_CALLBACK;
468 } else {
469 fromUNewContext = const_cast<EncoderCallbackContext*>(
470 reinterpret_cast<const EncoderCallbackContext*>(fromUOldContext));
471 fromUNewAction = fromUOldAction;
472 fromUOldAction = NULL;
473 fromUOldContext = NULL;
474 }
475 fromUNewContext->onMalformedInput = getFromUCallback(onMalformedInput);
476 fromUNewContext->onUnmappableInput = getFromUCallback(onUnmappableInput);
477 ScopedByteArrayRO sub(env, subBytes);
478 if (sub.get() == NULL) {
479 return U_ILLEGAL_ARGUMENT_ERROR;
480 }
481 fromUNewContext->length = sub.size();
482 memcpy(fromUNewContext->subBytes, sub.get(), sub.size());
483 UErrorCode errorCode = U_ZERO_ERROR;
484 ucnv_setFromUCallBack(cnv, fromUNewAction, fromUNewContext, &fromUOldAction, &fromUOldContext,
485 &errorCode);
486 return errorCode;
487 }
488
decoderIgnoreCallback(const void *,UConverterToUnicodeArgs *,const char *,int32_t,UConverterCallbackReason,UErrorCode * err)489 static void decoderIgnoreCallback(const void*, UConverterToUnicodeArgs*, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) {
490 // The icu4c UCNV_FROM_U_CALLBACK_SKIP callback requires that the context is NULL, which is
491 // never true for us.
492 *err = U_ZERO_ERROR;
493 }
494
decoderReplaceCallback(const void * rawContext,UConverterToUnicodeArgs * toArgs,const char *,int32_t,UConverterCallbackReason,UErrorCode * err)495 static void decoderReplaceCallback(const void* rawContext,
496 UConverterToUnicodeArgs* toArgs, const char*, int32_t, UConverterCallbackReason,
497 UErrorCode* err) {
498 if (!rawContext) {
499 return;
500 }
501 const DecoderCallbackContext* context = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
502 *err = U_ZERO_ERROR;
503 ucnv_cbToUWriteUChars(toArgs,context->subUChars, context->length, 0, err);
504 }
505
getToUCallback(int32_t mode)506 static UConverterToUCallback getToUCallback(int32_t mode) {
507 switch (mode) {
508 case NativeConverter_IGNORE: return decoderIgnoreCallback;
509 case NativeConverter_REPLACE: return decoderReplaceCallback;
510 case NativeConverter_REPORT: return UCNV_TO_U_CALLBACK_STOP;
511 }
512 abort();
513 }
514
CHARSET_DECODER_CALLBACK(const void * rawContext,UConverterToUnicodeArgs * args,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * status)515 static void CHARSET_DECODER_CALLBACK(const void* rawContext, UConverterToUnicodeArgs* args,
516 const char* codeUnits, int32_t length,
517 UConverterCallbackReason reason, UErrorCode* status) {
518 if (!rawContext) {
519 return;
520 }
521 const DecoderCallbackContext* ctx = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
522 switch(reason) {
523 case UCNV_UNASSIGNED:
524 ctx->onUnmappableInput(ctx, args, codeUnits, length, reason, status);
525 return;
526 case UCNV_ILLEGAL:
527 case UCNV_IRREGULAR:
528 ctx->onMalformedInput(ctx, args, codeUnits, length, reason, status);
529 return;
530 case UCNV_CLOSE:
531 delete ctx;
532 return;
533 default:
534 *status = U_ILLEGAL_ARGUMENT_ERROR;
535 return;
536 }
537 }
538
NativeConverter_setCallbackDecode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jcharArray subChars)539 static jint NativeConverter_setCallbackDecode(JNIEnv* env, jclass, jlong address,
540 jint onMalformedInput, jint onUnmappableInput, jcharArray subChars) {
541 UConverter* cnv = toUConverter(address);
542 if (cnv == NULL) {
543 return U_ILLEGAL_ARGUMENT_ERROR;
544 }
545
546 UConverterToUCallback toUOldAction;
547 const void* toUOldContext;
548 ucnv_getToUCallBack(cnv, &toUOldAction, &toUOldContext);
549
550 /* toUOldContext can only be DecodeCallbackContext since
551 * the converter created is private data for the decoder
552 * and callbacks can only be set via this method!
553 */
554 DecoderCallbackContext* toUNewContext = NULL;
555 UConverterToUCallback toUNewAction = NULL;
556 if (toUOldContext == NULL) {
557 toUNewContext = new DecoderCallbackContext;
558 toUNewAction = CHARSET_DECODER_CALLBACK;
559 } else {
560 toUNewContext = const_cast<DecoderCallbackContext*>(
561 reinterpret_cast<const DecoderCallbackContext*>(toUOldContext));
562 toUNewAction = toUOldAction;
563 toUOldAction = NULL;
564 toUOldContext = NULL;
565 }
566 toUNewContext->onMalformedInput = getToUCallback(onMalformedInput);
567 toUNewContext->onUnmappableInput = getToUCallback(onUnmappableInput);
568 ScopedCharArrayRO sub(env, subChars);
569 if (sub.get() == NULL) {
570 return U_ILLEGAL_ARGUMENT_ERROR;
571 }
572 toUNewContext->length = sub.size();
573 u_strncpy(toUNewContext->subUChars, sub.get(), sub.size());
574 UErrorCode errorCode = U_ZERO_ERROR;
575 ucnv_setToUCallBack(cnv, toUNewAction, toUNewContext, &toUOldAction, &toUOldContext,
576 &errorCode);
577 return errorCode;
578 }
579
NativeConverter_getAveCharsPerByte(JNIEnv * env,jclass,jlong handle)580 static jfloat NativeConverter_getAveCharsPerByte(JNIEnv* env, jclass, jlong handle) {
581 return (1 / (jfloat) NativeConverter_getMaxBytesPerChar(env, NULL, handle));
582 }
583
NativeConverter_getSubstitutionBytes(JNIEnv * env,jclass,jlong address)584 static jbyteArray NativeConverter_getSubstitutionBytes(JNIEnv* env, jclass, jlong address) {
585 UConverter* cnv = toUConverter(address);
586 if (cnv == NULL) {
587 return NULL;
588 }
589 UErrorCode status = U_ZERO_ERROR;
590 char subBytes[10];
591 int8_t len = sizeof(subBytes);
592 ucnv_getSubstChars(cnv, subBytes, &len, &status);
593 if (!U_SUCCESS(status)) {
594 return env->NewByteArray(0);
595 }
596 jbyteArray result = env->NewByteArray(len);
597 if (result == NULL) {
598 return NULL;
599 }
600 env->SetByteArrayRegion(result, 0, len, reinterpret_cast<jbyte*>(subBytes));
601 return result;
602 }
603
NativeConverter_contains(JNIEnv * env,jclass,jstring name1,jstring name2)604 static jboolean NativeConverter_contains(JNIEnv* env, jclass, jstring name1, jstring name2) {
605 ScopedUtfChars name1Chars(env, name1);
606 if (name1Chars.c_str() == NULL) {
607 return JNI_FALSE;
608 }
609 ScopedUtfChars name2Chars(env, name2);
610 if (name2Chars.c_str() == NULL) {
611 return JNI_FALSE;
612 }
613
614 UErrorCode errorCode = U_ZERO_ERROR;
615 UniqueUConverter converter1(ucnv_open(name1Chars.c_str(), &errorCode));
616 UnicodeSet set1;
617 ucnv_getUnicodeSet(converter1.get(), set1.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
618
619 UniqueUConverter converter2(ucnv_open(name2Chars.c_str(), &errorCode));
620 UnicodeSet set2;
621 ucnv_getUnicodeSet(converter2.get(), set2.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
622
623 return U_SUCCESS(errorCode) && set1.containsAll(set2);
624 }
625
NativeConverter_charsetForName(JNIEnv * env,jclass,jstring charsetName)626 static jobject NativeConverter_charsetForName(JNIEnv* env, jclass, jstring charsetName) {
627 ScopedUtfChars charsetNameChars(env, charsetName);
628 if (charsetNameChars.c_str() == NULL) {
629 return NULL;
630 }
631 // Get ICU's canonical name for this charset.
632 const char* icuCanonicalName = getICUCanonicalName(charsetNameChars.c_str());
633 if (icuCanonicalName == NULL) {
634 return NULL;
635 }
636 // Get Java's canonical name for this charset.
637 jstring javaCanonicalName = getJavaCanonicalName(env, icuCanonicalName);
638 if (env->ExceptionOccurred()) {
639 return NULL;
640 }
641
642 // Check that this charset is supported.
643 // ICU doesn't offer any "isSupported", so we just open and immediately close.
644 // We ignore the UErrorCode because ucnv_open returning NULL is all the information we need.
645 UErrorCode dummy = U_ZERO_ERROR;
646 UniqueUConverter cnv(ucnv_open(icuCanonicalName, &dummy));
647 if (cnv.get() == NULL) {
648 return NULL;
649 }
650 cnv.reset();
651
652 // Get the aliases for this charset.
653 jobjectArray aliases = getAliases(env, icuCanonicalName);
654 if (env->ExceptionOccurred()) {
655 return NULL;
656 }
657
658 // Construct the CharsetICU object.
659 jmethodID charsetConstructor = env->GetMethodID(JniConstants::charsetICUClass, "<init>",
660 "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;)V");
661 if (env->ExceptionOccurred()) {
662 return NULL;
663 }
664 return env->NewObject(JniConstants::charsetICUClass, charsetConstructor,
665 javaCanonicalName, env->NewStringUTF(icuCanonicalName), aliases);
666 }
667
// JNI registration table handed to jniRegisterNativeMethods(). Each entry names a native
// method and its JNI type signature.
// NOTE(review): NATIVE_METHOD presumably pairs each name with the NativeConverter_<name>
// function defined in this file — confirm against the macro's definition.
static JNINativeMethod gMethods[] = {
    NATIVE_METHOD(NativeConverter, canEncode, "(JI)Z"),
    NATIVE_METHOD(NativeConverter, charsetForName, "(Ljava/lang/String;)Ljava/nio/charset/Charset;"),
    NATIVE_METHOD(NativeConverter, closeConverter, "(J)V"),
    NATIVE_METHOD(NativeConverter, contains, "(Ljava/lang/String;Ljava/lang/String;)Z"),
    NATIVE_METHOD(NativeConverter, decode, "(J[BI[CI[IZ)I"),
    NATIVE_METHOD(NativeConverter, encode, "(J[CI[BI[IZ)I"),
    NATIVE_METHOD(NativeConverter, flushByteToChar, "(J[CI[I)I"),
    NATIVE_METHOD(NativeConverter, flushCharToByte, "(J[BI[I)I"),
    NATIVE_METHOD(NativeConverter, getAvailableCharsetNames, "()[Ljava/lang/String;"),
    NATIVE_METHOD(NativeConverter, getAveBytesPerChar, "(J)F"),
    NATIVE_METHOD(NativeConverter, getAveCharsPerByte, "(J)F"),
    NATIVE_METHOD(NativeConverter, getMaxBytesPerChar, "(J)I"),
    NATIVE_METHOD(NativeConverter, getMinBytesPerChar, "(J)I"),
    NATIVE_METHOD(NativeConverter, getSubstitutionBytes, "(J)[B"),
    NATIVE_METHOD(NativeConverter, openConverter, "(Ljava/lang/String;)J"),
    NATIVE_METHOD(NativeConverter, resetByteToChar, "(J)V"),
    NATIVE_METHOD(NativeConverter, resetCharToByte, "(J)V"),
    NATIVE_METHOD(NativeConverter, setCallbackDecode, "(JII[C)I"),
    NATIVE_METHOD(NativeConverter, setCallbackEncode, "(JII[B)I"),
};
register_com_ibm_icu4jni_converters_NativeConverter(JNIEnv * env)689 int register_com_ibm_icu4jni_converters_NativeConverter(JNIEnv* env) {
690 return jniRegisterNativeMethods(env, "com/ibm/icu4jni/charset/NativeConverter",
691 gMethods, NELEM(gMethods));
692 }
693