• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2 *******************************************************************************
3 * Copyright (C) 1996-2006, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 *
7 *
8 *******************************************************************************
9 */
10 /*
11  *  @(#) icujniinterface.c	1.2 00/10/11
12  *
13  * (C) Copyright IBM Corp. 2000 - All Rights Reserved
14  *  A JNI wrapper to ICU native converter Interface
15  * @author: Ram Viswanadha
16  */
17 
18 #define LOG_TAG "NativeConverter"
19 
20 #include "ErrorCode.h"
21 #include "JNIHelp.h"
22 #include "JniConstants.h"
23 #include "ScopedLocalRef.h"
24 #include "ScopedPrimitiveArray.h"
25 #include "ScopedUtfChars.h"
26 #include "UniquePtr.h"
27 #include "cutils/log.h"
28 #include "unicode/ucnv.h"
29 #include "unicode/ucnv_cb.h"
30 #include "unicode/uniset.h"
31 #include "unicode/ustring.h"
32 #include "unicode/utypes.h"
33 #include <stdlib.h>
34 #include <string.h>
35 
// Error-handling modes accepted by getFromUCallback() and getToUCallback().
#define NativeConverter_REPORT 0   // stop the conversion and report the error
#define NativeConverter_IGNORE 1   // skip the offending input
#define NativeConverter_REPLACE 2  // write the configured substitution instead
39 
40 struct DecoderCallbackContext {
41     int length;
42     UChar subUChars[256];
43     UConverterToUCallback onUnmappableInput;
44     UConverterToUCallback onMalformedInput;
45 };
46 
47 struct EncoderCallbackContext {
48     int length;
49     char subBytes[256];
50     UConverterFromUCallback onUnmappableInput;
51     UConverterFromUCallback onMalformedInput;
52 };
53 
54 struct UConverterDeleter {
operator ()UConverterDeleter55     void operator()(UConverter* p) const {
56         ucnv_close(p);
57     }
58 };
59 typedef UniquePtr<UConverter, UConverterDeleter> UniqueUConverter;
60 
toUConverter(jlong address)61 static UConverter* toUConverter(jlong address) {
62     return reinterpret_cast<UConverter*>(static_cast<uintptr_t>(address));
63 }
64 
NativeConverter_openConverter(JNIEnv * env,jclass,jstring converterName)65 static jlong NativeConverter_openConverter(JNIEnv* env, jclass, jstring converterName) {
66     ScopedUtfChars converterNameChars(env, converterName);
67     if (converterNameChars.c_str() == NULL) {
68         return 0;
69     }
70     UErrorCode errorCode = U_ZERO_ERROR;
71     UConverter* cnv = ucnv_open(converterNameChars.c_str(), &errorCode);
72     icu4jni_error(env, errorCode);
73     return reinterpret_cast<uintptr_t>(cnv);
74 }
75 
NativeConverter_closeConverter(JNIEnv *,jclass,jlong address)76 static void NativeConverter_closeConverter(JNIEnv*, jclass, jlong address) {
77     ucnv_close(toUConverter(address));
78 }
79 
NativeConverter_encode(JNIEnv * env,jclass,jlong address,jcharArray source,jint sourceEnd,jbyteArray target,jint targetEnd,jintArray data,jboolean flush)80 static jint NativeConverter_encode(JNIEnv* env, jclass, jlong address,
81         jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd,
82         jintArray data, jboolean flush) {
83 
84     UConverter* cnv = toUConverter(address);
85     if (cnv == NULL) {
86         return U_ILLEGAL_ARGUMENT_ERROR;
87     }
88     ScopedCharArrayRO uSource(env, source);
89     if (uSource.get() == NULL) {
90         return U_ILLEGAL_ARGUMENT_ERROR;
91     }
92     ScopedByteArrayRW uTarget(env, target);
93     if (uTarget.get() == NULL) {
94         return U_ILLEGAL_ARGUMENT_ERROR;
95     }
96     ScopedIntArrayRW myData(env, data);
97     if (myData.get() == NULL) {
98         return U_ILLEGAL_ARGUMENT_ERROR;
99     }
100 
101     // Do the conversion.
102     jint* sourceOffset = &myData[0];
103     jint* targetOffset = &myData[1];
104     const jchar* mySource = uSource.get() + *sourceOffset;
105     const UChar* mySourceLimit= uSource.get() + sourceEnd;
106     char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset);
107     const char* cTargetLimit = reinterpret_cast<const char*>(uTarget.get() + targetEnd);
108     UErrorCode errorCode = U_ZERO_ERROR;
109     ucnv_fromUnicode(cnv , &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, (UBool) flush, &errorCode);
110     *sourceOffset = (mySource - uSource.get()) - *sourceOffset;
111     *targetOffset = (reinterpret_cast<jbyte*>(cTarget) - uTarget.get()) - *targetOffset;
112 
113     // Check how much more input is necessary to complete what's in the converter's internal buffer.
114     UErrorCode minorErrorCode = U_ZERO_ERROR;
115     int32_t pending = ucnv_fromUCountPending(cnv, &minorErrorCode);
116     if (U_SUCCESS(minorErrorCode)) {
117         myData[3] = pending;
118     }
119 
120     // If there was an error, count the problematic characters.
121     if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND) {
122         int8_t len = 32;
123         UChar invalidUChars[32];
124         ucnv_getInvalidUChars(cnv, invalidUChars, &len, &minorErrorCode);
125         if (U_SUCCESS(minorErrorCode)) {
126             myData[2] = len;
127         }
128     }
129     return errorCode;
130 }
131 
NativeConverter_decode(JNIEnv * env,jclass,jlong address,jbyteArray source,jint sourceEnd,jcharArray target,jint targetEnd,jintArray data,jboolean flush)132 static jint NativeConverter_decode(JNIEnv* env, jclass, jlong address,
133         jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd,
134         jintArray data, jboolean flush) {
135 
136     UConverter* cnv = toUConverter(address);
137     if (cnv == NULL) {
138         return U_ILLEGAL_ARGUMENT_ERROR;
139     }
140     ScopedByteArrayRO uSource(env, source);
141     if (uSource.get() == NULL) {
142         return U_ILLEGAL_ARGUMENT_ERROR;
143     }
144     ScopedCharArrayRW uTarget(env, target);
145     if (uTarget.get() == NULL) {
146         return U_ILLEGAL_ARGUMENT_ERROR;
147     }
148     ScopedIntArrayRW myData(env, data);
149     if (myData.get() == NULL) {
150         return U_ILLEGAL_ARGUMENT_ERROR;
151     }
152 
153     // Do the conversion.
154     jint* sourceOffset = &myData[0];
155     jint* targetOffset = &myData[1];
156     const char* mySource = reinterpret_cast<const char*>(uSource.get() + *sourceOffset);
157     const char* mySourceLimit = reinterpret_cast<const char*>(uSource.get() + sourceEnd);
158     UChar* cTarget = uTarget.get() + *targetOffset;
159     const UChar* cTargetLimit = uTarget.get() + targetEnd;
160     UErrorCode errorCode = U_ZERO_ERROR;
161     ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, flush, &errorCode);
162     *sourceOffset = mySource - reinterpret_cast<const char*>(uSource.get()) - *sourceOffset;
163     *targetOffset = cTarget - uTarget.get() - *targetOffset;
164 
165     // Check how much more input is necessary to complete what's in the converter's internal buffer.
166     UErrorCode minorErrorCode = U_ZERO_ERROR;
167     jint pending = ucnv_toUCountPending(cnv, &minorErrorCode);
168     myData[3] = pending;
169 
170     // If there was an error, count the problematic bytes.
171     if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND) {
172         int8_t len = 32;
173         char invalidChars[32] = {'\0'};
174         ucnv_getInvalidChars(cnv, invalidChars, &len, &minorErrorCode);
175         if (U_SUCCESS(minorErrorCode)) {
176             myData[2] = len;
177         }
178     }
179 
180     return errorCode;
181 }
182 
NativeConverter_resetByteToChar(JNIEnv *,jclass,jlong address)183 static void NativeConverter_resetByteToChar(JNIEnv*, jclass, jlong address) {
184     UConverter* cnv = toUConverter(address);
185     if (cnv) {
186         ucnv_resetToUnicode(cnv);
187     }
188 }
189 
NativeConverter_resetCharToByte(JNIEnv *,jclass,jlong address)190 static void NativeConverter_resetCharToByte(JNIEnv*, jclass, jlong address) {
191     UConverter* cnv = toUConverter(address);
192     if (cnv) {
193         ucnv_resetFromUnicode(cnv);
194     }
195 }
196 
NativeConverter_getMaxBytesPerChar(JNIEnv *,jclass,jlong address)197 static jint NativeConverter_getMaxBytesPerChar(JNIEnv*, jclass, jlong address) {
198     UConverter* cnv = toUConverter(address);
199     return (cnv != NULL) ? ucnv_getMaxCharSize(cnv) : -1;
200 }
201 
NativeConverter_getMinBytesPerChar(JNIEnv *,jclass,jlong address)202 static jint NativeConverter_getMinBytesPerChar(JNIEnv*, jclass, jlong address) {
203     UConverter* cnv = toUConverter(address);
204     return (cnv != NULL) ? ucnv_getMinCharSize(cnv) : -1;
205 }
206 
NativeConverter_getAveBytesPerChar(JNIEnv *,jclass,jlong address)207 static jfloat NativeConverter_getAveBytesPerChar(JNIEnv*, jclass, jlong address) {
208     UConverter* cnv = toUConverter(address);
209     return (cnv != NULL) ? ((ucnv_getMaxCharSize(cnv) + ucnv_getMinCharSize(cnv)) / 2.0) : -1;
210 }
211 
NativeConverter_flushByteToChar(JNIEnv * env,jclass,jlong address,jcharArray target,jint targetEnd,jintArray data)212 static jint NativeConverter_flushByteToChar(JNIEnv* env, jclass, jlong address,
213         jcharArray target, jint targetEnd, jintArray data) {
214     UConverter* cnv = toUConverter(address);
215     if (cnv == NULL) {
216         return U_ILLEGAL_ARGUMENT_ERROR;
217     }
218     ScopedCharArrayRW uTarget(env, target);
219     if (uTarget.get() == NULL) {
220         return U_ILLEGAL_ARGUMENT_ERROR;
221     }
222     ScopedIntArrayRW myData(env, data);
223     if (myData.get() == NULL) {
224         return U_ILLEGAL_ARGUMENT_ERROR;
225     }
226     char source = '\0';
227     jint* targetOffset = &myData[1];
228     const char* mySource = &source;
229     const char* mySourceLimit = &source;
230     UChar* cTarget = uTarget.get() + *targetOffset;
231     const UChar* cTargetLimit = uTarget.get() + targetEnd;
232     UErrorCode errorCode = U_ZERO_ERROR;
233     ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, TRUE, &errorCode);
234     *targetOffset = cTarget - uTarget.get() - *targetOffset;
235     return errorCode;
236 }
237 
NativeConverter_flushCharToByte(JNIEnv * env,jclass,jlong address,jbyteArray target,jint targetEnd,jintArray data)238 static jint NativeConverter_flushCharToByte(JNIEnv* env, jclass, jlong address,
239         jbyteArray target, jint targetEnd, jintArray data) {
240     UConverter* cnv = toUConverter(address);
241     if (cnv == NULL) {
242         return U_ILLEGAL_ARGUMENT_ERROR;
243     }
244     ScopedByteArrayRW uTarget(env, target);
245     if (uTarget.get() == NULL) {
246         return U_ILLEGAL_ARGUMENT_ERROR;
247     }
248     ScopedIntArrayRW myData(env, data);
249     if (myData.get() == NULL) {
250         return U_ILLEGAL_ARGUMENT_ERROR;
251     }
252     jchar source = '\0';
253     jint* targetOffset = &myData[1];
254     const jchar* mySource = &source;
255     const UChar* mySourceLimit= &source;
256     char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset);
257     const char* cTargetLimit = reinterpret_cast<char*>(uTarget.get() + targetEnd);
258     UErrorCode errorCode = U_ZERO_ERROR;
259     ucnv_fromUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, TRUE, &errorCode);
260     *targetOffset = reinterpret_cast<jbyte*>(cTarget) - uTarget.get() - *targetOffset;
261     return errorCode;
262 }
263 
NativeConverter_canEncode(JNIEnv *,jclass,jlong address,jint codeUnit)264 static jboolean NativeConverter_canEncode(JNIEnv*, jclass, jlong address, jint codeUnit) {
265     UErrorCode errorCode = U_ZERO_ERROR;
266     UConverter* cnv = toUConverter(address);
267     if (cnv == NULL) {
268         return JNI_FALSE;
269     }
270 
271     UChar srcBuffer[3];
272     const UChar* src = &srcBuffer[0];
273     const UChar* srcLimit = (codeUnit < 0x10000) ? &src[1] : &src[2];
274 
275     char dstBuffer[5];
276     char* dst = &dstBuffer[0];
277     const char* dstLimit = &dstBuffer[4];
278 
279     int i = 0;
280     UTF_APPEND_CHAR(&srcBuffer[0], i, 2, codeUnit);
281 
282     ucnv_fromUnicode(cnv, &dst, dstLimit, &src, srcLimit, NULL, TRUE, &errorCode);
283     return U_SUCCESS(errorCode);
284 }
285 
286 /*
287  * If a charset listed in the IANA Charset Registry is supported by an implementation
288  * of the Java platform then its canonical name must be the name listed in the registry.
289  * Many charsets are given more than one name in the registry, in which case the registry
290  * identifies one of the names as MIME-preferred. If a charset has more than one registry
291  * name then its canonical name must be the MIME-preferred name and the other names in
292  * the registry must be valid aliases. If a supported charset is not listed in the IANA
293  * registry then its canonical name must begin with one of the strings "X-" or "x-".
294  */
getJavaCanonicalName(JNIEnv * env,const char * icuCanonicalName)295 static jstring getJavaCanonicalName(JNIEnv* env, const char* icuCanonicalName) {
296     UErrorCode status = U_ZERO_ERROR;
297 
298     // Check to see if this is a well-known MIME or IANA name.
299     const char* cName = NULL;
300     if ((cName = ucnv_getStandardName(icuCanonicalName, "MIME", &status)) != NULL) {
301         return env->NewStringUTF(cName);
302     } else if ((cName = ucnv_getStandardName(icuCanonicalName, "IANA", &status)) != NULL) {
303         return env->NewStringUTF(cName);
304     }
305 
306     // Check to see if an alias already exists with "x-" prefix, if yes then
307     // make that the canonical name.
308     int32_t aliasCount = ucnv_countAliases(icuCanonicalName, &status);
309     for (int i = 0; i < aliasCount; ++i) {
310         const char* name = ucnv_getAlias(icuCanonicalName, i, &status);
311         if (name != NULL && name[0] == 'x' && name[1] == '-') {
312             return env->NewStringUTF(name);
313         }
314     }
315 
316     // As a last resort, prepend "x-" to any alias and make that the canonical name.
317     status = U_ZERO_ERROR;
318     const char* name = ucnv_getStandardName(icuCanonicalName, "UTR22", &status);
319     if (name == NULL && strchr(icuCanonicalName, ',') != NULL) {
320         name = ucnv_getAlias(icuCanonicalName, 1, &status);
321     }
322     // If there is no UTR22 canonical name then just return the original name.
323     if (name == NULL) {
324         name = icuCanonicalName;
325     }
326     UniquePtr<char[]> result(new char[2 + strlen(name) + 1]);
327     strcpy(&result[0], "x-");
328     strcat(&result[0], name);
329     return env->NewStringUTF(&result[0]);
330 }
331 
NativeConverter_getAvailableCharsetNames(JNIEnv * env,jclass)332 static jobjectArray NativeConverter_getAvailableCharsetNames(JNIEnv* env, jclass) {
333     int32_t num = ucnv_countAvailable();
334     jobjectArray result = env->NewObjectArray(num, JniConstants::stringClass, NULL);
335     for (int i = 0; i < num; ++i) {
336         const char* name = ucnv_getAvailableName(i);
337         ScopedLocalRef<jstring> javaCanonicalName(env, getJavaCanonicalName(env, name));
338         env->SetObjectArrayElement(result, i, javaCanonicalName.get());
339     }
340     return result;
341 }
342 
getAliases(JNIEnv * env,const char * icuCanonicalName)343 static jobjectArray getAliases(JNIEnv* env, const char* icuCanonicalName) {
344     // Get an upper bound on the number of aliases...
345     const char* myEncName = icuCanonicalName;
346     UErrorCode error = U_ZERO_ERROR;
347     int32_t aliasCount = ucnv_countAliases(myEncName, &error);
348     if (aliasCount == 0 && myEncName[0] == 'x' && myEncName[1] == '-') {
349         myEncName = myEncName + 2;
350         aliasCount = ucnv_countAliases(myEncName, &error);
351     }
352     if (!U_SUCCESS(error)) {
353         return NULL;
354     }
355 
356     // Collect the aliases we want...
357     const char* aliasArray[aliasCount];
358     int actualAliasCount = 0;
359     for(int i = 0; i < aliasCount; ++i) {
360         const char* name = ucnv_getAlias(myEncName, (uint16_t) i, &error);
361         if (!U_SUCCESS(error)) {
362             return NULL;
363         }
364         // TODO: why do we ignore these ones?
365         if (strchr(name, '+') == 0 && strchr(name, ',') == 0) {
366             aliasArray[actualAliasCount++]= name;
367         }
368     }
369 
370     // Convert our C++ char*[] into a Java String[]...
371     jobjectArray result = env->NewObjectArray(actualAliasCount, JniConstants::stringClass, NULL);
372     for (int i = 0; i < actualAliasCount; ++i) {
373         ScopedLocalRef<jstring> alias(env, env->NewStringUTF(aliasArray[i]));
374         env->SetObjectArrayElement(result, i, alias.get());
375     }
376     return result;
377 }
378 
getICUCanonicalName(const char * name)379 static const char* getICUCanonicalName(const char* name) {
380     UErrorCode error = U_ZERO_ERROR;
381     const char* canonicalName = NULL;
382     if ((canonicalName = ucnv_getCanonicalName(name, "MIME", &error)) != NULL) {
383         return canonicalName;
384     } else if((canonicalName = ucnv_getCanonicalName(name, "IANA", &error)) != NULL) {
385         return canonicalName;
386     } else if((canonicalName = ucnv_getCanonicalName(name, "", &error)) != NULL) {
387         return canonicalName;
388     } else if((canonicalName =  ucnv_getAlias(name, 0, &error)) != NULL) {
389         /* we have some aliases in the form x-blah .. match those first */
390         return canonicalName;
391     } else if (strstr(name, "x-") == name) {
392         /* check if the converter can be opened with the name given */
393         error = U_ZERO_ERROR;
394         UniqueUConverter cnv(ucnv_open(name + 2, &error));
395         if (cnv.get() != NULL) {
396             return name + 2;
397         }
398     }
399     return NULL;
400 }
401 
CHARSET_ENCODER_CALLBACK(const void * rawContext,UConverterFromUnicodeArgs * args,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * status)402 static void CHARSET_ENCODER_CALLBACK(const void* rawContext, UConverterFromUnicodeArgs* args,
403         const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason,
404         UErrorCode* status) {
405     if (!rawContext) {
406         return;
407     }
408     const EncoderCallbackContext* ctx = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
409     switch(reason) {
410     case UCNV_UNASSIGNED:
411         ctx->onUnmappableInput(ctx, args, codeUnits, length, codePoint, reason, status);
412         return;
413     case UCNV_ILLEGAL:
414     case UCNV_IRREGULAR:
415         ctx->onMalformedInput(ctx, args, codeUnits, length, codePoint, reason, status);
416         return;
417     case UCNV_CLOSE:
418         delete ctx;
419         return;
420     default:
421         *status = U_ILLEGAL_ARGUMENT_ERROR;
422         return;
423     }
424 }
425 
encoderReplaceCallback(const void * rawContext,UConverterFromUnicodeArgs * fromArgs,const UChar *,int32_t,UChar32,UConverterCallbackReason,UErrorCode * err)426 static void encoderReplaceCallback(const void* rawContext,
427         UConverterFromUnicodeArgs* fromArgs, const UChar*, int32_t, UChar32,
428         UConverterCallbackReason, UErrorCode * err) {
429     if (rawContext == NULL) {
430         return;
431     }
432     const EncoderCallbackContext* context = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
433     *err = U_ZERO_ERROR;
434     ucnv_cbFromUWriteBytes(fromArgs, context->subBytes, context->length, 0, err);
435 }
436 
getFromUCallback(int32_t mode)437 static UConverterFromUCallback getFromUCallback(int32_t mode) {
438     switch(mode) {
439     case NativeConverter_REPORT:
440         return UCNV_FROM_U_CALLBACK_STOP;
441     case NativeConverter_IGNORE:
442         return UCNV_FROM_U_CALLBACK_SKIP;
443     case NativeConverter_REPLACE:
444         return encoderReplaceCallback;
445     }
446     abort();
447 }
448 
NativeConverter_setCallbackEncode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jbyteArray subBytes)449 static jint NativeConverter_setCallbackEncode(JNIEnv* env, jclass, jlong address,
450         jint onMalformedInput, jint onUnmappableInput, jbyteArray subBytes) {
451     UConverter* cnv = toUConverter(address);
452     if (!cnv) {
453         return U_ILLEGAL_ARGUMENT_ERROR;
454     }
455     UConverterFromUCallback fromUOldAction = NULL;
456     const void* fromUOldContext = NULL;
457     ucnv_getFromUCallBack(cnv, &fromUOldAction, const_cast<const void**>(&fromUOldContext));
458 
459     /* fromUOldContext can only be DecodeCallbackContext since
460      * the converter created is private data for the decoder
461      * and callbacks can only be set via this method!
462      */
463     EncoderCallbackContext* fromUNewContext=NULL;
464     UConverterFromUCallback fromUNewAction=NULL;
465     if (fromUOldContext == NULL) {
466         fromUNewContext = new EncoderCallbackContext;
467         fromUNewAction = CHARSET_ENCODER_CALLBACK;
468     } else {
469         fromUNewContext = const_cast<EncoderCallbackContext*>(
470                 reinterpret_cast<const EncoderCallbackContext*>(fromUOldContext));
471         fromUNewAction = fromUOldAction;
472         fromUOldAction = NULL;
473         fromUOldContext = NULL;
474     }
475     fromUNewContext->onMalformedInput = getFromUCallback(onMalformedInput);
476     fromUNewContext->onUnmappableInput = getFromUCallback(onUnmappableInput);
477     ScopedByteArrayRO sub(env, subBytes);
478     if (sub.get() == NULL) {
479         return U_ILLEGAL_ARGUMENT_ERROR;
480     }
481     fromUNewContext->length = sub.size();
482     memcpy(fromUNewContext->subBytes, sub.get(), sub.size());
483     UErrorCode errorCode = U_ZERO_ERROR;
484     ucnv_setFromUCallBack(cnv, fromUNewAction, fromUNewContext, &fromUOldAction, &fromUOldContext,
485             &errorCode);
486     return errorCode;
487 }
488 
decoderIgnoreCallback(const void *,UConverterToUnicodeArgs *,const char *,int32_t,UConverterCallbackReason,UErrorCode * err)489 static void decoderIgnoreCallback(const void*, UConverterToUnicodeArgs*, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) {
490     // The icu4c UCNV_FROM_U_CALLBACK_SKIP callback requires that the context is NULL, which is
491     // never true for us.
492     *err = U_ZERO_ERROR;
493 }
494 
decoderReplaceCallback(const void * rawContext,UConverterToUnicodeArgs * toArgs,const char *,int32_t,UConverterCallbackReason,UErrorCode * err)495 static void decoderReplaceCallback(const void* rawContext,
496         UConverterToUnicodeArgs* toArgs, const char*, int32_t, UConverterCallbackReason,
497         UErrorCode* err) {
498     if (!rawContext) {
499         return;
500     }
501     const DecoderCallbackContext* context = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
502     *err = U_ZERO_ERROR;
503     ucnv_cbToUWriteUChars(toArgs,context->subUChars, context->length, 0, err);
504 }
505 
getToUCallback(int32_t mode)506 static UConverterToUCallback getToUCallback(int32_t mode) {
507     switch (mode) {
508     case NativeConverter_IGNORE: return decoderIgnoreCallback;
509     case NativeConverter_REPLACE: return decoderReplaceCallback;
510     case NativeConverter_REPORT: return UCNV_TO_U_CALLBACK_STOP;
511     }
512     abort();
513 }
514 
CHARSET_DECODER_CALLBACK(const void * rawContext,UConverterToUnicodeArgs * args,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * status)515 static void CHARSET_DECODER_CALLBACK(const void* rawContext, UConverterToUnicodeArgs* args,
516         const char* codeUnits, int32_t length,
517         UConverterCallbackReason reason, UErrorCode* status) {
518     if (!rawContext) {
519         return;
520     }
521     const DecoderCallbackContext* ctx = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
522     switch(reason) {
523     case UCNV_UNASSIGNED:
524         ctx->onUnmappableInput(ctx, args, codeUnits, length, reason, status);
525         return;
526     case UCNV_ILLEGAL:
527     case UCNV_IRREGULAR:
528         ctx->onMalformedInput(ctx, args, codeUnits, length, reason, status);
529         return;
530     case UCNV_CLOSE:
531         delete ctx;
532         return;
533     default:
534         *status = U_ILLEGAL_ARGUMENT_ERROR;
535         return;
536     }
537 }
538 
NativeConverter_setCallbackDecode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jcharArray subChars)539 static jint NativeConverter_setCallbackDecode(JNIEnv* env, jclass, jlong address,
540         jint onMalformedInput, jint onUnmappableInput, jcharArray subChars) {
541     UConverter* cnv = toUConverter(address);
542     if (cnv == NULL) {
543         return U_ILLEGAL_ARGUMENT_ERROR;
544     }
545 
546     UConverterToUCallback toUOldAction;
547     const void* toUOldContext;
548     ucnv_getToUCallBack(cnv, &toUOldAction, &toUOldContext);
549 
550     /* toUOldContext can only be DecodeCallbackContext since
551      * the converter created is private data for the decoder
552      * and callbacks can only be set via this method!
553      */
554     DecoderCallbackContext* toUNewContext = NULL;
555     UConverterToUCallback toUNewAction = NULL;
556     if (toUOldContext == NULL) {
557         toUNewContext = new DecoderCallbackContext;
558         toUNewAction = CHARSET_DECODER_CALLBACK;
559     } else {
560         toUNewContext = const_cast<DecoderCallbackContext*>(
561                 reinterpret_cast<const DecoderCallbackContext*>(toUOldContext));
562         toUNewAction = toUOldAction;
563         toUOldAction = NULL;
564         toUOldContext = NULL;
565     }
566     toUNewContext->onMalformedInput = getToUCallback(onMalformedInput);
567     toUNewContext->onUnmappableInput = getToUCallback(onUnmappableInput);
568     ScopedCharArrayRO sub(env, subChars);
569     if (sub.get() == NULL) {
570         return U_ILLEGAL_ARGUMENT_ERROR;
571     }
572     toUNewContext->length = sub.size();
573     u_strncpy(toUNewContext->subUChars, sub.get(), sub.size());
574     UErrorCode errorCode = U_ZERO_ERROR;
575     ucnv_setToUCallBack(cnv, toUNewAction, toUNewContext, &toUOldAction, &toUOldContext,
576             &errorCode);
577     return errorCode;
578 }
579 
NativeConverter_getAveCharsPerByte(JNIEnv * env,jclass,jlong handle)580 static jfloat NativeConverter_getAveCharsPerByte(JNIEnv* env, jclass, jlong handle) {
581     return (1 / (jfloat) NativeConverter_getMaxBytesPerChar(env, NULL, handle));
582 }
583 
NativeConverter_getSubstitutionBytes(JNIEnv * env,jclass,jlong address)584 static jbyteArray NativeConverter_getSubstitutionBytes(JNIEnv* env, jclass, jlong address) {
585     UConverter* cnv = toUConverter(address);
586     if (cnv == NULL) {
587         return NULL;
588     }
589     UErrorCode status = U_ZERO_ERROR;
590     char subBytes[10];
591     int8_t len = sizeof(subBytes);
592     ucnv_getSubstChars(cnv, subBytes, &len, &status);
593     if (!U_SUCCESS(status)) {
594         return env->NewByteArray(0);
595     }
596     jbyteArray result = env->NewByteArray(len);
597     if (result == NULL) {
598         return NULL;
599     }
600     env->SetByteArrayRegion(result, 0, len, reinterpret_cast<jbyte*>(subBytes));
601     return result;
602 }
603 
NativeConverter_contains(JNIEnv * env,jclass,jstring name1,jstring name2)604 static jboolean NativeConverter_contains(JNIEnv* env, jclass, jstring name1, jstring name2) {
605     ScopedUtfChars name1Chars(env, name1);
606     if (name1Chars.c_str() == NULL) {
607         return JNI_FALSE;
608     }
609     ScopedUtfChars name2Chars(env, name2);
610     if (name2Chars.c_str() == NULL) {
611         return JNI_FALSE;
612     }
613 
614     UErrorCode errorCode = U_ZERO_ERROR;
615     UniqueUConverter converter1(ucnv_open(name1Chars.c_str(), &errorCode));
616     UnicodeSet set1;
617     ucnv_getUnicodeSet(converter1.get(), set1.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
618 
619     UniqueUConverter converter2(ucnv_open(name2Chars.c_str(), &errorCode));
620     UnicodeSet set2;
621     ucnv_getUnicodeSet(converter2.get(), set2.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
622 
623     return U_SUCCESS(errorCode) && set1.containsAll(set2);
624 }
625 
NativeConverter_charsetForName(JNIEnv * env,jclass,jstring charsetName)626 static jobject NativeConverter_charsetForName(JNIEnv* env, jclass, jstring charsetName) {
627     ScopedUtfChars charsetNameChars(env, charsetName);
628     if (charsetNameChars.c_str() == NULL) {
629         return NULL;
630     }
631     // Get ICU's canonical name for this charset.
632     const char* icuCanonicalName = getICUCanonicalName(charsetNameChars.c_str());
633     if (icuCanonicalName == NULL) {
634         return NULL;
635     }
636     // Get Java's canonical name for this charset.
637     jstring javaCanonicalName = getJavaCanonicalName(env, icuCanonicalName);
638     if (env->ExceptionOccurred()) {
639         return NULL;
640     }
641 
642     // Check that this charset is supported.
643     // ICU doesn't offer any "isSupported", so we just open and immediately close.
644     // We ignore the UErrorCode because ucnv_open returning NULL is all the information we need.
645     UErrorCode dummy = U_ZERO_ERROR;
646     UniqueUConverter cnv(ucnv_open(icuCanonicalName, &dummy));
647     if (cnv.get() == NULL) {
648         return NULL;
649     }
650     cnv.reset();
651 
652     // Get the aliases for this charset.
653     jobjectArray aliases = getAliases(env, icuCanonicalName);
654     if (env->ExceptionOccurred()) {
655         return NULL;
656     }
657 
658     // Construct the CharsetICU object.
659     jmethodID charsetConstructor = env->GetMethodID(JniConstants::charsetICUClass, "<init>",
660             "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;)V");
661     if (env->ExceptionOccurred()) {
662         return NULL;
663     }
664     return env->NewObject(JniConstants::charsetICUClass, charsetConstructor,
665             javaCanonicalName, env->NewStringUTF(icuCanonicalName), aliases);
666 }
667 
668 static JNINativeMethod gMethods[] = {
669     NATIVE_METHOD(NativeConverter, canEncode, "(JI)Z"),
670     NATIVE_METHOD(NativeConverter, charsetForName, "(Ljava/lang/String;)Ljava/nio/charset/Charset;"),
671     NATIVE_METHOD(NativeConverter, closeConverter, "(J)V"),
672     NATIVE_METHOD(NativeConverter, contains, "(Ljava/lang/String;Ljava/lang/String;)Z"),
673     NATIVE_METHOD(NativeConverter, decode, "(J[BI[CI[IZ)I"),
674     NATIVE_METHOD(NativeConverter, encode, "(J[CI[BI[IZ)I"),
675     NATIVE_METHOD(NativeConverter, flushByteToChar, "(J[CI[I)I"),
676     NATIVE_METHOD(NativeConverter, flushCharToByte, "(J[BI[I)I"),
677     NATIVE_METHOD(NativeConverter, getAvailableCharsetNames, "()[Ljava/lang/String;"),
678     NATIVE_METHOD(NativeConverter, getAveBytesPerChar, "(J)F"),
679     NATIVE_METHOD(NativeConverter, getAveCharsPerByte, "(J)F"),
680     NATIVE_METHOD(NativeConverter, getMaxBytesPerChar, "(J)I"),
681     NATIVE_METHOD(NativeConverter, getMinBytesPerChar, "(J)I"),
682     NATIVE_METHOD(NativeConverter, getSubstitutionBytes, "(J)[B"),
683     NATIVE_METHOD(NativeConverter, openConverter, "(Ljava/lang/String;)J"),
684     NATIVE_METHOD(NativeConverter, resetByteToChar, "(J)V"),
685     NATIVE_METHOD(NativeConverter, resetCharToByte, "(J)V"),
686     NATIVE_METHOD(NativeConverter, setCallbackDecode, "(JII[C)I"),
687     NATIVE_METHOD(NativeConverter, setCallbackEncode, "(JII[B)I"),
688 };
register_com_ibm_icu4jni_converters_NativeConverter(JNIEnv * env)689 int register_com_ibm_icu4jni_converters_NativeConverter(JNIEnv* env) {
690     return jniRegisterNativeMethods(env, "com/ibm/icu4jni/charset/NativeConverter",
691                 gMethods, NELEM(gMethods));
692 }
693