• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2 *******************************************************************************
3 * Copyright (C) 1996-2006, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 *
7 *
8 *******************************************************************************
9 */
10 /*
11  * (C) Copyright IBM Corp. 2000 - All Rights Reserved
12  *  A JNI wrapper to ICU native converter Interface
13  * @author: Ram Viswanadha
14  */
15 
16 #define LOG_TAG "NativeConverter"
17 
18 #include "JNIHelp.h"
19 #include "JniConstants.h"
20 #include "JniException.h"
21 #include "ScopedLocalRef.h"
22 #include "ScopedPrimitiveArray.h"
23 #include "ScopedStringChars.h"
24 #include "ScopedUtfChars.h"
25 #include "UniquePtr.h"
26 #include "cutils/log.h"
27 #include "toStringArray.h"
28 #include "unicode/ucnv.h"
29 #include "unicode/ucnv_cb.h"
30 #include "unicode/uniset.h"
31 #include "unicode/ustring.h"
32 #include "unicode/utypes.h"
33 
34 #include <vector>
35 
36 #include <stdlib.h>
37 #include <string.h>
38 
39 #define NativeConverter_REPORT 0
40 #define NativeConverter_IGNORE 1
41 #define NativeConverter_REPLACE 2
42 
43 #define MAX_REPLACEMENT_LENGTH 32 // equivalent to UCNV_ERROR_BUFFER_LENGTH
44 
45 struct DecoderCallbackContext {
46     UChar replacementChars[MAX_REPLACEMENT_LENGTH];
47     size_t replacementCharCount;
48     UConverterToUCallback onUnmappableInput;
49     UConverterToUCallback onMalformedInput;
50 };
51 
52 struct EncoderCallbackContext {
53     char replacementBytes[MAX_REPLACEMENT_LENGTH];
54     size_t replacementByteCount;
55     UConverterFromUCallback onUnmappableInput;
56     UConverterFromUCallback onMalformedInput;
57 };
58 
59 struct UConverterDeleter {
operator ()UConverterDeleter60     void operator()(UConverter* p) const {
61         ucnv_close(p);
62     }
63 };
64 typedef UniquePtr<UConverter, UConverterDeleter> UniqueUConverter;
65 
toUConverter(jlong address)66 static UConverter* toUConverter(jlong address) {
67     return reinterpret_cast<UConverter*>(static_cast<uintptr_t>(address));
68 }
69 
NativeConverter_openConverter(JNIEnv * env,jclass,jstring converterName)70 static jlong NativeConverter_openConverter(JNIEnv* env, jclass, jstring converterName) {
71     ScopedUtfChars converterNameChars(env, converterName);
72     if (converterNameChars.c_str() == NULL) {
73         return 0;
74     }
75     UErrorCode status = U_ZERO_ERROR;
76     UConverter* cnv = ucnv_open(converterNameChars.c_str(), &status);
77     maybeThrowIcuException(env, status);
78     return reinterpret_cast<uintptr_t>(cnv);
79 }
80 
NativeConverter_closeConverter(JNIEnv *,jclass,jlong address)81 static void NativeConverter_closeConverter(JNIEnv*, jclass, jlong address) {
82     ucnv_close(toUConverter(address));
83 }
84 
NativeConverter_encode(JNIEnv * env,jclass,jlong address,jcharArray source,jint sourceEnd,jbyteArray target,jint targetEnd,jintArray data,jboolean flush)85 static jint NativeConverter_encode(JNIEnv* env, jclass, jlong address,
86         jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd,
87         jintArray data, jboolean flush) {
88 
89     UConverter* cnv = toUConverter(address);
90     if (cnv == NULL) {
91         return U_ILLEGAL_ARGUMENT_ERROR;
92     }
93     ScopedCharArrayRO uSource(env, source);
94     if (uSource.get() == NULL) {
95         return U_ILLEGAL_ARGUMENT_ERROR;
96     }
97     ScopedByteArrayRW uTarget(env, target);
98     if (uTarget.get() == NULL) {
99         return U_ILLEGAL_ARGUMENT_ERROR;
100     }
101     ScopedIntArrayRW myData(env, data);
102     if (myData.get() == NULL) {
103         return U_ILLEGAL_ARGUMENT_ERROR;
104     }
105 
106     // Do the conversion.
107     jint* sourceOffset = &myData[0];
108     jint* targetOffset = &myData[1];
109     const jchar* mySource = uSource.get() + *sourceOffset;
110     const UChar* mySourceLimit= uSource.get() + sourceEnd;
111     char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset);
112     const char* cTargetLimit = reinterpret_cast<const char*>(uTarget.get() + targetEnd);
113     UErrorCode errorCode = U_ZERO_ERROR;
114     ucnv_fromUnicode(cnv , &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, (UBool) flush, &errorCode);
115     *sourceOffset = (mySource - uSource.get()) - *sourceOffset;
116     *targetOffset = (reinterpret_cast<jbyte*>(cTarget) - uTarget.get()) - *targetOffset;
117 
118     // If there was an error, count the problematic characters.
119     if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND) {
120         int8_t invalidUCharCount = 32;
121         UChar invalidUChars[32];
122         UErrorCode minorErrorCode = U_ZERO_ERROR;
123         ucnv_getInvalidUChars(cnv, invalidUChars, &invalidUCharCount, &minorErrorCode);
124         if (U_SUCCESS(minorErrorCode)) {
125             myData[2] = invalidUCharCount;
126         }
127     }
128     return errorCode;
129 }
130 
NativeConverter_decode(JNIEnv * env,jclass,jlong address,jbyteArray source,jint sourceEnd,jcharArray target,jint targetEnd,jintArray data,jboolean flush)131 static jint NativeConverter_decode(JNIEnv* env, jclass, jlong address,
132         jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd,
133         jintArray data, jboolean flush) {
134 
135     UConverter* cnv = toUConverter(address);
136     if (cnv == NULL) {
137         return U_ILLEGAL_ARGUMENT_ERROR;
138     }
139     ScopedByteArrayRO uSource(env, source);
140     if (uSource.get() == NULL) {
141         return U_ILLEGAL_ARGUMENT_ERROR;
142     }
143     ScopedCharArrayRW uTarget(env, target);
144     if (uTarget.get() == NULL) {
145         return U_ILLEGAL_ARGUMENT_ERROR;
146     }
147     ScopedIntArrayRW myData(env, data);
148     if (myData.get() == NULL) {
149         return U_ILLEGAL_ARGUMENT_ERROR;
150     }
151 
152     // Do the conversion.
153     jint* sourceOffset = &myData[0];
154     jint* targetOffset = &myData[1];
155     const char* mySource = reinterpret_cast<const char*>(uSource.get() + *sourceOffset);
156     const char* mySourceLimit = reinterpret_cast<const char*>(uSource.get() + sourceEnd);
157     UChar* cTarget = uTarget.get() + *targetOffset;
158     const UChar* cTargetLimit = uTarget.get() + targetEnd;
159     UErrorCode errorCode = U_ZERO_ERROR;
160     ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, flush, &errorCode);
161     *sourceOffset = mySource - reinterpret_cast<const char*>(uSource.get()) - *sourceOffset;
162     *targetOffset = cTarget - uTarget.get() - *targetOffset;
163 
164     // If there was an error, count the problematic bytes.
165     if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND) {
166         int8_t invalidByteCount = 32;
167         char invalidBytes[32] = {'\0'};
168         UErrorCode minorErrorCode = U_ZERO_ERROR;
169         ucnv_getInvalidChars(cnv, invalidBytes, &invalidByteCount, &minorErrorCode);
170         if (U_SUCCESS(minorErrorCode)) {
171             myData[2] = invalidByteCount;
172         }
173     }
174 
175     return errorCode;
176 }
177 
NativeConverter_resetByteToChar(JNIEnv *,jclass,jlong address)178 static void NativeConverter_resetByteToChar(JNIEnv*, jclass, jlong address) {
179     UConverter* cnv = toUConverter(address);
180     if (cnv) {
181         ucnv_resetToUnicode(cnv);
182     }
183 }
184 
NativeConverter_resetCharToByte(JNIEnv *,jclass,jlong address)185 static void NativeConverter_resetCharToByte(JNIEnv*, jclass, jlong address) {
186     UConverter* cnv = toUConverter(address);
187     if (cnv) {
188         ucnv_resetFromUnicode(cnv);
189     }
190 }
191 
NativeConverter_getMaxBytesPerChar(JNIEnv *,jclass,jlong address)192 static jint NativeConverter_getMaxBytesPerChar(JNIEnv*, jclass, jlong address) {
193     UConverter* cnv = toUConverter(address);
194     return (cnv != NULL) ? ucnv_getMaxCharSize(cnv) : -1;
195 }
196 
NativeConverter_getMinBytesPerChar(JNIEnv *,jclass,jlong address)197 static jint NativeConverter_getMinBytesPerChar(JNIEnv*, jclass, jlong address) {
198     UConverter* cnv = toUConverter(address);
199     return (cnv != NULL) ? ucnv_getMinCharSize(cnv) : -1;
200 }
201 
NativeConverter_getAveBytesPerChar(JNIEnv *,jclass,jlong address)202 static jfloat NativeConverter_getAveBytesPerChar(JNIEnv*, jclass, jlong address) {
203     UConverter* cnv = toUConverter(address);
204     return (cnv != NULL) ? ((ucnv_getMaxCharSize(cnv) + ucnv_getMinCharSize(cnv)) / 2.0) : -1;
205 }
206 
NativeConverter_canEncode(JNIEnv *,jclass,jlong address,jint codeUnit)207 static jboolean NativeConverter_canEncode(JNIEnv*, jclass, jlong address, jint codeUnit) {
208     UErrorCode errorCode = U_ZERO_ERROR;
209     UConverter* cnv = toUConverter(address);
210     if (cnv == NULL) {
211         return JNI_FALSE;
212     }
213 
214     UChar srcBuffer[3];
215     const UChar* src = &srcBuffer[0];
216     const UChar* srcLimit = (codeUnit < 0x10000) ? &src[1] : &src[2];
217 
218     char dstBuffer[5];
219     char* dst = &dstBuffer[0];
220     const char* dstLimit = &dstBuffer[4];
221 
222     int i = 0;
223     UTF_APPEND_CHAR(&srcBuffer[0], i, 2, codeUnit);
224 
225     ucnv_fromUnicode(cnv, &dst, dstLimit, &src, srcLimit, NULL, TRUE, &errorCode);
226     return U_SUCCESS(errorCode);
227 }
228 
229 /*
230  * If a charset listed in the IANA Charset Registry is supported by an implementation
231  * of the Java platform then its canonical name must be the name listed in the registry.
232  * Many charsets are given more than one name in the registry, in which case the registry
233  * identifies one of the names as MIME-preferred. If a charset has more than one registry
234  * name then its canonical name must be the MIME-preferred name and the other names in
235  * the registry must be valid aliases. If a supported charset is not listed in the IANA
236  * registry then its canonical name must begin with one of the strings "X-" or "x-".
237  */
getJavaCanonicalName(JNIEnv * env,const char * icuCanonicalName)238 static jstring getJavaCanonicalName(JNIEnv* env, const char* icuCanonicalName) {
239     UErrorCode status = U_ZERO_ERROR;
240 
241     // Check to see if this is a well-known MIME or IANA name.
242     const char* cName = NULL;
243     if ((cName = ucnv_getStandardName(icuCanonicalName, "MIME", &status)) != NULL) {
244         return env->NewStringUTF(cName);
245     } else if ((cName = ucnv_getStandardName(icuCanonicalName, "IANA", &status)) != NULL) {
246         return env->NewStringUTF(cName);
247     }
248 
249     // Check to see if an alias already exists with "x-" prefix, if yes then
250     // make that the canonical name.
251     int32_t aliasCount = ucnv_countAliases(icuCanonicalName, &status);
252     for (int i = 0; i < aliasCount; ++i) {
253         const char* name = ucnv_getAlias(icuCanonicalName, i, &status);
254         if (name != NULL && name[0] == 'x' && name[1] == '-') {
255             return env->NewStringUTF(name);
256         }
257     }
258 
259     // As a last resort, prepend "x-" to any alias and make that the canonical name.
260     status = U_ZERO_ERROR;
261     const char* name = ucnv_getStandardName(icuCanonicalName, "UTR22", &status);
262     if (name == NULL && strchr(icuCanonicalName, ',') != NULL) {
263         name = ucnv_getAlias(icuCanonicalName, 1, &status);
264     }
265     // If there is no UTR22 canonical name then just return the original name.
266     if (name == NULL) {
267         name = icuCanonicalName;
268     }
269     UniquePtr<char[]> result(new char[2 + strlen(name) + 1]);
270     strcpy(&result[0], "x-");
271     strcat(&result[0], name);
272     return env->NewStringUTF(&result[0]);
273 }
274 
NativeConverter_getAvailableCharsetNames(JNIEnv * env,jclass)275 static jobjectArray NativeConverter_getAvailableCharsetNames(JNIEnv* env, jclass) {
276     int32_t num = ucnv_countAvailable();
277     jobjectArray result = env->NewObjectArray(num, JniConstants::stringClass, NULL);
278     if (result == NULL) {
279         return NULL;
280     }
281     for (int i = 0; i < num; ++i) {
282         const char* name = ucnv_getAvailableName(i);
283         ScopedLocalRef<jstring> javaCanonicalName(env, getJavaCanonicalName(env, name));
284         if (javaCanonicalName.get() == NULL) {
285             return NULL;
286         }
287         env->SetObjectArrayElement(result, i, javaCanonicalName.get());
288         if (env->ExceptionCheck()) {
289             return NULL;
290         }
291     }
292     return result;
293 }
294 
getAliases(JNIEnv * env,const char * icuCanonicalName)295 static jobjectArray getAliases(JNIEnv* env, const char* icuCanonicalName) {
296     // Get an upper bound on the number of aliases...
297     const char* myEncName = icuCanonicalName;
298     UErrorCode error = U_ZERO_ERROR;
299     size_t aliasCount = ucnv_countAliases(myEncName, &error);
300     if (aliasCount == 0 && myEncName[0] == 'x' && myEncName[1] == '-') {
301         myEncName = myEncName + 2;
302         aliasCount = ucnv_countAliases(myEncName, &error);
303     }
304     if (!U_SUCCESS(error)) {
305         return NULL;
306     }
307 
308     // Collect the aliases we want...
309     std::vector<std::string> aliases;
310     for (size_t i = 0; i < aliasCount; ++i) {
311         const char* name = ucnv_getAlias(myEncName, i, &error);
312         if (!U_SUCCESS(error)) {
313             return NULL;
314         }
315         // TODO: why do we ignore these ones?
316         if (strchr(name, '+') == 0 && strchr(name, ',') == 0) {
317             aliases.push_back(name);
318         }
319     }
320     return toStringArray(env, aliases);
321 }
322 
getICUCanonicalName(const char * name)323 static const char* getICUCanonicalName(const char* name) {
324     UErrorCode error = U_ZERO_ERROR;
325     const char* canonicalName = NULL;
326     if ((canonicalName = ucnv_getCanonicalName(name, "MIME", &error)) != NULL) {
327         return canonicalName;
328     } else if((canonicalName = ucnv_getCanonicalName(name, "IANA", &error)) != NULL) {
329         return canonicalName;
330     } else if((canonicalName = ucnv_getCanonicalName(name, "", &error)) != NULL) {
331         return canonicalName;
332     } else if((canonicalName =  ucnv_getAlias(name, 0, &error)) != NULL) {
333         /* we have some aliases in the form x-blah .. match those first */
334         return canonicalName;
335     } else if (strstr(name, "x-") == name) {
336         /* check if the converter can be opened with the name given */
337         error = U_ZERO_ERROR;
338         UniqueUConverter cnv(ucnv_open(name + 2, &error));
339         if (cnv.get() != NULL) {
340             return name + 2;
341         }
342     }
343     return NULL;
344 }
345 
CHARSET_ENCODER_CALLBACK(const void * rawContext,UConverterFromUnicodeArgs * args,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * status)346 static void CHARSET_ENCODER_CALLBACK(const void* rawContext, UConverterFromUnicodeArgs* args,
347         const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason,
348         UErrorCode* status) {
349     if (!rawContext) {
350         return;
351     }
352     const EncoderCallbackContext* ctx = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
353     switch(reason) {
354     case UCNV_UNASSIGNED:
355         ctx->onUnmappableInput(ctx, args, codeUnits, length, codePoint, reason, status);
356         return;
357     case UCNV_ILLEGAL:
358     case UCNV_IRREGULAR:
359         ctx->onMalformedInput(ctx, args, codeUnits, length, codePoint, reason, status);
360         return;
361     case UCNV_CLOSE:
362         delete ctx;
363         return;
364     default:
365         *status = U_ILLEGAL_ARGUMENT_ERROR;
366         return;
367     }
368 }
369 
encoderReplaceCallback(const void * rawContext,UConverterFromUnicodeArgs * fromArgs,const UChar *,int32_t,UChar32,UConverterCallbackReason,UErrorCode * err)370 static void encoderReplaceCallback(const void* rawContext,
371         UConverterFromUnicodeArgs* fromArgs, const UChar*, int32_t, UChar32,
372         UConverterCallbackReason, UErrorCode * err) {
373     if (rawContext == NULL) {
374         return;
375     }
376     const EncoderCallbackContext* context = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
377     *err = U_ZERO_ERROR;
378     ucnv_cbFromUWriteBytes(fromArgs, context->replacementBytes, context->replacementByteCount, 0, err);
379 }
380 
getFromUCallback(int32_t mode)381 static UConverterFromUCallback getFromUCallback(int32_t mode) {
382     switch(mode) {
383     case NativeConverter_IGNORE: return UCNV_FROM_U_CALLBACK_SKIP;
384     case NativeConverter_REPLACE: return encoderReplaceCallback;
385     case NativeConverter_REPORT: return UCNV_FROM_U_CALLBACK_STOP;
386     }
387     abort();
388 }
389 
NativeConverter_setCallbackEncode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jbyteArray javaReplacement)390 static jint NativeConverter_setCallbackEncode(JNIEnv* env, jclass, jlong address,
391         jint onMalformedInput, jint onUnmappableInput, jbyteArray javaReplacement) {
392     UConverter* cnv = toUConverter(address);
393     if (!cnv) {
394         return U_ILLEGAL_ARGUMENT_ERROR;
395     }
396 
397     UConverterFromUCallback oldCallback = NULL;
398     const void* oldCallbackContext = NULL;
399     ucnv_getFromUCallBack(cnv, &oldCallback, const_cast<const void**>(&oldCallbackContext));
400 
401     EncoderCallbackContext* callbackContext = const_cast<EncoderCallbackContext*>(
402             reinterpret_cast<const EncoderCallbackContext*>(oldCallbackContext));
403     if (callbackContext == NULL) {
404         callbackContext = new EncoderCallbackContext;
405     }
406 
407     callbackContext->onMalformedInput = getFromUCallback(onMalformedInput);
408     callbackContext->onUnmappableInput = getFromUCallback(onUnmappableInput);
409 
410     ScopedByteArrayRO replacementBytes(env, javaReplacement);
411     if (replacementBytes.get() == NULL) {
412         return U_ILLEGAL_ARGUMENT_ERROR;
413     }
414     memcpy(callbackContext->replacementBytes, replacementBytes.get(), replacementBytes.size());
415     callbackContext->replacementByteCount = replacementBytes.size();
416 
417     UErrorCode errorCode = U_ZERO_ERROR;
418     ucnv_setFromUCallBack(cnv, CHARSET_ENCODER_CALLBACK, callbackContext, NULL, NULL, &errorCode);
419     return errorCode;
420 }
421 
decoderIgnoreCallback(const void *,UConverterToUnicodeArgs *,const char *,int32_t,UConverterCallbackReason,UErrorCode * err)422 static void decoderIgnoreCallback(const void*, UConverterToUnicodeArgs*, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) {
423     // The icu4c UCNV_FROM_U_CALLBACK_SKIP callback requires that the context is NULL, which is
424     // never true for us.
425     *err = U_ZERO_ERROR;
426 }
427 
decoderReplaceCallback(const void * rawContext,UConverterToUnicodeArgs * toArgs,const char *,int32_t,UConverterCallbackReason,UErrorCode * err)428 static void decoderReplaceCallback(const void* rawContext,
429         UConverterToUnicodeArgs* toArgs, const char*, int32_t, UConverterCallbackReason,
430         UErrorCode* err) {
431     if (!rawContext) {
432         return;
433     }
434     const DecoderCallbackContext* context = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
435     *err = U_ZERO_ERROR;
436     ucnv_cbToUWriteUChars(toArgs,context->replacementChars, context->replacementCharCount, 0, err);
437 }
438 
getToUCallback(int32_t mode)439 static UConverterToUCallback getToUCallback(int32_t mode) {
440     switch (mode) {
441     case NativeConverter_IGNORE: return decoderIgnoreCallback;
442     case NativeConverter_REPLACE: return decoderReplaceCallback;
443     case NativeConverter_REPORT: return UCNV_TO_U_CALLBACK_STOP;
444     }
445     abort();
446 }
447 
CHARSET_DECODER_CALLBACK(const void * rawContext,UConverterToUnicodeArgs * args,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * status)448 static void CHARSET_DECODER_CALLBACK(const void* rawContext, UConverterToUnicodeArgs* args,
449         const char* codeUnits, int32_t length,
450         UConverterCallbackReason reason, UErrorCode* status) {
451     if (!rawContext) {
452         return;
453     }
454     const DecoderCallbackContext* ctx = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
455     switch(reason) {
456     case UCNV_UNASSIGNED:
457         ctx->onUnmappableInput(ctx, args, codeUnits, length, reason, status);
458         return;
459     case UCNV_ILLEGAL:
460     case UCNV_IRREGULAR:
461         ctx->onMalformedInput(ctx, args, codeUnits, length, reason, status);
462         return;
463     case UCNV_CLOSE:
464         delete ctx;
465         return;
466     default:
467         *status = U_ILLEGAL_ARGUMENT_ERROR;
468         return;
469     }
470 }
471 
NativeConverter_setCallbackDecode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jstring javaReplacement)472 static jint NativeConverter_setCallbackDecode(JNIEnv* env, jclass, jlong address,
473         jint onMalformedInput, jint onUnmappableInput, jstring javaReplacement) {
474     UConverter* cnv = toUConverter(address);
475     if (cnv == NULL) {
476         return U_ILLEGAL_ARGUMENT_ERROR;
477     }
478 
479     UConverterToUCallback oldCallback;
480     const void* oldCallbackContext;
481     ucnv_getToUCallBack(cnv, &oldCallback, &oldCallbackContext);
482 
483     DecoderCallbackContext* callbackContext = const_cast<DecoderCallbackContext*>(
484             reinterpret_cast<const DecoderCallbackContext*>(oldCallbackContext));
485     if (callbackContext == NULL) {
486         callbackContext = new DecoderCallbackContext;
487     }
488 
489     callbackContext->onMalformedInput = getToUCallback(onMalformedInput);
490     callbackContext->onUnmappableInput = getToUCallback(onUnmappableInput);
491 
492     ScopedStringChars replacement(env, javaReplacement);
493     if (replacement.get() == NULL) {
494         return U_ILLEGAL_ARGUMENT_ERROR;
495     }
496     u_strncpy(callbackContext->replacementChars, replacement.get(), replacement.size());
497     callbackContext->replacementCharCount = replacement.size();
498 
499     UErrorCode errorCode = U_ZERO_ERROR;
500     ucnv_setToUCallBack(cnv, CHARSET_DECODER_CALLBACK, callbackContext, NULL, NULL, &errorCode);
501     return errorCode;
502 }
503 
NativeConverter_getAveCharsPerByte(JNIEnv * env,jclass,jlong handle)504 static jfloat NativeConverter_getAveCharsPerByte(JNIEnv* env, jclass, jlong handle) {
505     return (1 / (jfloat) NativeConverter_getMaxBytesPerChar(env, NULL, handle));
506 }
507 
NativeConverter_getSubstitutionBytes(JNIEnv * env,jclass,jlong address)508 static jbyteArray NativeConverter_getSubstitutionBytes(JNIEnv* env, jclass, jlong address) {
509     UConverter* cnv = toUConverter(address);
510     if (cnv == NULL) {
511         return NULL;
512     }
513     UErrorCode status = U_ZERO_ERROR;
514     char replacementBytes[MAX_REPLACEMENT_LENGTH];
515     int8_t len = sizeof(replacementBytes);
516     ucnv_getSubstChars(cnv, replacementBytes, &len, &status);
517     if (!U_SUCCESS(status)) {
518         return env->NewByteArray(0);
519     }
520     jbyteArray result = env->NewByteArray(len);
521     if (result == NULL) {
522         return NULL;
523     }
524     env->SetByteArrayRegion(result, 0, len, reinterpret_cast<jbyte*>(replacementBytes));
525     return result;
526 }
527 
NativeConverter_contains(JNIEnv * env,jclass,jstring name1,jstring name2)528 static jboolean NativeConverter_contains(JNIEnv* env, jclass, jstring name1, jstring name2) {
529     ScopedUtfChars name1Chars(env, name1);
530     if (name1Chars.c_str() == NULL) {
531         return JNI_FALSE;
532     }
533     ScopedUtfChars name2Chars(env, name2);
534     if (name2Chars.c_str() == NULL) {
535         return JNI_FALSE;
536     }
537 
538     UErrorCode errorCode = U_ZERO_ERROR;
539     UniqueUConverter converter1(ucnv_open(name1Chars.c_str(), &errorCode));
540     UnicodeSet set1;
541     ucnv_getUnicodeSet(converter1.get(), set1.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
542 
543     UniqueUConverter converter2(ucnv_open(name2Chars.c_str(), &errorCode));
544     UnicodeSet set2;
545     ucnv_getUnicodeSet(converter2.get(), set2.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
546 
547     return U_SUCCESS(errorCode) && set1.containsAll(set2);
548 }
549 
NativeConverter_charsetForName(JNIEnv * env,jclass,jstring charsetName)550 static jobject NativeConverter_charsetForName(JNIEnv* env, jclass, jstring charsetName) {
551     ScopedUtfChars charsetNameChars(env, charsetName);
552     if (charsetNameChars.c_str() == NULL) {
553         return NULL;
554     }
555     // Get ICU's canonical name for this charset.
556     const char* icuCanonicalName = getICUCanonicalName(charsetNameChars.c_str());
557     if (icuCanonicalName == NULL) {
558         return NULL;
559     }
560     // Get Java's canonical name for this charset.
561     jstring javaCanonicalName = getJavaCanonicalName(env, icuCanonicalName);
562     if (env->ExceptionOccurred()) {
563         return NULL;
564     }
565 
566     // Check that this charset is supported.
567     // ICU doesn't offer any "isSupported", so we just open and immediately close.
568     // We ignore the UErrorCode because ucnv_open returning NULL is all the information we need.
569     UErrorCode dummy = U_ZERO_ERROR;
570     UniqueUConverter cnv(ucnv_open(icuCanonicalName, &dummy));
571     if (cnv.get() == NULL) {
572         return NULL;
573     }
574     cnv.reset();
575 
576     // Get the aliases for this charset.
577     jobjectArray aliases = getAliases(env, icuCanonicalName);
578     if (env->ExceptionOccurred()) {
579         return NULL;
580     }
581 
582     // Construct the CharsetICU object.
583     jmethodID charsetConstructor = env->GetMethodID(JniConstants::charsetICUClass, "<init>",
584             "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;)V");
585     if (env->ExceptionOccurred()) {
586         return NULL;
587     }
588     return env->NewObject(JniConstants::charsetICUClass, charsetConstructor,
589             javaCanonicalName, env->NewStringUTF(icuCanonicalName), aliases);
590 }
591 
592 static JNINativeMethod gMethods[] = {
593     NATIVE_METHOD(NativeConverter, canEncode, "(JI)Z"),
594     NATIVE_METHOD(NativeConverter, charsetForName, "(Ljava/lang/String;)Ljava/nio/charset/Charset;"),
595     NATIVE_METHOD(NativeConverter, closeConverter, "(J)V"),
596     NATIVE_METHOD(NativeConverter, contains, "(Ljava/lang/String;Ljava/lang/String;)Z"),
597     NATIVE_METHOD(NativeConverter, decode, "(J[BI[CI[IZ)I"),
598     NATIVE_METHOD(NativeConverter, encode, "(J[CI[BI[IZ)I"),
599     NATIVE_METHOD(NativeConverter, getAvailableCharsetNames, "()[Ljava/lang/String;"),
600     NATIVE_METHOD(NativeConverter, getAveBytesPerChar, "(J)F"),
601     NATIVE_METHOD(NativeConverter, getAveCharsPerByte, "(J)F"),
602     NATIVE_METHOD(NativeConverter, getMaxBytesPerChar, "(J)I"),
603     NATIVE_METHOD(NativeConverter, getMinBytesPerChar, "(J)I"),
604     NATIVE_METHOD(NativeConverter, getSubstitutionBytes, "(J)[B"),
605     NATIVE_METHOD(NativeConverter, openConverter, "(Ljava/lang/String;)J"),
606     NATIVE_METHOD(NativeConverter, resetByteToChar, "(J)V"),
607     NATIVE_METHOD(NativeConverter, resetCharToByte, "(J)V"),
608     NATIVE_METHOD(NativeConverter, setCallbackDecode, "(JIILjava/lang/String;)I"),
609     NATIVE_METHOD(NativeConverter, setCallbackEncode, "(JII[B)I"),
610 };
register_libcore_icu_NativeConverter(JNIEnv * env)611 void register_libcore_icu_NativeConverter(JNIEnv* env) {
612     jniRegisterNativeMethods(env, "libcore/icu/NativeConverter", gMethods, NELEM(gMethods));
613 }
614