• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2 *******************************************************************************
3 * Copyright (C) 1996-2006, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 *
7 *
8 *******************************************************************************
9 */
10 /*
11  * (C) Copyright IBM Corp. 2000 - All Rights Reserved
12  *  A JNI wrapper to ICU native converter Interface
13  * @author: Ram Viswanadha
14  */
15 
16 #define LOG_TAG "NativeConverter"
17 
18 #include <stdlib.h>
19 #include <string.h>
20 
21 #include <memory>
22 #include <vector>
23 
24 #include <android/log.h>
25 
26 #include "IcuUtilities.h"
27 #include "JNIHelp.h"
28 #include "JniConstants.h"
29 #include "JniException.h"
30 #include "ScopedLocalRef.h"
31 #include "ScopedPrimitiveArray.h"
32 #include "ScopedStringChars.h"
33 #include "ScopedUtfChars.h"
34 #include "toStringArray.h"
35 #include "unicode/ucnv.h"
36 #include "unicode/ucnv_cb.h"
37 #include "unicode/uniset.h"
38 #include "unicode/ustring.h"
39 #include "unicode/utypes.h"
40 
41 #define NativeConverter_REPORT 0
42 #define NativeConverter_IGNORE 1
43 #define NativeConverter_REPLACE 2
44 
45 #define MAX_REPLACEMENT_LENGTH 32 // equivalent to UCNV_ERROR_BUFFER_LENGTH
46 
47 struct DecoderCallbackContext {
48     UChar replacementChars[MAX_REPLACEMENT_LENGTH];
49     size_t replacementCharCount;
50     UConverterToUCallback onUnmappableInput;
51     UConverterToUCallback onMalformedInput;
52 };
53 
54 struct EncoderCallbackContext {
55     char replacementBytes[MAX_REPLACEMENT_LENGTH];
56     size_t replacementByteCount;
57     UConverterFromUCallback onUnmappableInput;
58     UConverterFromUCallback onMalformedInput;
59 };
60 
toUConverter(jlong address)61 static UConverter* toUConverter(jlong address) {
62     return reinterpret_cast<UConverter*>(static_cast<uintptr_t>(address));
63 }
64 
collectStandardNames(JNIEnv * env,const char * canonicalName,const char * standard,std::vector<std::string> & result)65 static bool collectStandardNames(JNIEnv* env, const char* canonicalName, const char* standard,
66                                  std::vector<std::string>& result) {
67   UErrorCode status = U_ZERO_ERROR;
68   icu::UStringEnumeration e(ucnv_openStandardNames(canonicalName, standard, &status));
69   if (maybeThrowIcuException(env, "ucnv_openStandardNames", status)) {
70     return false;
71   }
72 
73   int32_t count = e.count(status);
74   if (maybeThrowIcuException(env, "StringEnumeration::count", status)) {
75     return false;
76   }
77 
78   for (int32_t i = 0; i < count; ++i) {
79     const icu::UnicodeString* string = e.snext(status);
80     if (maybeThrowIcuException(env, "StringEnumeration::snext", status)) {
81       return false;
82     }
83     std::string s;
84     string->toUTF8String(s);
85     if (s.find_first_of("+,") == std::string::npos) {
86       result.push_back(s);
87     }
88   }
89 
90   return true;
91 }
92 
getICUCanonicalName(const char * name)93 static const char* getICUCanonicalName(const char* name) {
94   UErrorCode error = U_ZERO_ERROR;
95   const char* canonicalName = NULL;
96   if ((canonicalName = ucnv_getCanonicalName(name, "MIME", &error)) != NULL) {
97     return canonicalName;
98   } else if ((canonicalName = ucnv_getCanonicalName(name, "IANA", &error)) != NULL) {
99     return canonicalName;
100   } else if ((canonicalName = ucnv_getCanonicalName(name, "", &error)) != NULL) {
101     return canonicalName;
102   } else if ((canonicalName = ucnv_getAlias(name, 0, &error)) != NULL) {
103     // We have some aliases in the form x-blah .. match those first.
104     return canonicalName;
105   } else if (strstr(name, "x-") == name) {
106     // Check if the converter can be opened with the name given.
107     error = U_ZERO_ERROR;
108     icu::LocalUConverterPointer cnv(ucnv_open(name + 2, &error));
109     if (U_SUCCESS(error)) {
110       return name + 2;
111     }
112   }
113   return NULL;
114 }
115 
116 // If a charset listed in the IANA Charset Registry is supported by an implementation
117 // of the Java platform then its canonical name must be the name listed in the registry.
118 // Many charsets are given more than one name in the registry, in which case the registry
119 // identifies one of the names as MIME-preferred. If a charset has more than one registry
120 // name then its canonical name must be the MIME-preferred name and the other names in
121 // the registry must be valid aliases. If a supported charset is not listed in the IANA
122 // registry then its canonical name must begin with one of the strings "X-" or "x-".
getJavaCanonicalName(JNIEnv * env,const char * icuCanonicalName)123 static jstring getJavaCanonicalName(JNIEnv* env, const char* icuCanonicalName) {
124   UErrorCode status = U_ZERO_ERROR;
125 
126   // Check to see if this is a well-known MIME or IANA name.
127   const char* cName = NULL;
128   if ((cName = ucnv_getStandardName(icuCanonicalName, "MIME", &status)) != NULL) {
129     return env->NewStringUTF(cName);
130   } else if ((cName = ucnv_getStandardName(icuCanonicalName, "IANA", &status)) != NULL) {
131     return env->NewStringUTF(cName);
132   }
133 
134   // Check to see if an alias already exists with "x-" prefix, if yes then
135   // make that the canonical name.
136   int32_t aliasCount = ucnv_countAliases(icuCanonicalName, &status);
137   for (int i = 0; i < aliasCount; ++i) {
138     const char* name = ucnv_getAlias(icuCanonicalName, i, &status);
139     if (name != NULL && name[0] == 'x' && name[1] == '-') {
140       return env->NewStringUTF(name);
141     }
142   }
143 
144   // As a last resort, prepend "x-" to any alias and make that the canonical name.
145   status = U_ZERO_ERROR;
146   const char* name = ucnv_getStandardName(icuCanonicalName, "UTR22", &status);
147   if (name == NULL && strchr(icuCanonicalName, ',') != NULL) {
148     name = ucnv_getAlias(icuCanonicalName, 1, &status);
149   }
150   // If there is no UTR22 canonical name then just return the original name.
151   if (name == NULL) {
152     name = icuCanonicalName;
153   }
154   std::unique_ptr<char[]> result(new char[2 + strlen(name) + 1]);
155   strcpy(&result[0], "x-");
156   strcat(&result[0], name);
157   return env->NewStringUTF(&result[0]);
158 }
159 
NativeConverter_openConverter(JNIEnv * env,jclass,jstring converterName)160 static jlong NativeConverter_openConverter(JNIEnv* env, jclass, jstring converterName) {
161     ScopedUtfChars converterNameChars(env, converterName);
162     if (converterNameChars.c_str() == NULL) {
163         return 0;
164     }
165     UErrorCode status = U_ZERO_ERROR;
166     UConverter* cnv = ucnv_open(converterNameChars.c_str(), &status);
167     maybeThrowIcuException(env, "ucnv_open", status);
168     return reinterpret_cast<uintptr_t>(cnv);
169 }
170 
NativeConverter_closeConverter(JNIEnv *,jclass,jlong address)171 static void NativeConverter_closeConverter(JNIEnv*, jclass, jlong address) {
172     ucnv_close(toUConverter(address));
173 }
174 
shouldCodecThrow(jboolean flush,UErrorCode error)175 static bool shouldCodecThrow(jboolean flush, UErrorCode error) {
176     if (flush) {
177         return (error != U_BUFFER_OVERFLOW_ERROR && error != U_TRUNCATED_CHAR_FOUND);
178     } else {
179         return (error != U_BUFFER_OVERFLOW_ERROR && error != U_INVALID_CHAR_FOUND && error != U_ILLEGAL_CHAR_FOUND);
180     }
181 }
182 
NativeConverter_encode(JNIEnv * env,jclass,jlong address,jcharArray source,jint sourceEnd,jbyteArray target,jint targetEnd,jintArray data,jboolean flush)183 static jint NativeConverter_encode(JNIEnv* env, jclass, jlong address,
184         jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd,
185         jintArray data, jboolean flush) {
186 
187     UConverter* cnv = toUConverter(address);
188     if (cnv == NULL) {
189         maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
190         return U_ILLEGAL_ARGUMENT_ERROR;
191     }
192     ScopedCharArrayRO uSource(env, source);
193     if (uSource.get() == NULL) {
194         maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR);
195         return U_ILLEGAL_ARGUMENT_ERROR;
196     }
197     ScopedByteArrayRW uTarget(env, target);
198     if (uTarget.get() == NULL) {
199         maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR);
200         return U_ILLEGAL_ARGUMENT_ERROR;
201     }
202     ScopedIntArrayRW myData(env, data);
203     if (myData.get() == NULL) {
204         maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR);
205         return U_ILLEGAL_ARGUMENT_ERROR;
206     }
207 
208     // Do the conversion.
209     jint* sourceOffset = &myData[0];
210     jint* targetOffset = &myData[1];
211     const jchar* mySource = uSource.get() + *sourceOffset;
212     const UChar* mySourceLimit= uSource.get() + sourceEnd;
213     char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset);
214     const char* cTargetLimit = reinterpret_cast<const char*>(uTarget.get() + targetEnd);
215     UErrorCode errorCode = U_ZERO_ERROR;
216     ucnv_fromUnicode(cnv , &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, (UBool) flush, &errorCode);
217     *sourceOffset = (mySource - uSource.get()) - *sourceOffset;
218     *targetOffset = (reinterpret_cast<jbyte*>(cTarget) - uTarget.get());
219 
220     // If there was an error, count the problematic characters.
221     if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND ||
222         errorCode == U_TRUNCATED_CHAR_FOUND) {
223         int8_t invalidUCharCount = 32;
224         UChar invalidUChars[32];
225         UErrorCode minorErrorCode = U_ZERO_ERROR;
226         ucnv_getInvalidUChars(cnv, invalidUChars, &invalidUCharCount, &minorErrorCode);
227         if (U_SUCCESS(minorErrorCode)) {
228             myData[2] = invalidUCharCount;
229         }
230     }
231 
232     // Managed code handles some cases; throw all other errors.
233     if (shouldCodecThrow(flush, errorCode)) {
234         maybeThrowIcuException(env, "ucnv_fromUnicode", errorCode);
235     }
236     return errorCode;
237 }
238 
NativeConverter_decode(JNIEnv * env,jclass,jlong address,jbyteArray source,jint sourceEnd,jcharArray target,jint targetEnd,jintArray data,jboolean flush)239 static jint NativeConverter_decode(JNIEnv* env, jclass, jlong address,
240         jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd,
241         jintArray data, jboolean flush) {
242 
243     UConverter* cnv = toUConverter(address);
244     if (cnv == NULL) {
245         maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
246         return U_ILLEGAL_ARGUMENT_ERROR;
247     }
248     ScopedByteArrayRO uSource(env, source);
249     if (uSource.get() == NULL) {
250         maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR);
251         return U_ILLEGAL_ARGUMENT_ERROR;
252     }
253     ScopedCharArrayRW uTarget(env, target);
254     if (uTarget.get() == NULL) {
255         maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR);
256         return U_ILLEGAL_ARGUMENT_ERROR;
257     }
258     ScopedIntArrayRW myData(env, data);
259     if (myData.get() == NULL) {
260         maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR);
261         return U_ILLEGAL_ARGUMENT_ERROR;
262     }
263 
264     // Do the conversion.
265     jint* sourceOffset = &myData[0];
266     jint* targetOffset = &myData[1];
267     const char* mySource = reinterpret_cast<const char*>(uSource.get() + *sourceOffset);
268     const char* mySourceLimit = reinterpret_cast<const char*>(uSource.get() + sourceEnd);
269     UChar* cTarget = uTarget.get() + *targetOffset;
270     const UChar* cTargetLimit = uTarget.get() + targetEnd;
271     UErrorCode errorCode = U_ZERO_ERROR;
272     ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, flush, &errorCode);
273     *sourceOffset = mySource - reinterpret_cast<const char*>(uSource.get()) - *sourceOffset;
274     *targetOffset = cTarget - uTarget.get() - *targetOffset;
275 
276     // If there was an error, count the problematic bytes.
277     if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND ||
278         errorCode == U_TRUNCATED_CHAR_FOUND) {
279         int8_t invalidByteCount = 32;
280         char invalidBytes[32] = {'\0'};
281         UErrorCode minorErrorCode = U_ZERO_ERROR;
282         ucnv_getInvalidChars(cnv, invalidBytes, &invalidByteCount, &minorErrorCode);
283         if (U_SUCCESS(minorErrorCode)) {
284             myData[2] = invalidByteCount;
285         }
286     }
287 
288     // Managed code handles some cases; throw all other errors.
289     if (shouldCodecThrow(flush, errorCode)) {
290         maybeThrowIcuException(env, "ucnv_toUnicode", errorCode);
291     }
292     return errorCode;
293 }
294 
NativeConverter_resetByteToChar(JNIEnv *,jclass,jlong address)295 static void NativeConverter_resetByteToChar(JNIEnv*, jclass, jlong address) {
296     UConverter* cnv = toUConverter(address);
297     if (cnv) {
298         ucnv_resetToUnicode(cnv);
299     }
300 }
301 
NativeConverter_resetCharToByte(JNIEnv *,jclass,jlong address)302 static void NativeConverter_resetCharToByte(JNIEnv*, jclass, jlong address) {
303     UConverter* cnv = toUConverter(address);
304     if (cnv) {
305         ucnv_resetFromUnicode(cnv);
306     }
307 }
308 
NativeConverter_getMaxBytesPerChar(JNIEnv *,jclass,jlong address)309 static jint NativeConverter_getMaxBytesPerChar(JNIEnv*, jclass, jlong address) {
310     UConverter* cnv = toUConverter(address);
311     return (cnv != NULL) ? ucnv_getMaxCharSize(cnv) : -1;
312 }
313 
NativeConverter_getMinBytesPerChar(JNIEnv *,jclass,jlong address)314 static jint NativeConverter_getMinBytesPerChar(JNIEnv*, jclass, jlong address) {
315     UConverter* cnv = toUConverter(address);
316     return (cnv != NULL) ? ucnv_getMinCharSize(cnv) : -1;
317 }
318 
NativeConverter_getAveBytesPerChar(JNIEnv *,jclass,jlong address)319 static jfloat NativeConverter_getAveBytesPerChar(JNIEnv*, jclass, jlong address) {
320     UConverter* cnv = toUConverter(address);
321     return (cnv != NULL) ? ((ucnv_getMaxCharSize(cnv) + ucnv_getMinCharSize(cnv)) / 2.0) : -1;
322 }
323 
NativeConverter_getAvailableCharsetNames(JNIEnv * env,jclass)324 static jobjectArray NativeConverter_getAvailableCharsetNames(JNIEnv* env, jclass) {
325     int32_t num = ucnv_countAvailable();
326     jobjectArray result = env->NewObjectArray(num, JniConstants::stringClass, NULL);
327     if (result == NULL) {
328         return NULL;
329     }
330     for (int i = 0; i < num; ++i) {
331         const char* name = ucnv_getAvailableName(i);
332         ScopedLocalRef<jstring> javaCanonicalName(env, getJavaCanonicalName(env, name));
333         if (javaCanonicalName.get() == NULL) {
334             return NULL;
335         }
336         env->SetObjectArrayElement(result, i, javaCanonicalName.get());
337         if (env->ExceptionCheck()) {
338             return NULL;
339         }
340     }
341     return result;
342 }
343 
CHARSET_ENCODER_CALLBACK(const void * rawContext,UConverterFromUnicodeArgs * args,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * status)344 static void CHARSET_ENCODER_CALLBACK(const void* rawContext, UConverterFromUnicodeArgs* args,
345         const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason,
346         UErrorCode* status) {
347     if (!rawContext) {
348         return;
349     }
350     const EncoderCallbackContext* ctx = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
351     switch(reason) {
352     case UCNV_UNASSIGNED:
353         ctx->onUnmappableInput(ctx, args, codeUnits, length, codePoint, reason, status);
354         return;
355     case UCNV_ILLEGAL:
356     case UCNV_IRREGULAR:
357         ctx->onMalformedInput(ctx, args, codeUnits, length, codePoint, reason, status);
358         return;
359     case UCNV_CLOSE:
360         delete ctx;
361         return;
362     default:
363         *status = U_ILLEGAL_ARGUMENT_ERROR;
364         return;
365     }
366 }
367 
encoderReplaceCallback(const void * rawContext,UConverterFromUnicodeArgs * fromArgs,const UChar *,int32_t,UChar32,UConverterCallbackReason,UErrorCode * err)368 static void encoderReplaceCallback(const void* rawContext,
369         UConverterFromUnicodeArgs* fromArgs, const UChar*, int32_t, UChar32,
370         UConverterCallbackReason, UErrorCode * err) {
371     if (rawContext == NULL) {
372         return;
373     }
374     const EncoderCallbackContext* context = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
375     *err = U_ZERO_ERROR;
376     ucnv_cbFromUWriteBytes(fromArgs, context->replacementBytes, context->replacementByteCount, 0, err);
377 }
378 
getFromUCallback(int32_t mode)379 static UConverterFromUCallback getFromUCallback(int32_t mode) {
380     switch(mode) {
381     case NativeConverter_IGNORE: return UCNV_FROM_U_CALLBACK_SKIP;
382     case NativeConverter_REPLACE: return encoderReplaceCallback;
383     case NativeConverter_REPORT: return UCNV_FROM_U_CALLBACK_STOP;
384     }
385     abort();
386 }
387 
NativeConverter_setCallbackEncode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jbyteArray javaReplacement)388 static void NativeConverter_setCallbackEncode(JNIEnv* env, jclass, jlong address,
389         jint onMalformedInput, jint onUnmappableInput, jbyteArray javaReplacement) {
390     UConverter* cnv = toUConverter(address);
391     if (cnv == NULL) {
392         maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
393         return;
394     }
395 
396     UConverterFromUCallback oldCallback = NULL;
397     const void* oldCallbackContext = NULL;
398     ucnv_getFromUCallBack(cnv, &oldCallback, const_cast<const void**>(&oldCallbackContext));
399 
400     EncoderCallbackContext* callbackContext = const_cast<EncoderCallbackContext*>(
401             reinterpret_cast<const EncoderCallbackContext*>(oldCallbackContext));
402     if (callbackContext == NULL) {
403         callbackContext = new EncoderCallbackContext;
404     }
405 
406     callbackContext->onMalformedInput = getFromUCallback(onMalformedInput);
407     callbackContext->onUnmappableInput = getFromUCallback(onUnmappableInput);
408 
409     ScopedByteArrayRO replacementBytes(env, javaReplacement);
410     if (replacementBytes.get() == NULL) {
411         maybeThrowIcuException(env, "replacementBytes", U_ILLEGAL_ARGUMENT_ERROR);
412         return;
413     }
414     memcpy(callbackContext->replacementBytes, replacementBytes.get(), replacementBytes.size());
415     callbackContext->replacementByteCount = replacementBytes.size();
416 
417     UErrorCode errorCode = U_ZERO_ERROR;
418     ucnv_setFromUCallBack(cnv, CHARSET_ENCODER_CALLBACK, callbackContext, NULL, NULL, &errorCode);
419     maybeThrowIcuException(env, "ucnv_setFromUCallBack", errorCode);
420 }
421 
decoderIgnoreCallback(const void *,UConverterToUnicodeArgs *,const char *,int32_t,UConverterCallbackReason,UErrorCode * err)422 static void decoderIgnoreCallback(const void*, UConverterToUnicodeArgs*, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) {
423     // The icu4c UCNV_FROM_U_CALLBACK_SKIP callback requires that the context is NULL, which is
424     // never true for us.
425     *err = U_ZERO_ERROR;
426 }
427 
decoderReplaceCallback(const void * rawContext,UConverterToUnicodeArgs * toArgs,const char *,int32_t,UConverterCallbackReason,UErrorCode * err)428 static void decoderReplaceCallback(const void* rawContext,
429         UConverterToUnicodeArgs* toArgs, const char*, int32_t, UConverterCallbackReason,
430         UErrorCode* err) {
431     if (!rawContext) {
432         return;
433     }
434     const DecoderCallbackContext* context = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
435     *err = U_ZERO_ERROR;
436     ucnv_cbToUWriteUChars(toArgs,context->replacementChars, context->replacementCharCount, 0, err);
437 }
438 
getToUCallback(int32_t mode)439 static UConverterToUCallback getToUCallback(int32_t mode) {
440     switch (mode) {
441     case NativeConverter_IGNORE: return decoderIgnoreCallback;
442     case NativeConverter_REPLACE: return decoderReplaceCallback;
443     case NativeConverter_REPORT: return UCNV_TO_U_CALLBACK_STOP;
444     }
445     abort();
446 }
447 
CHARSET_DECODER_CALLBACK(const void * rawContext,UConverterToUnicodeArgs * args,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * status)448 static void CHARSET_DECODER_CALLBACK(const void* rawContext, UConverterToUnicodeArgs* args,
449         const char* codeUnits, int32_t length,
450         UConverterCallbackReason reason, UErrorCode* status) {
451     if (!rawContext) {
452         return;
453     }
454     const DecoderCallbackContext* ctx = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
455     switch(reason) {
456     case UCNV_UNASSIGNED:
457         ctx->onUnmappableInput(ctx, args, codeUnits, length, reason, status);
458         return;
459     case UCNV_ILLEGAL:
460     case UCNV_IRREGULAR:
461         ctx->onMalformedInput(ctx, args, codeUnits, length, reason, status);
462         return;
463     case UCNV_CLOSE:
464         delete ctx;
465         return;
466     default:
467         *status = U_ILLEGAL_ARGUMENT_ERROR;
468         return;
469     }
470 }
471 
NativeConverter_setCallbackDecode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jstring javaReplacement)472 static void NativeConverter_setCallbackDecode(JNIEnv* env, jclass, jlong address,
473         jint onMalformedInput, jint onUnmappableInput, jstring javaReplacement) {
474     UConverter* cnv = toUConverter(address);
475     if (cnv == NULL) {
476         maybeThrowIcuException(env, "toConverter", U_ILLEGAL_ARGUMENT_ERROR);
477         return;
478     }
479 
480     UConverterToUCallback oldCallback;
481     const void* oldCallbackContext;
482     ucnv_getToUCallBack(cnv, &oldCallback, &oldCallbackContext);
483 
484     DecoderCallbackContext* callbackContext = const_cast<DecoderCallbackContext*>(
485             reinterpret_cast<const DecoderCallbackContext*>(oldCallbackContext));
486     if (callbackContext == NULL) {
487         callbackContext = new DecoderCallbackContext;
488     }
489 
490     callbackContext->onMalformedInput = getToUCallback(onMalformedInput);
491     callbackContext->onUnmappableInput = getToUCallback(onUnmappableInput);
492 
493     ScopedStringChars replacement(env, javaReplacement);
494     if (replacement.get() == NULL) {
495         maybeThrowIcuException(env, "replacement", U_ILLEGAL_ARGUMENT_ERROR);
496         return;
497     }
498     u_strncpy(callbackContext->replacementChars, replacement.get(), replacement.size());
499     callbackContext->replacementCharCount = replacement.size();
500 
501     UErrorCode errorCode = U_ZERO_ERROR;
502     ucnv_setToUCallBack(cnv, CHARSET_DECODER_CALLBACK, callbackContext, NULL, NULL, &errorCode);
503     maybeThrowIcuException(env, "ucnv_setToUCallBack", errorCode);
504 }
505 
NativeConverter_getAveCharsPerByte(JNIEnv * env,jclass,jlong handle)506 static jfloat NativeConverter_getAveCharsPerByte(JNIEnv* env, jclass, jlong handle) {
507     return (1 / (jfloat) NativeConverter_getMaxBytesPerChar(env, NULL, handle));
508 }
509 
NativeConverter_getSubstitutionBytes(JNIEnv * env,jclass,jlong address)510 static jbyteArray NativeConverter_getSubstitutionBytes(JNIEnv* env, jclass, jlong address) {
511     UConverter* cnv = toUConverter(address);
512     if (cnv == NULL) {
513         return NULL;
514     }
515     UErrorCode status = U_ZERO_ERROR;
516     char replacementBytes[MAX_REPLACEMENT_LENGTH];
517     int8_t len = sizeof(replacementBytes);
518     ucnv_getSubstChars(cnv, replacementBytes, &len, &status);
519     if (!U_SUCCESS(status)) {
520         return env->NewByteArray(0);
521     }
522     jbyteArray result = env->NewByteArray(len);
523     if (result == NULL) {
524         return NULL;
525     }
526     env->SetByteArrayRegion(result, 0, len, reinterpret_cast<jbyte*>(replacementBytes));
527     return result;
528 }
529 
NativeConverter_contains(JNIEnv * env,jclass,jstring name1,jstring name2)530 static jboolean NativeConverter_contains(JNIEnv* env, jclass, jstring name1, jstring name2) {
531     ScopedUtfChars name1Chars(env, name1);
532     if (name1Chars.c_str() == NULL) {
533         return JNI_FALSE;
534     }
535     ScopedUtfChars name2Chars(env, name2);
536     if (name2Chars.c_str() == NULL) {
537         return JNI_FALSE;
538     }
539 
540     UErrorCode errorCode = U_ZERO_ERROR;
541     icu::LocalUConverterPointer converter1(ucnv_open(name1Chars.c_str(), &errorCode));
542     icu::UnicodeSet set1;
543     ucnv_getUnicodeSet(&*converter1, set1.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
544 
545     icu::LocalUConverterPointer converter2(ucnv_open(name2Chars.c_str(), &errorCode));
546     icu::UnicodeSet set2;
547     ucnv_getUnicodeSet(&*converter2, set2.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
548 
549     return U_SUCCESS(errorCode) && set1.containsAll(set2);
550 }
551 
NativeConverter_charsetForName(JNIEnv * env,jclass,jstring charsetName)552 static jobject NativeConverter_charsetForName(JNIEnv* env, jclass, jstring charsetName) {
553     ScopedUtfChars charsetNameChars(env, charsetName);
554     if (charsetNameChars.c_str() == NULL) {
555         return NULL;
556     }
557 
558     // Get ICU's canonical name for this charset.
559     const char* icuCanonicalName = getICUCanonicalName(charsetNameChars.c_str());
560     if (icuCanonicalName == NULL) {
561         return NULL;
562     }
563 
564     // Get Java's canonical name for this charset.
565     jstring javaCanonicalName = getJavaCanonicalName(env, icuCanonicalName);
566     if (env->ExceptionCheck()) {
567         return NULL;
568     }
569 
570     // Check that this charset is supported.
571     {
572         // ICU doesn't offer any "isSupported", so we just open and immediately close.
573         UErrorCode error = U_ZERO_ERROR;
574         icu::LocalUConverterPointer cnv(ucnv_open(icuCanonicalName, &error));
575         if (!U_SUCCESS(error)) {
576             return NULL;
577         }
578     }
579 
580     // Get the aliases for this charset.
581     std::vector<std::string> aliases;
582     if (!collectStandardNames(env, icuCanonicalName, "IANA", aliases)) {
583         return NULL;
584     }
585     if (!collectStandardNames(env, icuCanonicalName, "MIME", aliases)) {
586         return NULL;
587     }
588     if (!collectStandardNames(env, icuCanonicalName, "JAVA", aliases)) {
589         return NULL;
590     }
591     if (!collectStandardNames(env, icuCanonicalName, "WINDOWS", aliases)) {
592         return NULL;
593     }
594     jobjectArray javaAliases = toStringArray(env, aliases);
595     if (env->ExceptionCheck()) {
596         return NULL;
597     }
598 
599     // Construct the CharsetICU object.
600     static jmethodID charsetConstructor = env->GetMethodID(JniConstants::charsetICUClass, "<init>",
601             "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;)V");
602     if (env->ExceptionCheck()) {
603         return NULL;
604     }
605 
606     jstring icuCanonicalNameStr = env->NewStringUTF(icuCanonicalName);
607     if (env->ExceptionCheck()) {
608         return NULL;
609     }
610 
611     return env->NewObject(JniConstants::charsetICUClass, charsetConstructor,
612             javaCanonicalName, icuCanonicalNameStr, javaAliases);
613 }
614 
FreeNativeConverter(void * converter)615 static void FreeNativeConverter(void *converter) {
616     ucnv_close(reinterpret_cast<UConverter*>(converter));
617 }
618 
NativeConverter_getNativeFinalizer(JNIEnv *,jclass)619 static jlong NativeConverter_getNativeFinalizer(JNIEnv*, jclass) {
620     return reinterpret_cast<jlong>(&FreeNativeConverter);
621 }
622 
623 
NativeConverter_getNativeSize(JNIEnv *,jclass,jstring)624 static jlong NativeConverter_getNativeSize(JNIEnv*, jclass, jstring) {
625     // TODO: Improve estimate.
626     return 200;
627 }
628 
629 static JNINativeMethod gMethods[] = {
630     NATIVE_METHOD(NativeConverter, charsetForName, "(Ljava/lang/String;)Ljava/nio/charset/Charset;"),
631     NATIVE_METHOD(NativeConverter, closeConverter, "(J)V"),
632     NATIVE_METHOD(NativeConverter, contains, "(Ljava/lang/String;Ljava/lang/String;)Z"),
633     NATIVE_METHOD(NativeConverter, decode, "(J[BI[CI[IZ)I"),
634     NATIVE_METHOD(NativeConverter, encode, "(J[CI[BI[IZ)I"),
635     NATIVE_METHOD(NativeConverter, getAvailableCharsetNames, "()[Ljava/lang/String;"),
636     NATIVE_METHOD(NativeConverter, getAveBytesPerChar, "(J)F"),
637     NATIVE_METHOD(NativeConverter, getAveCharsPerByte, "(J)F"),
638     NATIVE_METHOD(NativeConverter, getMaxBytesPerChar, "(J)I"),
639     NATIVE_METHOD(NativeConverter, getMinBytesPerChar, "(J)I"),
640     NATIVE_METHOD(NativeConverter, getSubstitutionBytes, "(J)[B"),
641     NATIVE_METHOD(NativeConverter, openConverter, "(Ljava/lang/String;)J"),
642     NATIVE_METHOD(NativeConverter, resetByteToChar, "(J)V"),
643     NATIVE_METHOD(NativeConverter, resetCharToByte, "(J)V"),
644     NATIVE_METHOD(NativeConverter, setCallbackDecode, "(JIILjava/lang/String;)V"),
645     NATIVE_METHOD(NativeConverter, setCallbackEncode, "(JII[B)V"),
646     NATIVE_METHOD(NativeConverter, getNativeFinalizer, "()J"),
647     NATIVE_METHOD(NativeConverter, getNativeSize, "()J")
648 };
register_libcore_icu_NativeConverter(JNIEnv * env)649 void register_libcore_icu_NativeConverter(JNIEnv* env) {
650     jniRegisterNativeMethods(env, "libcore/icu/NativeConverter", gMethods, NELEM(gMethods));
651 }
652