1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <media/mediascanner.h>
18
19 #include <utils/StringArray.h>
20
21 #include "autodetect.h"
22 #include "unicode/ucnv.h"
23 #include "unicode/ustring.h"
24
25 namespace android {
26
MediaScannerClient()27 MediaScannerClient::MediaScannerClient()
28 : mNames(NULL),
29 mValues(NULL),
30 mLocaleEncoding(kEncodingNone)
31 {
32 }
33
~MediaScannerClient()34 MediaScannerClient::~MediaScannerClient()
35 {
36 delete mNames;
37 delete mValues;
38 }
39
setLocale(const char * locale)40 void MediaScannerClient::setLocale(const char* locale)
41 {
42 if (!locale) return;
43
44 if (!strncmp(locale, "ja", 2))
45 mLocaleEncoding = kEncodingShiftJIS;
46 else if (!strncmp(locale, "ko", 2))
47 mLocaleEncoding = kEncodingEUCKR;
48 else if (!strncmp(locale, "zh", 2)) {
49 if (!strcmp(locale, "zh_CN")) {
50 // simplified chinese for mainland China
51 mLocaleEncoding = kEncodingGBK;
52 } else {
53 // assume traditional for non-mainland Chinese locales (Taiwan, Hong Kong, Singapore)
54 mLocaleEncoding = kEncodingBig5;
55 }
56 }
57 }
58
beginFile()59 void MediaScannerClient::beginFile()
60 {
61 mNames = new StringArray;
62 mValues = new StringArray;
63 }
64
addStringTag(const char * name,const char * value)65 status_t MediaScannerClient::addStringTag(const char* name, const char* value)
66 {
67 if (mLocaleEncoding != kEncodingNone) {
68 // don't bother caching strings that are all ASCII.
69 // call handleStringTag directly instead.
70 // check to see if value (which should be utf8) has any non-ASCII characters
71 bool nonAscii = false;
72 const char* chp = value;
73 char ch;
74 while ((ch = *chp++)) {
75 if (ch & 0x80) {
76 nonAscii = true;
77 break;
78 }
79 }
80
81 if (nonAscii) {
82 // save the strings for later so they can be used for native encoding detection
83 mNames->push_back(name);
84 mValues->push_back(value);
85 return OK;
86 }
87 // else fall through
88 }
89
90 // autodetection is not necessary, so no need to cache the values
91 // pass directly to the client instead
92 return handleStringTag(name, value);
93 }
94
possibleEncodings(const char * s)95 static uint32_t possibleEncodings(const char* s)
96 {
97 uint32_t result = kEncodingAll;
98 // if s contains a native encoding, then it was mistakenly encoded in utf8 as if it were latin-1
99 // so we need to reverse the latin-1 -> utf8 conversion to get the native chars back
100 uint8_t ch1, ch2;
101 uint8_t* chp = (uint8_t *)s;
102
103 while ((ch1 = *chp++)) {
104 if (ch1 & 0x80) {
105 ch2 = *chp++;
106 ch1 = ((ch1 << 6) & 0xC0) | (ch2 & 0x3F);
107 // ch1 is now the first byte of the potential native char
108
109 ch2 = *chp++;
110 if (ch2 & 0x80)
111 ch2 = ((ch2 << 6) & 0xC0) | (*chp++ & 0x3F);
112 // ch2 is now the second byte of the potential native char
113 int ch = (int)ch1 << 8 | (int)ch2;
114 result &= findPossibleEncodings(ch);
115 }
116 // else ASCII character, which could be anything
117 }
118
119 return result;
120 }
121
convertValues(uint32_t encoding)122 void MediaScannerClient::convertValues(uint32_t encoding)
123 {
124 const char* enc = NULL;
125 switch (encoding) {
126 case kEncodingShiftJIS:
127 enc = "shift-jis";
128 break;
129 case kEncodingGBK:
130 enc = "gbk";
131 break;
132 case kEncodingBig5:
133 enc = "Big5";
134 break;
135 case kEncodingEUCKR:
136 enc = "EUC-KR";
137 break;
138 }
139
140 if (enc) {
141 UErrorCode status = U_ZERO_ERROR;
142
143 UConverter *conv = ucnv_open(enc, &status);
144 if (U_FAILURE(status)) {
145 ALOGE("could not create UConverter for %s", enc);
146 return;
147 }
148 UConverter *utf8Conv = ucnv_open("UTF-8", &status);
149 if (U_FAILURE(status)) {
150 ALOGE("could not create UConverter for UTF-8");
151 ucnv_close(conv);
152 return;
153 }
154
155 // for each value string, convert from native encoding to UTF-8
156 for (int i = 0; i < mNames->size(); i++) {
157 // first we need to untangle the utf8 and convert it back to the original bytes
158 // since we are reducing the length of the string, we can do this in place
159 uint8_t* src = (uint8_t *)mValues->getEntry(i);
160 int len = strlen((char *)src);
161 uint8_t* dest = src;
162
163 uint8_t uch;
164 while ((uch = *src++)) {
165 if (uch & 0x80)
166 *dest++ = ((uch << 6) & 0xC0) | (*src++ & 0x3F);
167 else
168 *dest++ = uch;
169 }
170 *dest = 0;
171
172 // now convert from native encoding to UTF-8
173 const char* source = mValues->getEntry(i);
174 int targetLength = len * 3 + 1;
175 char* buffer = new char[targetLength];
176 // don't normally check for NULL, but in this case targetLength may be large
177 if (!buffer)
178 break;
179 char* target = buffer;
180
181 ucnv_convertEx(utf8Conv, conv, &target, target + targetLength,
182 &source, (const char *)dest, NULL, NULL, NULL, NULL, TRUE, TRUE, &status);
183 if (U_FAILURE(status)) {
184 ALOGE("ucnv_convertEx failed: %d", status);
185 mValues->setEntry(i, "???");
186 } else {
187 // zero terminate
188 *target = 0;
189 mValues->setEntry(i, buffer);
190 }
191
192 delete[] buffer;
193 }
194
195 ucnv_close(conv);
196 ucnv_close(utf8Conv);
197 }
198 }
199
endFile()200 void MediaScannerClient::endFile()
201 {
202 if (mLocaleEncoding != kEncodingNone) {
203 int size = mNames->size();
204 uint32_t encoding = kEncodingAll;
205
206 // compute a bit mask containing all possible encodings
207 for (int i = 0; i < mNames->size(); i++)
208 encoding &= possibleEncodings(mValues->getEntry(i));
209
210 // if the locale encoding matches, then assume we have a native encoding.
211 if (encoding & mLocaleEncoding)
212 convertValues(mLocaleEncoding);
213
214 // finally, push all name/value pairs to the client
215 for (int i = 0; i < mNames->size(); i++) {
216 status_t status = handleStringTag(mNames->getEntry(i), mValues->getEntry(i));
217 if (status) {
218 break;
219 }
220 }
221 }
222 // else addStringTag() has done all the work so we have nothing to do
223
224 delete mNames;
225 delete mValues;
226 mNames = NULL;
227 mValues = NULL;
228 }
229
230 } // namespace android
231