• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <media/mediascanner.h>
18 
19 #include <utils/StringArray.h>
20 
21 #include "autodetect.h"
22 #include "unicode/ucnv.h"
23 #include "unicode/ustring.h"
24 
25 namespace android {
26 
MediaScannerClient()27 MediaScannerClient::MediaScannerClient()
28     :   mNames(NULL),
29         mValues(NULL),
30         mLocaleEncoding(kEncodingNone)
31 {
32 }
33 
~MediaScannerClient()34 MediaScannerClient::~MediaScannerClient()
35 {
36     delete mNames;
37     delete mValues;
38 }
39 
setLocale(const char * locale)40 void MediaScannerClient::setLocale(const char* locale)
41 {
42     if (!locale) return;
43 
44     if (!strncmp(locale, "ja", 2))
45         mLocaleEncoding = kEncodingShiftJIS;
46     else if (!strncmp(locale, "ko", 2))
47         mLocaleEncoding = kEncodingEUCKR;
48     else if (!strncmp(locale, "zh", 2)) {
49         if (!strcmp(locale, "zh_CN")) {
50             // simplified chinese for mainland China
51             mLocaleEncoding = kEncodingGBK;
52         } else {
53             // assume traditional for non-mainland Chinese locales (Taiwan, Hong Kong, Singapore)
54             mLocaleEncoding = kEncodingBig5;
55         }
56     }
57 }
58 
beginFile()59 void MediaScannerClient::beginFile()
60 {
61     mNames = new StringArray;
62     mValues = new StringArray;
63 }
64 
addStringTag(const char * name,const char * value)65 status_t MediaScannerClient::addStringTag(const char* name, const char* value)
66 {
67     if (mLocaleEncoding != kEncodingNone) {
68         // don't bother caching strings that are all ASCII.
69         // call handleStringTag directly instead.
70         // check to see if value (which should be utf8) has any non-ASCII characters
71         bool nonAscii = false;
72         const char* chp = value;
73         char ch;
74         while ((ch = *chp++)) {
75             if (ch & 0x80) {
76                 nonAscii = true;
77                 break;
78             }
79         }
80 
81         if (nonAscii) {
82             // save the strings for later so they can be used for native encoding detection
83             mNames->push_back(name);
84             mValues->push_back(value);
85             return OK;
86         }
87         // else fall through
88     }
89 
90     // autodetection is not necessary, so no need to cache the values
91     // pass directly to the client instead
92     return handleStringTag(name, value);
93 }
94 
possibleEncodings(const char * s)95 static uint32_t possibleEncodings(const char* s)
96 {
97     uint32_t result = kEncodingAll;
98     // if s contains a native encoding, then it was mistakenly encoded in utf8 as if it were latin-1
99     // so we need to reverse the latin-1 -> utf8 conversion to get the native chars back
100     uint8_t ch1, ch2;
101     uint8_t* chp = (uint8_t *)s;
102 
103     while ((ch1 = *chp++)) {
104         if (ch1 & 0x80) {
105             ch2 = *chp++;
106             ch1 = ((ch1 << 6) & 0xC0) | (ch2 & 0x3F);
107             // ch1 is now the first byte of the potential native char
108 
109             ch2 = *chp++;
110             if (ch2 & 0x80)
111                 ch2 = ((ch2 << 6) & 0xC0) | (*chp++ & 0x3F);
112             // ch2 is now the second byte of the potential native char
113             int ch = (int)ch1 << 8 | (int)ch2;
114             result &= findPossibleEncodings(ch);
115         }
116         // else ASCII character, which could be anything
117     }
118 
119     return result;
120 }
121 
convertValues(uint32_t encoding)122 void MediaScannerClient::convertValues(uint32_t encoding)
123 {
124     const char* enc = NULL;
125     switch (encoding) {
126         case kEncodingShiftJIS:
127             enc = "shift-jis";
128             break;
129         case kEncodingGBK:
130             enc = "gbk";
131             break;
132         case kEncodingBig5:
133             enc = "Big5";
134             break;
135         case kEncodingEUCKR:
136             enc = "EUC-KR";
137             break;
138     }
139 
140     if (enc) {
141         UErrorCode status = U_ZERO_ERROR;
142 
143         UConverter *conv = ucnv_open(enc, &status);
144         if (U_FAILURE(status)) {
145             ALOGE("could not create UConverter for %s", enc);
146             return;
147         }
148         UConverter *utf8Conv = ucnv_open("UTF-8", &status);
149         if (U_FAILURE(status)) {
150             ALOGE("could not create UConverter for UTF-8");
151             ucnv_close(conv);
152             return;
153         }
154 
155         // for each value string, convert from native encoding to UTF-8
156         for (int i = 0; i < mNames->size(); i++) {
157             // first we need to untangle the utf8 and convert it back to the original bytes
158             // since we are reducing the length of the string, we can do this in place
159             uint8_t* src = (uint8_t *)mValues->getEntry(i);
160             int len = strlen((char *)src);
161             uint8_t* dest = src;
162 
163             uint8_t uch;
164             while ((uch = *src++)) {
165                 if (uch & 0x80)
166                     *dest++ = ((uch << 6) & 0xC0) | (*src++ & 0x3F);
167                 else
168                     *dest++ = uch;
169             }
170             *dest = 0;
171 
172             // now convert from native encoding to UTF-8
173             const char* source = mValues->getEntry(i);
174             int targetLength = len * 3 + 1;
175             char* buffer = new char[targetLength];
176             // don't normally check for NULL, but in this case targetLength may be large
177             if (!buffer)
178                 break;
179             char* target = buffer;
180 
181             ucnv_convertEx(utf8Conv, conv, &target, target + targetLength,
182                     &source, (const char *)dest, NULL, NULL, NULL, NULL, TRUE, TRUE, &status);
183             if (U_FAILURE(status)) {
184                 ALOGE("ucnv_convertEx failed: %d", status);
185                 mValues->setEntry(i, "???");
186             } else {
187                 // zero terminate
188                 *target = 0;
189                 mValues->setEntry(i, buffer);
190             }
191 
192             delete[] buffer;
193         }
194 
195         ucnv_close(conv);
196         ucnv_close(utf8Conv);
197     }
198 }
199 
endFile()200 void MediaScannerClient::endFile()
201 {
202     if (mLocaleEncoding != kEncodingNone) {
203         int size = mNames->size();
204         uint32_t encoding = kEncodingAll;
205 
206         // compute a bit mask containing all possible encodings
207         for (int i = 0; i < mNames->size(); i++)
208             encoding &= possibleEncodings(mValues->getEntry(i));
209 
210         // if the locale encoding matches, then assume we have a native encoding.
211         if (encoding & mLocaleEncoding)
212             convertValues(mLocaleEncoding);
213 
214         // finally, push all name/value pairs to the client
215         for (int i = 0; i < mNames->size(); i++) {
216             status_t status = handleStringTag(mNames->getEntry(i), mValues->getEntry(i));
217             if (status) {
218                 break;
219             }
220         }
221     }
222     // else addStringTag() has done all the work so we have nothing to do
223 
224     delete mNames;
225     delete mValues;
226     mNames = NULL;
227     mValues = NULL;
228 }
229 
230 }  // namespace android
231