• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <unicode/utf.h>
18 #include <unicode/utf8.h>
19 #include <cstdlib>
20 #include <cutils/log.h>
21 #include <vector>
22 #include <string>
23 
24 namespace minikin {
25 
26 // src is of the form "U+1F431 | 'h' 'i'". Position of "|" gets saved to offset if non-null.
27 // Size is returned in an out parameter because gtest needs a void return for ASSERT to work.
ParseUnicode(uint16_t * buf,size_t buf_size,const char * src,size_t * result_size,size_t * offset)28 void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
29         size_t* offset) {
30     size_t input_ix = 0;
31     size_t output_ix = 0;
32     bool seen_offset = false;
33 
34     while (src[input_ix] != 0) {
35         switch (src[input_ix]) {
36         case '\'':
37             // single ASCII char
38             LOG_ALWAYS_FATAL_IF(static_cast<uint8_t>(src[input_ix]) >= 0x80);
39             input_ix++;
40             LOG_ALWAYS_FATAL_IF(src[input_ix] == 0);
41             LOG_ALWAYS_FATAL_IF(output_ix >= buf_size);
42             buf[output_ix++] = (uint16_t)src[input_ix++];
43             LOG_ALWAYS_FATAL_IF(src[input_ix] != '\'');
44             input_ix++;
45             break;
46         case 'u':
47         case 'U': {
48             // Unicode codepoint in hex syntax
49             input_ix++;
50             LOG_ALWAYS_FATAL_IF(src[input_ix] != '+');
51             input_ix++;
52             char* endptr = (char*)src + input_ix;
53             unsigned long int codepoint = strtoul(src + input_ix, &endptr, 16);
54             size_t num_hex_digits = endptr - (src + input_ix);
55 
56             // also triggers on invalid number syntax, digits = 0
57             LOG_ALWAYS_FATAL_IF(num_hex_digits < 4u);
58             LOG_ALWAYS_FATAL_IF(num_hex_digits > 6u);
59             LOG_ALWAYS_FATAL_IF(codepoint > 0x10FFFFu);
60             input_ix += num_hex_digits;
61             if (U16_LENGTH(codepoint) == 1) {
62                 LOG_ALWAYS_FATAL_IF(output_ix + 1 > buf_size);
63                 buf[output_ix++] = codepoint;
64             } else {
65                 // UTF-16 encoding
66                 LOG_ALWAYS_FATAL_IF(output_ix + 2 > buf_size);
67                 buf[output_ix++] = U16_LEAD(codepoint);
68                 buf[output_ix++] = U16_TRAIL(codepoint);
69             }
70             break;
71         }
72         case ' ':
73             input_ix++;
74             break;
75         case '|':
76             LOG_ALWAYS_FATAL_IF(seen_offset);
77             LOG_ALWAYS_FATAL_IF(offset == nullptr);
78             *offset = output_ix;
79             seen_offset = true;
80             input_ix++;
81             break;
82         default:
83             LOG_ALWAYS_FATAL("Unexpected Character");
84         }
85     }
86     LOG_ALWAYS_FATAL_IF(result_size == nullptr);
87     *result_size = output_ix;
88     LOG_ALWAYS_FATAL_IF(!seen_offset && offset != nullptr);
89 }
90 
parseUnicodeStringWithOffset(const std::string & in,size_t * offset)91 std::vector<uint16_t> parseUnicodeStringWithOffset(const std::string& in, size_t* offset) {
92     std::unique_ptr<uint16_t[]> buffer(new uint16_t[in.size()]);
93     size_t result_size = 0;
94     ParseUnicode(buffer.get(), in.size(), in.c_str(), &result_size, offset);
95     return std::vector<uint16_t>(buffer.get(), buffer.get() + result_size);
96 }
97 
parseUnicodeString(const std::string & in)98 std::vector<uint16_t> parseUnicodeString(const std::string& in) {
99     return parseUnicodeStringWithOffset(in, nullptr);
100 }
101 
utf8ToUtf16(const std::string & text)102 std::vector<uint16_t> utf8ToUtf16(const std::string& text) {
103     std::vector<uint16_t> result;
104     int32_t i = 0;
105     const int32_t textLength = static_cast<int32_t>(text.size());
106     uint32_t c = 0;
107     while (i < textLength) {
108         U8_NEXT(text.c_str(), i, textLength, c);
109         if (U16_LENGTH(c) == 1) {
110             result.push_back(c);
111         } else {
112             result.push_back(U16_LEAD(c));
113             result.push_back(U16_TRAIL(c));
114         }
115     }
116     return result;
117 }
118 
119 }  // namespace minikin
120