• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2005 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_STRING8_H
18 #define ANDROID_STRING8_H
19 
20 #include <utils/Errors.h>
21 
// Need this for the char16_t type; String8.h should not
// be dependent on the String16 class.
24 #include <utils/String16.h>
25 
26 #include <stdint.h>
27 #include <string.h>
28 #include <sys/types.h>
29 
30 // ---------------------------------------------------------------------------
31 
extern "C" {

// C++11 and later provide char32_t as a built-in type, so re-declaring it
// with a typedef is ill-formed there. Only supply the typedef for older
// language modes (including GCC's experimental C++0x mode, which already
// treats char32_t as a keyword).
#if __cplusplus < 201103L && !defined(__GXX_EXPERIMENTAL_CXX0X__)
typedef uint32_t char32_t;
#endif

/*
 * Length helpers for char32_t strings.
 * NOTE(review): presumably the char32_t counterparts of strlen()/strnlen();
 * the definitions are not visible from this header -- confirm.
 */
size_t strlen32(const char32_t *);
size_t strnlen32(const char32_t *, size_t);

/*
 * Returns the length of "src" when "src" is valid UTF-8 string.
 * Returns 0 if src is NULL, 0-length string or non UTF-8 string.
 * This function should be used to determine whether "src" is valid UTF-8
 * characters with valid unicode codepoints. "src" must be null-terminated.
 *
 * If you are going to use other GetUtf... functions defined in this header
 * with string which may not be valid UTF-8 with valid codepoint (from 0 to
 * 0x10FFFF), you should use this function before calling others, since the
 * other functions do not check whether the string is valid UTF-8 or not.
 *
 * If you do not care whether "src" is valid UTF-8 or not, you should use
 * strlen() as usual, which should be much faster.
 */
size_t utf8_length(const char *src);

/*
 * Returns the UTF-32 length of "src".
 */
size_t utf32_length(const char *src, size_t src_len);

/*
 * Returns the UTF-8 length of the UTF-16 string "src".
 */
size_t utf8_length_from_utf16(const char16_t *src, size_t src_len);

/*
 * Returns the UTF-8 length of the UTF-32 string "src".
 */
size_t utf8_length_from_utf32(const char32_t *src, size_t src_len);

/*
 * Returns the unicode value at "index".
 * Returns -1 when the index is invalid (equal to or more than "src_len").
 * If the returned value is positive, it can be converted to char32_t, which
 * is unsigned. Then, if "next_index" is not NULL, the next index to be used
 * is stored in "next_index". "next_index" can be NULL.
 */
int32_t utf32_at(const char *src, size_t src_len,
                 size_t index, size_t *next_index);

/*
 * Stores a UTF-32 string converted from "src" in "dst". If "dst_len" is not
 * large enough to store the whole string, only part of "src" is stored
 * into "dst".
 * Returns the size actually used for storing the string.
 * "dst" is not null-terminated when dst_len is fully used (like strncpy).
 */
size_t utf8_to_utf32(const char* src, size_t src_len,
                     char32_t* dst, size_t dst_len);

/*
 * Stores a UTF-8 string converted from "src" in "dst". If "dst_len" is not
 * large enough to store the whole string, as much of "src" as possible is
 * stored into "dst". See the examples for more detail.
 * Returns the size actually used for storing the string.
 * "dst" is not null-terminated when dst_len is fully used (like strncpy).
 *
 * Example 1
 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84)
 * "src_len" == 2
 * "dst_len" >= 7
 * ->
 * Returned value == 6
 * "dst" becomes \xE3\x81\x82\xE3\x81\x84\0
 * (note that "dst" is null-terminated)
 *
 * Example 2
 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84)
 * "src_len" == 2
 * "dst_len" == 5
 * ->
 * Returned value == 3
 * "dst" becomes \xE3\x81\x82\0
 * (note that "dst" is null-terminated, but \u3044 is not stored in "dst"
 * since "dst" does not have enough size to store the character)
 *
 * Example 3
 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84)
 * "src_len" == 2
 * "dst_len" == 6
 * ->
 * Returned value == 6
 * "dst" becomes \xE3\x81\x82\xE3\x81\x84
 * (note that "dst" is NOT null-terminated, like strncpy)
 */
size_t utf32_to_utf8(const char32_t* src, size_t src_len,
                     char* dst, size_t dst_len);

/*
 * UTF-16 input variant with the same parameter layout as utf32_to_utf8().
 * NOTE(review): presumably follows the same truncation and termination
 * rules as utf32_to_utf8(); not documented here -- confirm against the
 * implementation.
 */
size_t utf16_to_utf8(const char16_t* src, size_t src_len,
                     char* dst, size_t dst_len);

}
132 
133 // ---------------------------------------------------------------------------
134 
135 namespace android {
136 
137 class TextOutput;
138 
//! A string holding UTF-8 characters. Values greater than 0x10FFFF are not
// valid Unicode code points and are not allowed.
141 class String8
142 {
143 public:
144                                 String8();
145                                 String8(const String8& o);
146     explicit                    String8(const char* o);
147     explicit                    String8(const char* o, size_t numChars);
148 
149     explicit                    String8(const String16& o);
150     explicit                    String8(const char16_t* o);
151     explicit                    String8(const char16_t* o, size_t numChars);
152     explicit                    String8(const char32_t* o);
153     explicit                    String8(const char32_t* o, size_t numChars);
154                                 ~String8();
155 
156     inline  const char*         string() const;
157     inline  size_t              size() const;
158     inline  size_t              length() const;
159     inline  size_t              bytes() const;
160 
161     inline  const SharedBuffer* sharedBuffer() const;
162 
163             void                setTo(const String8& other);
164             status_t            setTo(const char* other);
165             status_t            setTo(const char* other, size_t numChars);
166             status_t            setTo(const char16_t* other, size_t numChars);
167             status_t            setTo(const char32_t* other,
168                                       size_t length);
169 
170             status_t            append(const String8& other);
171             status_t            append(const char* other);
172             status_t            append(const char* other, size_t numChars);
173 
174             status_t            appendFormat(const char* fmt, ...)
175                     __attribute__((format (printf, 2, 3)));
176 
177             // Note that this function takes O(N) time to calculate the value.
178             // No cache value is stored.
179             size_t              getUtf32Length() const;
180             int32_t             getUtf32At(size_t index,
181                                            size_t *next_index) const;
182             size_t              getUtf32(char32_t* dst, size_t dst_len) const;
183 
184     inline  String8&            operator=(const String8& other);
185     inline  String8&            operator=(const char* other);
186 
187     inline  String8&            operator+=(const String8& other);
188     inline  String8             operator+(const String8& other) const;
189 
190     inline  String8&            operator+=(const char* other);
191     inline  String8             operator+(const char* other) const;
192 
193     inline  int                 compare(const String8& other) const;
194 
195     inline  bool                operator<(const String8& other) const;
196     inline  bool                operator<=(const String8& other) const;
197     inline  bool                operator==(const String8& other) const;
198     inline  bool                operator!=(const String8& other) const;
199     inline  bool                operator>=(const String8& other) const;
200     inline  bool                operator>(const String8& other) const;
201 
202     inline  bool                operator<(const char* other) const;
203     inline  bool                operator<=(const char* other) const;
204     inline  bool                operator==(const char* other) const;
205     inline  bool                operator!=(const char* other) const;
206     inline  bool                operator>=(const char* other) const;
207     inline  bool                operator>(const char* other) const;
208 
209     inline                      operator const char*() const;
210 
211             char*               lockBuffer(size_t size);
212             void                unlockBuffer();
213             status_t            unlockBuffer(size_t size);
214 
215             // return the index of the first byte of other in this at or after
216             // start, or -1 if not found
217             ssize_t             find(const char* other, size_t start = 0) const;
218 
219             void                toLower();
220             void                toLower(size_t start, size_t numChars);
221             void                toUpper();
222             void                toUpper(size_t start, size_t numChars);
223 
224     /*
225      * These methods operate on the string as if it were a path name.
226      */
227 
228     /*
229      * Set the filename field to a specific value.
230      *
231      * Normalizes the filename, removing a trailing '/' if present.
232      */
233     void setPathName(const char* name);
234     void setPathName(const char* name, size_t numChars);
235 
236     /*
237      * Get just the filename component.
238      *
239      * "/tmp/foo/bar.c" --> "bar.c"
240      */
241     String8 getPathLeaf(void) const;
242 
243     /*
244      * Remove the last (file name) component, leaving just the directory
245      * name.
246      *
247      * "/tmp/foo/bar.c" --> "/tmp/foo"
248      * "/tmp" --> "" // ????? shouldn't this be "/" ???? XXX
249      * "bar.c" --> ""
250      */
251     String8 getPathDir(void) const;
252 
253     /*
254      * Retrieve the front (root dir) component.  Optionally also return the
255      * remaining components.
256      *
257      * "/tmp/foo/bar.c" --> "tmp" (remain = "foo/bar.c")
258      * "/tmp" --> "tmp" (remain = "")
259      * "bar.c" --> "bar.c" (remain = "")
260      */
261     String8 walkPath(String8* outRemains = NULL) const;
262 
263     /*
264      * Return the filename extension.  This is the last '.' and up to
265      * four characters that follow it.  The '.' is included in case we
266      * decide to expand our definition of what constitutes an extension.
267      *
268      * "/tmp/foo/bar.c" --> ".c"
269      * "/tmp" --> ""
270      * "/tmp/foo.bar/baz" --> ""
271      * "foo.jpeg" --> ".jpeg"
272      * "foo." --> ""
273      */
274     String8 getPathExtension(void) const;
275 
276     /*
277      * Return the path without the extension.  Rules for what constitutes
278      * an extension are described in the comment for getPathExtension().
279      *
280      * "/tmp/foo/bar.c" --> "/tmp/foo/bar"
281      */
282     String8 getBasePath(void) const;
283 
284     /*
285      * Add a component to the pathname.  We guarantee that there is
286      * exactly one path separator between the old path and the new.
287      * If there is no existing name, we just copy the new name in.
288      *
289      * If leaf is a fully qualified path (i.e. starts with '/', it
290      * replaces whatever was there before.
291      */
292     String8& appendPath(const char* leaf);
appendPath(const String8 & leaf)293     String8& appendPath(const String8& leaf)  { return appendPath(leaf.string()); }
294 
295     /*
296      * Like appendPath(), but does not affect this string.  Returns a new one instead.
297      */
appendPathCopy(const char * leaf)298     String8 appendPathCopy(const char* leaf) const
299                                              { String8 p(*this); p.appendPath(leaf); return p; }
appendPathCopy(const String8 & leaf)300     String8 appendPathCopy(const String8& leaf) const { return appendPathCopy(leaf.string()); }
301 
302     /*
303      * Converts all separators in this string to /, the default path separator.
304      *
305      * If the default OS separator is backslash, this converts all
306      * backslashes to slashes, in-place. Otherwise it does nothing.
307      * Returns self.
308      */
309     String8& convertToResPath();
310 
311 private:
312             status_t            real_append(const char* other, size_t numChars);
313             char*               find_extension(void) const;
314 
315             const char* mString;
316 };
317 
318 TextOutput& operator<<(TextOutput& to, const String16& val);
319 
320 // ---------------------------------------------------------------------------
// No user serviceable parts below.
322 
compare_type(const String8 & lhs,const String8 & rhs)323 inline int compare_type(const String8& lhs, const String8& rhs)
324 {
325     return lhs.compare(rhs);
326 }
327 
strictly_order_type(const String8 & lhs,const String8 & rhs)328 inline int strictly_order_type(const String8& lhs, const String8& rhs)
329 {
330     return compare_type(lhs, rhs) < 0;
331 }
332 
string()333 inline const char* String8::string() const
334 {
335     return mString;
336 }
337 
length()338 inline size_t String8::length() const
339 {
340     return SharedBuffer::sizeFromData(mString)-1;
341 }
342 
size()343 inline size_t String8::size() const
344 {
345     return length();
346 }
347 
bytes()348 inline size_t String8::bytes() const
349 {
350     return SharedBuffer::sizeFromData(mString)-1;
351 }
352 
sharedBuffer()353 inline const SharedBuffer* String8::sharedBuffer() const
354 {
355     return SharedBuffer::bufferFromData(mString);
356 }
357 
358 inline String8& String8::operator=(const String8& other)
359 {
360     setTo(other);
361     return *this;
362 }
363 
364 inline String8& String8::operator=(const char* other)
365 {
366     setTo(other);
367     return *this;
368 }
369 
370 inline String8& String8::operator+=(const String8& other)
371 {
372     append(other);
373     return *this;
374 }
375 
376 inline String8 String8::operator+(const String8& other) const
377 {
378     String8 tmp(*this);
379     tmp += other;
380     return tmp;
381 }
382 
383 inline String8& String8::operator+=(const char* other)
384 {
385     append(other);
386     return *this;
387 }
388 
389 inline String8 String8::operator+(const char* other) const
390 {
391     String8 tmp(*this);
392     tmp += other;
393     return tmp;
394 }
395 
compare(const String8 & other)396 inline int String8::compare(const String8& other) const
397 {
398     return strcmp(mString, other.mString);
399 }
400 
401 inline bool String8::operator<(const String8& other) const
402 {
403     return strcmp(mString, other.mString) < 0;
404 }
405 
406 inline bool String8::operator<=(const String8& other) const
407 {
408     return strcmp(mString, other.mString) <= 0;
409 }
410 
411 inline bool String8::operator==(const String8& other) const
412 {
413     return strcmp(mString, other.mString) == 0;
414 }
415 
416 inline bool String8::operator!=(const String8& other) const
417 {
418     return strcmp(mString, other.mString) != 0;
419 }
420 
421 inline bool String8::operator>=(const String8& other) const
422 {
423     return strcmp(mString, other.mString) >= 0;
424 }
425 
426 inline bool String8::operator>(const String8& other) const
427 {
428     return strcmp(mString, other.mString) > 0;
429 }
430 
431 inline bool String8::operator<(const char* other) const
432 {
433     return strcmp(mString, other) < 0;
434 }
435 
436 inline bool String8::operator<=(const char* other) const
437 {
438     return strcmp(mString, other) <= 0;
439 }
440 
441 inline bool String8::operator==(const char* other) const
442 {
443     return strcmp(mString, other) == 0;
444 }
445 
446 inline bool String8::operator!=(const char* other) const
447 {
448     return strcmp(mString, other) != 0;
449 }
450 
451 inline bool String8::operator>=(const char* other) const
452 {
453     return strcmp(mString, other) >= 0;
454 }
455 
456 inline bool String8::operator>(const char* other) const
457 {
458     return strcmp(mString, other) > 0;
459 }
460 
461 inline String8::operator const char*() const
462 {
463     return mString;
464 }
465 
466 }  // namespace android
467 
468 // ---------------------------------------------------------------------------
469 
470 #endif // ANDROID_STRING8_H
471