1 /* 2 * Copyright (C) 2013 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #ifndef StringUTF8Adaptor_h 32 #define StringUTF8Adaptor_h 33 34 #include "wtf/text/CString.h" 35 #include "wtf/text/TextEncoding.h" 36 #include "wtf/text/WTFString.h" 37 38 namespace WTF { 39 40 // This class lets you get UTF-8 data out of a String without mallocing a 41 // separate buffer to hold the data if the String happens to be 8 bit and 42 // contain only ASCII characters. 43 class StringUTF8Adaptor { 44 public: 45 enum ShouldNormalize { 46 DoNotNormalize, 47 Normalize 48 }; 49 50 explicit StringUTF8Adaptor(const String& string, ShouldNormalize normalize = DoNotNormalize, UnencodableHandling handling = EntitiesForUnencodables) 51 : m_data(0) 52 , m_length(0) 53 { 54 if (string.isEmpty()) 55 return; 56 // Unfortunately, 8 bit WTFStrings are encoded in Latin-1 and GURL uses UTF-8 57 // when processing 8 bit strings. If |relative| is entirely ASCII, we luck out 58 // and can avoid mallocing a new buffer to hold the UTF-8 data because UTF-8 59 // and Latin-1 use the same code units for ASCII code points. 60 if (string.is8Bit() && string.containsOnlyASCII()) { 61 m_data = reinterpret_cast<const char*>(string.characters8()); 62 m_length = string.length(); 63 } else { 64 if (normalize == Normalize) 65 m_utf8Buffer = UTF8Encoding().normalizeAndEncode(string, handling); 66 else 67 m_utf8Buffer = string.utf8(); 68 m_data = m_utf8Buffer.data(); 69 m_length = m_utf8Buffer.length(); 70 } 71 } 72 data()73 const char* data() const { return m_data; } length()74 size_t length() const { return m_length; } 75 76 private: 77 CString m_utf8Buffer; 78 const char* m_data; 79 size_t m_length; 80 }; 81 82 } // namespace WTF 83 84 using WTF::StringUTF8Adaptor; 85 86 #endif // StringUTF8Adaptor_h 87