1 /* 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. 5 6 This library is free software; you can redistribute it and/or 7 modify it under the terms of the GNU Library General Public 8 License as published by the Free Software Foundation; either 9 version 2 of the License, or (at your option) any later version. 10 11 This library is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 Library General Public License for more details. 15 16 You should have received a copy of the GNU Library General Public License 17 along with this library; see the file COPYING.LIB. If not, write to 18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 Boston, MA 02110-1301, USA. 20 21 */ 22 23 #ifndef TextResourceDecoder_h 24 #define TextResourceDecoder_h 25 26 #include "wtf/RefCounted.h" 27 #include "wtf/text/TextEncoding.h" 28 29 namespace WebCore { 30 31 class DocumentEncodingData; 32 class HTMLMetaCharsetParser; 33 34 class TextResourceDecoder { 35 public: 36 enum EncodingSource { 37 DefaultEncoding, 38 AutoDetectedEncoding, 39 EncodingFromContentSniffing, 40 EncodingFromXMLHeader, 41 EncodingFromMetaTag, 42 EncodingFromCSSCharset, 43 EncodingFromHTTPHeader, 44 UserChosenEncoding, 45 EncodingFromParentFrame 46 }; 47 48 static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetector = false) 49 { 50 return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector)); 51 } 52 ~TextResourceDecoder(); 53 54 void setEncoding(const WTF::TextEncoding&, EncodingSource); encoding()55 const WTF::TextEncoding& encoding() const { return m_encoding; } encodingWasDetectedHeuristically()56 bool encodingWasDetectedHeuristically() const 57 { 58 return m_source == AutoDetectedEncoding 59 || m_source == EncodingFromContentSniffing; 60 } 61 62 String decode(const char* data, size_t length); 63 String flush(); 64 setHintEncoding(const WTF::TextEncoding & encoding)65 void setHintEncoding(const WTF::TextEncoding& encoding) 66 { 67 m_hintEncoding = encoding.name(); 68 } 69 useLenientXMLDecoding()70 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } sawError()71 bool sawError() const { return m_sawError; } 72 73 private: 74 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, bool usesEncodingDetector); 75 76 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM. 77 static ContentType determineContentType(const String& mimeType); 78 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding); 79 80 size_t checkForBOM(const char*, size_t); 81 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); 82 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); 83 void checkForMetaCharset(const char*, size_t); 84 void detectJapaneseEncoding(const char*, size_t); 85 bool shouldAutoDetect() const; 86 87 ContentType m_contentType; 88 WTF::TextEncoding m_encoding; 89 OwnPtr<TextCodec> m_codec; 90 EncodingSource m_source; 91 const char* m_hintEncoding; 92 Vector<char> m_buffer; 93 bool m_checkedForBOM; 94 bool m_checkedForCSSCharset; 95 bool m_checkedForXMLCharset; 96 bool m_checkedForMetaCharset; 97 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. 98 bool m_sawError; 99 bool m_usesEncodingDetector; 100 101 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; 102 }; 103 104 } 105 106 #endif 107