• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2     Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3     Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
4     Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
5 
6     This library is free software; you can redistribute it and/or
7     modify it under the terms of the GNU Library General Public
8     License as published by the Free Software Foundation; either
9     version 2 of the License, or (at your option) any later version.
10 
11     This library is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14     Library General Public License for more details.
15 
16     You should have received a copy of the GNU Library General Public License
17     along with this library; see the file COPYING.LIB.  If not, write to
18     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19     Boston, MA 02110-1301, USA.
20 
21 */
22 
23 #ifndef TextResourceDecoder_h
24 #define TextResourceDecoder_h
25 
26 #include "wtf/RefCounted.h"
27 #include "wtf/text/TextEncoding.h"
28 
29 namespace blink {
30 
31 class DocumentEncodingData;
32 class HTMLMetaCharsetParser;
33 
34 class TextResourceDecoder {
35 public:
36     enum EncodingSource {
37         DefaultEncoding,
38         AutoDetectedEncoding,
39         EncodingFromContentSniffing,
40         EncodingFromXMLHeader,
41         EncodingFromMetaTag,
42         EncodingFromCSSCharset,
43         EncodingFromHTTPHeader,
44         UserChosenEncoding,
45         EncodingFromParentFrame
46     };
47 
48     static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetector = false)
49     {
50         return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector));
51     }
52     ~TextResourceDecoder();
53 
54     void setEncoding(const WTF::TextEncoding&, EncodingSource);
encoding()55     const WTF::TextEncoding& encoding() const { return m_encoding; }
encodingWasDetectedHeuristically()56     bool encodingWasDetectedHeuristically() const
57     {
58         return m_source == AutoDetectedEncoding
59             || m_source == EncodingFromContentSniffing;
60     }
61 
62     String decode(const char* data, size_t length);
63     String flush();
64 
setHintEncoding(const WTF::TextEncoding & encoding)65     void setHintEncoding(const WTF::TextEncoding& encoding)
66     {
67         m_hintEncoding = encoding.name();
68     }
69 
useLenientXMLDecoding()70     void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
sawError()71     bool sawError() const { return m_sawError; }
72 
73 private:
74     TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, bool usesEncodingDetector);
75 
76     enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM.
77     static ContentType determineContentType(const String& mimeType);
78     static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding);
79 
80     size_t checkForBOM(const char*, size_t);
81     bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
82     bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer);
83     void checkForMetaCharset(const char*, size_t);
84     void detectJapaneseEncoding(const char*, size_t);
85     bool shouldAutoDetect() const;
86 
87     ContentType m_contentType;
88     WTF::TextEncoding m_encoding;
89     OwnPtr<TextCodec> m_codec;
90     EncodingSource m_source;
91     const char* m_hintEncoding;
92     Vector<char> m_buffer;
93     bool m_checkedForBOM;
94     bool m_checkedForCSSCharset;
95     bool m_checkedForXMLCharset;
96     bool m_checkedForMetaCharset;
97     bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
98     bool m_sawError;
99     bool m_usesEncodingDetector;
100 
101     OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
102 };
103 
104 }
105 
106 #endif
107