• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2      Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3      Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
4      Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
5  
6      This library is free software; you can redistribute it and/or
7      modify it under the terms of the GNU Library General Public
8      License as published by the Free Software Foundation; either
9      version 2 of the License, or (at your option) any later version.
10  
11      This library is distributed in the hope that it will be useful,
12      but WITHOUT ANY WARRANTY; without even the implied warranty of
13      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14      Library General Public License for more details.
15  
16      You should have received a copy of the GNU Library General Public License
17      along with this library; see the file COPYING.LIB.  If not, write to
18      the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19      Boston, MA 02110-1301, USA.
20  
21  */
22  
23  #ifndef TextResourceDecoder_h
24  #define TextResourceDecoder_h
25  
26  #include "TextEncoding.h"
27  
28  namespace WebCore {
29  
30  class HTMLMetaCharsetParser;
31  
32  class TextResourceDecoder : public RefCounted<TextResourceDecoder> {
33  public:
34      enum EncodingSource {
35          DefaultEncoding,
36          AutoDetectedEncoding,
37          EncodingFromXMLHeader,
38          EncodingFromMetaTag,
39          EncodingFromCSSCharset,
40          EncodingFromHTTPHeader,
41          UserChosenEncoding,
42          EncodingFromParentFrame
43      };
44  
45      static PassRefPtr<TextResourceDecoder> create(const String& mimeType, const TextEncoding& defaultEncoding = TextEncoding(), bool usesEncodingDetector = false)
46      {
47          return adoptRef(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector));
48      }
49      ~TextResourceDecoder();
50  
51      void setEncoding(const TextEncoding&, EncodingSource);
encoding()52      const TextEncoding& encoding() const { return m_encoding; }
53  
54      String decode(const char* data, size_t length);
55      String flush();
56  
setHintEncoding(const TextResourceDecoder * hintDecoder)57      void setHintEncoding(const TextResourceDecoder* hintDecoder)
58      {
59          // hintEncoding is for use with autodetection, which should be
60          // only invoked when hintEncoding comes from auto-detection.
61          if (hintDecoder && hintDecoder->m_source == AutoDetectedEncoding)
62              m_hintEncoding = hintDecoder->encoding().name();
63      }
64  
useLenientXMLDecoding()65      void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
sawError()66      bool sawError() const { return m_sawError; }
67  
68  private:
69      TextResourceDecoder(const String& mimeType, const TextEncoding& defaultEncoding,
70                          bool usesEncodingDetector);
71  
72      enum ContentType { PlainText, HTML, XML, CSS }; // PlainText only checks for BOM.
73      static ContentType determineContentType(const String& mimeType);
74      static const TextEncoding& defaultEncoding(ContentType, const TextEncoding& defaultEncoding);
75  
76      size_t checkForBOM(const char*, size_t);
77      bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
78      bool checkForHeadCharset(const char*, size_t, bool& movedDataToBuffer);
79      bool checkForMetaCharset(const char*, size_t);
80      void detectJapaneseEncoding(const char*, size_t);
81      bool shouldAutoDetect() const;
82  
83      ContentType m_contentType;
84      TextEncoding m_encoding;
85      OwnPtr<TextCodec> m_codec;
86      EncodingSource m_source;
87      const char* m_hintEncoding;
88      Vector<char> m_buffer;
89      bool m_checkedForBOM;
90      bool m_checkedForCSSCharset;
91      bool m_checkedForHeadCharset;
92      bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
93      bool m_sawError;
94      bool m_usesEncodingDetector;
95  
96      OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
97  };
98  
99  }
100  
101  #endif
102