• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
20  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  */
24 
25 #include "config.h"
26 #include "TextDocument.h"
27 
28 #include "Element.h"
29 #include "HTMLNames.h"
30 #include "HTMLViewSourceDocument.h"
31 #include "SegmentedString.h"
32 #include "Text.h"
33 #include "XMLTokenizer.h"
34 
35 using namespace std;
36 
37 namespace WebCore {
38 
39 using namespace HTMLNames;
40 
41 class TextTokenizer : public Tokenizer {
42 public:
43     TextTokenizer(Document*);
44     virtual ~TextTokenizer();
45     TextTokenizer(HTMLViewSourceDocument*);
46 
47     virtual void write(const SegmentedString&, bool appendData);
48     virtual void finish();
49     virtual bool isWaitingForScripts() const;
50 
checkBuffer(int len=10)51     inline void checkBuffer(int len = 10)
52     {
53         if ((m_dest - m_buffer) > m_size - len) {
54             // Enlarge buffer
55             int newSize = std::max(m_size * 2, m_size + len);
56             int oldOffset = m_dest - m_buffer;
57             m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, newSize * sizeof(UChar)));
58             m_dest = m_buffer + oldOffset;
59             m_size = newSize;
60         }
61     }
62 
63 private:
64     Document* m_doc;
65     Element* m_preElement;
66 
67     bool m_skipLF;
68 
69     int m_size;
70     UChar* m_buffer;
71     UChar* m_dest;
72 };
73 
TextTokenizer(Document * doc)74 TextTokenizer::TextTokenizer(Document* doc)
75     : m_doc(doc)
76     , m_preElement(0)
77     , m_skipLF(false)
78 {
79     // Allocate buffer
80     m_size = 254;
81     m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * m_size));
82     m_dest = m_buffer;
83 }
84 
TextTokenizer(HTMLViewSourceDocument * doc)85 TextTokenizer::TextTokenizer(HTMLViewSourceDocument* doc)
86     : Tokenizer(true)
87     , m_doc(doc)
88     , m_preElement(0)
89     , m_skipLF(false)
90 {
91     // Allocate buffer
92     m_size = 254;
93     m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * m_size));
94     m_dest = m_buffer;
95 }
96 
~TextTokenizer()97 TextTokenizer::~TextTokenizer()
98 {
99     // finish() should have been called to prevent any leaks
100     ASSERT(!m_buffer);
101 }
102 
write(const SegmentedString & s,bool)103 void TextTokenizer::write(const SegmentedString& s, bool)
104 {
105     ExceptionCode ec;
106 
107     m_dest = m_buffer;
108 
109     SegmentedString str = s;
110     while (!str.isEmpty()) {
111         UChar c = *str;
112 
113         if (c == '\r') {
114             *m_dest++ = '\n';
115 
116             // possibly skip an LF in the case of an CRLF sequence
117             m_skipLF = true;
118         } else if (c == '\n') {
119             if (!m_skipLF)
120                 *m_dest++ = c;
121             else
122                 m_skipLF = false;
123         } else {
124             *m_dest++ = c;
125             m_skipLF = false;
126         }
127 
128         str.advance();
129 
130         // Maybe enlarge the buffer
131         checkBuffer();
132     }
133 
134     if (!m_preElement && !inViewSourceMode()) {
135         RefPtr<Element> rootElement = m_doc->createElement(htmlTag, false);
136         m_doc->appendChild(rootElement, ec);
137 
138         RefPtr<Element> body = m_doc->createElement(bodyTag, false);
139         rootElement->appendChild(body, ec);
140 
141         RefPtr<Element> preElement = m_doc->createElement(preTag, false);
142         preElement->setAttribute("style", "word-wrap: break-word; white-space: pre-wrap;", ec);
143 
144         body->appendChild(preElement, ec);
145 
146         m_preElement = preElement.get();
147     }
148 
149     String string = String(m_buffer, m_dest - m_buffer);
150     if (inViewSourceMode()) {
151         static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceText(string);
152         return;
153     }
154 
155     unsigned charsLeft = string.length();
156     while (charsLeft) {
157         // split large text to nodes of manageable size
158         RefPtr<Text> text = Text::createWithLengthLimit(m_doc, string, charsLeft);
159         m_preElement->appendChild(text, ec);
160     }
161 }
162 
finish()163 void TextTokenizer::finish()
164 {
165     if (!m_preElement)
166         write(SegmentedString(), true); // Create document structure for an empty text document.
167     m_preElement = 0;
168     fastFree(m_buffer);
169     m_buffer = 0;
170     m_dest = 0;
171 
172     m_doc->finishedParsing();
173 }
174 
isWaitingForScripts() const175 bool TextTokenizer::isWaitingForScripts() const
176 {
177     // A text document is never waiting for scripts
178     return false;
179 }
180 
TextDocument(Frame * frame)181 TextDocument::TextDocument(Frame* frame)
182     : HTMLDocument(frame)
183 {
184 }
185 
createTokenizer()186 Tokenizer* TextDocument::createTokenizer()
187 {
188     return new TextTokenizer(this);
189 }
190 
createTextTokenizer(HTMLViewSourceDocument * document)191 Tokenizer* createTextTokenizer(HTMLViewSourceDocument* document)
192 {
193     return new TextTokenizer(document);
194 }
195 
196 }
197