/*
 * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
24
25 #include "config.h"
26 #include "TextDocument.h"
27
28 #include "Element.h"
29 #include "HTMLNames.h"
30 #include "HTMLViewSourceDocument.h"
31 #include "SegmentedString.h"
32 #include "Text.h"
33 #include "XMLTokenizer.h"
34
35 using namespace std;
36
37 namespace WebCore {
38
39 using namespace HTMLNames;
40
41 class TextTokenizer : public Tokenizer {
42 public:
43 TextTokenizer(Document*);
44 virtual ~TextTokenizer();
45 TextTokenizer(HTMLViewSourceDocument*);
46
47 virtual void write(const SegmentedString&, bool appendData);
48 virtual void finish();
49 virtual bool isWaitingForScripts() const;
50
checkBuffer(int len=10)51 inline void checkBuffer(int len = 10)
52 {
53 if ((m_dest - m_buffer) > m_size - len) {
54 // Enlarge buffer
55 int newSize = std::max(m_size * 2, m_size + len);
56 int oldOffset = m_dest - m_buffer;
57 m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, newSize * sizeof(UChar)));
58 m_dest = m_buffer + oldOffset;
59 m_size = newSize;
60 }
61 }
62
63 private:
64 Document* m_doc;
65 Element* m_preElement;
66
67 bool m_skipLF;
68
69 int m_size;
70 UChar* m_buffer;
71 UChar* m_dest;
72 };
73
TextTokenizer(Document * doc)74 TextTokenizer::TextTokenizer(Document* doc)
75 : m_doc(doc)
76 , m_preElement(0)
77 , m_skipLF(false)
78 {
79 // Allocate buffer
80 m_size = 254;
81 m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * m_size));
82 m_dest = m_buffer;
83 }
84
TextTokenizer(HTMLViewSourceDocument * doc)85 TextTokenizer::TextTokenizer(HTMLViewSourceDocument* doc)
86 : Tokenizer(true)
87 , m_doc(doc)
88 , m_preElement(0)
89 , m_skipLF(false)
90 {
91 // Allocate buffer
92 m_size = 254;
93 m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * m_size));
94 m_dest = m_buffer;
95 }
96
~TextTokenizer()97 TextTokenizer::~TextTokenizer()
98 {
99 // finish() should have been called to prevent any leaks
100 ASSERT(!m_buffer);
101 }
102
write(const SegmentedString & s,bool)103 void TextTokenizer::write(const SegmentedString& s, bool)
104 {
105 ExceptionCode ec;
106
107 m_dest = m_buffer;
108
109 SegmentedString str = s;
110 while (!str.isEmpty()) {
111 UChar c = *str;
112
113 if (c == '\r') {
114 *m_dest++ = '\n';
115
116 // possibly skip an LF in the case of an CRLF sequence
117 m_skipLF = true;
118 } else if (c == '\n') {
119 if (!m_skipLF)
120 *m_dest++ = c;
121 else
122 m_skipLF = false;
123 } else {
124 *m_dest++ = c;
125 m_skipLF = false;
126 }
127
128 str.advance();
129
130 // Maybe enlarge the buffer
131 checkBuffer();
132 }
133
134 if (!m_preElement && !inViewSourceMode()) {
135 RefPtr<Element> rootElement = m_doc->createElement(htmlTag, false);
136 m_doc->appendChild(rootElement, ec);
137
138 RefPtr<Element> body = m_doc->createElement(bodyTag, false);
139 rootElement->appendChild(body, ec);
140
141 RefPtr<Element> preElement = m_doc->createElement(preTag, false);
142 preElement->setAttribute("style", "word-wrap: break-word; white-space: pre-wrap;", ec);
143
144 body->appendChild(preElement, ec);
145
146 m_preElement = preElement.get();
147 }
148
149 String string = String(m_buffer, m_dest - m_buffer);
150 if (inViewSourceMode()) {
151 static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceText(string);
152 return;
153 }
154
155 unsigned charsLeft = string.length();
156 while (charsLeft) {
157 // split large text to nodes of manageable size
158 RefPtr<Text> text = Text::createWithLengthLimit(m_doc, string, charsLeft);
159 m_preElement->appendChild(text, ec);
160 }
161 }
162
finish()163 void TextTokenizer::finish()
164 {
165 if (!m_preElement)
166 write(SegmentedString(), true); // Create document structure for an empty text document.
167 m_preElement = 0;
168 fastFree(m_buffer);
169 m_buffer = 0;
170 m_dest = 0;
171
172 m_doc->finishedParsing();
173 }
174
isWaitingForScripts() const175 bool TextTokenizer::isWaitingForScripts() const
176 {
177 // A text document is never waiting for scripts
178 return false;
179 }
180
TextDocument(Frame * frame)181 TextDocument::TextDocument(Frame* frame)
182 : HTMLDocument(frame)
183 {
184 }
185
createTokenizer()186 Tokenizer* TextDocument::createTokenizer()
187 {
188 return new TextTokenizer(this);
189 }
190
createTextTokenizer(HTMLViewSourceDocument * document)191 Tokenizer* createTextTokenizer(HTMLViewSourceDocument* document)
192 {
193 return new TextTokenizer(document);
194 }
195
196 }
197