1 /*
2 * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
20 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 */
24
25 #include "config.h"
26 #include "TextDocument.h"
27
28 #include "Element.h"
29 #include "HTMLNames.h"
30 #include "HTMLViewSourceDocument.h"
31 #include "SegmentedString.h"
32 #include "Text.h"
33 #include "XMLTokenizer.h"
34
35 using namespace std;
36
37 namespace WebCore {
38
39 using namespace HTMLNames;
40
41 class TextTokenizer : public Tokenizer {
42 public:
43 TextTokenizer(Document*);
44 TextTokenizer(HTMLViewSourceDocument*);
45
46 virtual bool write(const SegmentedString&, bool appendData);
47 virtual void finish();
48 virtual bool isWaitingForScripts() const;
49
checkBuffer(int len=10)50 inline void checkBuffer(int len = 10)
51 {
52 if ((m_dest - m_buffer) > m_size - len) {
53 // Enlarge buffer
54 int newSize = std::max(m_size * 2, m_size + len);
55 int oldOffset = m_dest - m_buffer;
56 m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, newSize * sizeof(UChar)));
57 m_dest = m_buffer + oldOffset;
58 m_size = newSize;
59 }
60 }
61
62 private:
63 Document* m_doc;
64 Element* m_preElement;
65
66 bool m_skipLF;
67
68 int m_size;
69 UChar* m_buffer;
70 UChar* m_dest;
71 };
72
TextTokenizer(Document * doc)73 TextTokenizer::TextTokenizer(Document* doc)
74 : m_doc(doc)
75 , m_preElement(0)
76 , m_skipLF(false)
77 {
78 // Allocate buffer
79 m_size = 254;
80 m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * m_size));
81 m_dest = m_buffer;
82 }
83
TextTokenizer(HTMLViewSourceDocument * doc)84 TextTokenizer::TextTokenizer(HTMLViewSourceDocument* doc)
85 : Tokenizer(true)
86 , m_doc(doc)
87 , m_preElement(0)
88 , m_skipLF(false)
89 {
90 // Allocate buffer
91 m_size = 254;
92 m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * m_size));
93 m_dest = m_buffer;
94 }
95
write(const SegmentedString & s,bool)96 bool TextTokenizer::write(const SegmentedString& s, bool)
97 {
98 ExceptionCode ec;
99
100 m_dest = m_buffer;
101
102 SegmentedString str = s;
103 while (!str.isEmpty()) {
104 UChar c = *str;
105
106 if (c == '\r') {
107 *m_dest++ = '\n';
108
109 // possibly skip an LF in the case of an CRLF sequence
110 m_skipLF = true;
111 } else if (c == '\n') {
112 if (!m_skipLF)
113 *m_dest++ = c;
114 else
115 m_skipLF = false;
116 } else {
117 *m_dest++ = c;
118 m_skipLF = false;
119 }
120
121 str.advance();
122
123 // Maybe enlarge the buffer
124 checkBuffer();
125 }
126
127 if (!m_preElement && !inViewSourceMode()) {
128 RefPtr<Element> rootElement = m_doc->createElementNS(xhtmlNamespaceURI, "html", ec);
129 m_doc->appendChild(rootElement, ec);
130
131 RefPtr<Element> body = m_doc->createElementNS(xhtmlNamespaceURI, "body", ec);
132 rootElement->appendChild(body, ec);
133
134 RefPtr<Element> preElement = m_doc->createElementNS(xhtmlNamespaceURI, "pre", ec);
135 preElement->setAttribute("style", "word-wrap: break-word; white-space: pre-wrap;", ec);
136
137 body->appendChild(preElement, ec);
138
139 m_preElement = preElement.get();
140 }
141
142 String string = String(m_buffer, m_dest - m_buffer);
143 if (inViewSourceMode()) {
144 static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceText(string);
145 return false;
146 }
147
148 unsigned charsLeft = string.length();
149 while (charsLeft) {
150 // split large text to nodes of manageable size
151 RefPtr<Text> text = Text::createWithLengthLimit(m_doc, string, charsLeft);
152 m_preElement->appendChild(text, ec);
153 }
154
155 return false;
156 }
157
finish()158 void TextTokenizer::finish()
159 {
160 m_preElement = 0;
161 fastFree(m_buffer);
162
163 m_doc->finishedParsing();
164 }
165
isWaitingForScripts() const166 bool TextTokenizer::isWaitingForScripts() const
167 {
168 // A text document is never waiting for scripts
169 return false;
170 }
171
TextDocument(Frame * frame)172 TextDocument::TextDocument(Frame* frame)
173 : HTMLDocument(frame)
174 {
175 }
176
createTokenizer()177 Tokenizer* TextDocument::createTokenizer()
178 {
179 return new TextTokenizer(this);
180 }
181
createTextTokenizer(HTMLViewSourceDocument * document)182 Tokenizer* createTextTokenizer(HTMLViewSourceDocument* document)
183 {
184 return new TextTokenizer(document);
185 }
186
187 }
188