• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Created by Phil on 19/07/2017.
3  *
4  *  Distributed under the Boost Software License, Version 1.0. (See accompanying
5  *  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6  */
7 
8 #include "catch_xmlwriter.h"
9 
10 #include "catch_enforce.h"
11 
12 #include <iomanip>
13 
14 using uchar = unsigned char;
15 
16 namespace Catch {
17 
18 namespace {
19 
trailingBytes(unsigned char c)20     size_t trailingBytes(unsigned char c) {
21         if ((c & 0xE0) == 0xC0) {
22             return 2;
23         }
24         if ((c & 0xF0) == 0xE0) {
25             return 3;
26         }
27         if ((c & 0xF8) == 0xF0) {
28             return 4;
29         }
30         CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered");
31     }
32 
headerValue(unsigned char c)33     uint32_t headerValue(unsigned char c) {
34         if ((c & 0xE0) == 0xC0) {
35             return c & 0x1F;
36         }
37         if ((c & 0xF0) == 0xE0) {
38             return c & 0x0F;
39         }
40         if ((c & 0xF8) == 0xF0) {
41             return c & 0x07;
42         }
43         CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered");
44     }
45 
// Writes `c` to `os` as the four characters "\xHH" (two uppercase hex digits).
//
// The stream's format flags AND fill character are restored before returning,
// so the caller's formatting state is left untouched. The fill character is
// not part of fmtflags, which is why it is saved and restored separately.
void hexEscapeChar(std::ostream& os, unsigned char c) {
    const std::ios_base::fmtflags savedFlags(os.flags());
    os << "\\x";
    const char savedFill = os.fill('0');
    // Force right-adjustment and no base prefix so that flags set by the
    // caller (std::left, std::showbase) cannot distort the two-digit output.
    os << std::uppercase << std::hex << std::noshowbase << std::right
       << std::setw(2) << static_cast<int>(c);
    os.fill(savedFill);
    os.flags(savedFlags);
}
53 
54 } // anonymous namespace
55 
XmlEncode(std::string const & str,ForWhat forWhat)56     XmlEncode::XmlEncode( std::string const& str, ForWhat forWhat )
57     :   m_str( str ),
58         m_forWhat( forWhat )
59     {}
60 
encodeTo(std::ostream & os) const61     void XmlEncode::encodeTo( std::ostream& os ) const {
62         // Apostrophe escaping not necessary if we always use " to write attributes
63         // (see: http://www.w3.org/TR/xml/#syntax)
64 
65         for( std::size_t idx = 0; idx < m_str.size(); ++ idx ) {
66             uchar c = m_str[idx];
67             switch (c) {
68             case '<':   os << "&lt;"; break;
69             case '&':   os << "&amp;"; break;
70 
71             case '>':
72                 // See: http://www.w3.org/TR/xml/#syntax
73                 if (idx > 2 && m_str[idx - 1] == ']' && m_str[idx - 2] == ']')
74                     os << "&gt;";
75                 else
76                     os << c;
77                 break;
78 
79             case '\"':
80                 if (m_forWhat == ForAttributes)
81                     os << "&quot;";
82                 else
83                     os << c;
84                 break;
85 
86             default:
87                 // Check for control characters and invalid utf-8
88 
89                 // Escape control characters in standard ascii
90                 // see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0
91                 if (c < 0x09 || (c > 0x0D && c < 0x20) || c == 0x7F) {
92                     hexEscapeChar(os, c);
93                     break;
94                 }
95 
96                 // Plain ASCII: Write it to stream
97                 if (c < 0x7F) {
98                     os << c;
99                     break;
100                 }
101 
102                 // UTF-8 territory
103                 // Check if the encoding is valid and if it is not, hex escape bytes.
104                 // Important: We do not check the exact decoded values for validity, only the encoding format
105                 // First check that this bytes is a valid lead byte:
106                 // This means that it is not encoded as 1111 1XXX
107                 // Or as 10XX XXXX
108                 if (c <  0xC0 ||
109                     c >= 0xF8) {
110                     hexEscapeChar(os, c);
111                     break;
112                 }
113 
114                 auto encBytes = trailingBytes(c);
115                 // Are there enough bytes left to avoid accessing out-of-bounds memory?
116                 if (idx + encBytes - 1 >= m_str.size()) {
117                     hexEscapeChar(os, c);
118                     break;
119                 }
120                 // The header is valid, check data
121                 // The next encBytes bytes must together be a valid utf-8
122                 // This means: bitpattern 10XX XXXX and the extracted value is sane (ish)
123                 bool valid = true;
124                 uint32_t value = headerValue(c);
125                 for (std::size_t n = 1; n < encBytes; ++n) {
126                     uchar nc = m_str[idx + n];
127                     valid &= ((nc & 0xC0) == 0x80);
128                     value = (value << 6) | (nc & 0x3F);
129                 }
130 
131                 if (
132                     // Wrong bit pattern of following bytes
133                     (!valid) ||
134                     // Overlong encodings
135                     (value < 0x80) ||
136                     (0x80 <= value && value < 0x800   && encBytes > 2) ||
137                     (0x800 < value && value < 0x10000 && encBytes > 3) ||
138                     // Encoded value out of range
139                     (value >= 0x110000)
140                     ) {
141                     hexEscapeChar(os, c);
142                     break;
143                 }
144 
145                 // If we got here, this is in fact a valid(ish) utf-8 sequence
146                 for (std::size_t n = 0; n < encBytes; ++n) {
147                     os << m_str[idx + n];
148                 }
149                 idx += encBytes - 1;
150                 break;
151             }
152         }
153     }
154 
operator <<(std::ostream & os,XmlEncode const & xmlEncode)155     std::ostream& operator << ( std::ostream& os, XmlEncode const& xmlEncode ) {
156         xmlEncode.encodeTo( os );
157         return os;
158     }
159 
ScopedElement(XmlWriter * writer)160     XmlWriter::ScopedElement::ScopedElement( XmlWriter* writer )
161     :   m_writer( writer )
162     {}
163 
ScopedElement(ScopedElement && other)164     XmlWriter::ScopedElement::ScopedElement( ScopedElement&& other ) noexcept
165     :   m_writer( other.m_writer ){
166         other.m_writer = nullptr;
167     }
operator =(ScopedElement && other)168     XmlWriter::ScopedElement& XmlWriter::ScopedElement::operator=( ScopedElement&& other ) noexcept {
169         if ( m_writer ) {
170             m_writer->endElement();
171         }
172         m_writer = other.m_writer;
173         other.m_writer = nullptr;
174         return *this;
175     }
176 
177 
~ScopedElement()178     XmlWriter::ScopedElement::~ScopedElement() {
179         if( m_writer )
180             m_writer->endElement();
181     }
182 
writeText(std::string const & text,bool indent)183     XmlWriter::ScopedElement& XmlWriter::ScopedElement::writeText( std::string const& text, bool indent ) {
184         m_writer->writeText( text, indent );
185         return *this;
186     }
187 
XmlWriter(std::ostream & os)188     XmlWriter::XmlWriter( std::ostream& os ) : m_os( os )
189     {
190         writeDeclaration();
191     }
192 
~XmlWriter()193     XmlWriter::~XmlWriter() {
194         while( !m_tags.empty() )
195             endElement();
196     }
197 
startElement(std::string const & name)198     XmlWriter& XmlWriter::startElement( std::string const& name ) {
199         ensureTagClosed();
200         newlineIfNecessary();
201         m_os << m_indent << '<' << name;
202         m_tags.push_back( name );
203         m_indent += "  ";
204         m_tagIsOpen = true;
205         return *this;
206     }
207 
scopedElement(std::string const & name)208     XmlWriter::ScopedElement XmlWriter::scopedElement( std::string const& name ) {
209         ScopedElement scoped( this );
210         startElement( name );
211         return scoped;
212     }
213 
endElement()214     XmlWriter& XmlWriter::endElement() {
215         newlineIfNecessary();
216         m_indent = m_indent.substr( 0, m_indent.size()-2 );
217         if( m_tagIsOpen ) {
218             m_os << "/>";
219             m_tagIsOpen = false;
220         }
221         else {
222             m_os << m_indent << "</" << m_tags.back() << ">";
223         }
224         m_os << std::endl;
225         m_tags.pop_back();
226         return *this;
227     }
228 
writeAttribute(std::string const & name,std::string const & attribute)229     XmlWriter& XmlWriter::writeAttribute( std::string const& name, std::string const& attribute ) {
230         if( !name.empty() && !attribute.empty() )
231             m_os << ' ' << name << "=\"" << XmlEncode( attribute, XmlEncode::ForAttributes ) << '"';
232         return *this;
233     }
234 
writeAttribute(std::string const & name,bool attribute)235     XmlWriter& XmlWriter::writeAttribute( std::string const& name, bool attribute ) {
236         m_os << ' ' << name << "=\"" << ( attribute ? "true" : "false" ) << '"';
237         return *this;
238     }
239 
writeText(std::string const & text,bool indent)240     XmlWriter& XmlWriter::writeText( std::string const& text, bool indent ) {
241         if( !text.empty() ){
242             bool tagWasOpen = m_tagIsOpen;
243             ensureTagClosed();
244             if( tagWasOpen && indent )
245                 m_os << m_indent;
246             m_os << XmlEncode( text );
247             m_needsNewline = true;
248         }
249         return *this;
250     }
251 
writeComment(std::string const & text)252     XmlWriter& XmlWriter::writeComment( std::string const& text ) {
253         ensureTagClosed();
254         m_os << m_indent << "<!--" << text << "-->";
255         m_needsNewline = true;
256         return *this;
257     }
258 
writeStylesheetRef(std::string const & url)259     void XmlWriter::writeStylesheetRef( std::string const& url ) {
260         m_os << "<?xml-stylesheet type=\"text/xsl\" href=\"" << url << "\"?>\n";
261     }
262 
writeBlankLine()263     XmlWriter& XmlWriter::writeBlankLine() {
264         ensureTagClosed();
265         m_os << '\n';
266         return *this;
267     }
268 
ensureTagClosed()269     void XmlWriter::ensureTagClosed() {
270         if( m_tagIsOpen ) {
271             m_os << ">" << std::endl;
272             m_tagIsOpen = false;
273         }
274     }
275 
writeDeclaration()276     void XmlWriter::writeDeclaration() {
277         m_os << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
278     }
279 
newlineIfNecessary()280     void XmlWriter::newlineIfNecessary() {
281         if( m_needsNewline ) {
282             m_os << std::endl;
283             m_needsNewline = false;
284         }
285     }
286 }
287