1 /* 2 * Created by Phil on 19/07/2017. 3 * 4 * Distributed under the Boost Software License, Version 1.0. (See accompanying 5 * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 */ 7 8 #include "catch_xmlwriter.h" 9 10 #include "catch_enforce.h" 11 12 #include <iomanip> 13 14 using uchar = unsigned char; 15 16 namespace Catch { 17 18 namespace { 19 trailingBytes(unsigned char c)20 size_t trailingBytes(unsigned char c) { 21 if ((c & 0xE0) == 0xC0) { 22 return 2; 23 } 24 if ((c & 0xF0) == 0xE0) { 25 return 3; 26 } 27 if ((c & 0xF8) == 0xF0) { 28 return 4; 29 } 30 CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered"); 31 } 32 headerValue(unsigned char c)33 uint32_t headerValue(unsigned char c) { 34 if ((c & 0xE0) == 0xC0) { 35 return c & 0x1F; 36 } 37 if ((c & 0xF0) == 0xE0) { 38 return c & 0x0F; 39 } 40 if ((c & 0xF8) == 0xF0) { 41 return c & 0x07; 42 } 43 CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered"); 44 } 45 hexEscapeChar(std::ostream & os,unsigned char c)46 void hexEscapeChar(std::ostream& os, unsigned char c) { 47 std::ios_base::fmtflags f(os.flags()); 48 os << "\\x" 49 << std::uppercase << std::hex << std::setfill('0') << std::setw(2) 50 << static_cast<int>(c); 51 os.flags(f); 52 } 53 54 } // anonymous namespace 55 XmlEncode(std::string const & str,ForWhat forWhat)56 XmlEncode::XmlEncode( std::string const& str, ForWhat forWhat ) 57 : m_str( str ), 58 m_forWhat( forWhat ) 59 {} 60 encodeTo(std::ostream & os) const61 void XmlEncode::encodeTo( std::ostream& os ) const { 62 // Apostrophe escaping not necessary if we always use " to write attributes 63 // (see: http://www.w3.org/TR/xml/#syntax) 64 65 for( std::size_t idx = 0; idx < m_str.size(); ++ idx ) { 66 uchar c = m_str[idx]; 67 switch (c) { 68 case '<': os << "<"; break; 69 case '&': os << "&"; break; 70 71 case '>': 72 // See: http://www.w3.org/TR/xml/#syntax 73 if (idx > 2 && m_str[idx - 1] == ']' && m_str[idx - 2] == ']') 74 os << ">"; 75 else 76 os << c; 77 break; 78 79 case '\"': 80 if (m_forWhat == ForAttributes) 81 os << """; 82 else 83 os << c; 84 break; 85 86 default: 87 // Check for control characters and invalid utf-8 88 89 // Escape control characters in standard ascii 90 // see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0 91 if (c < 0x09 || (c > 0x0D && c < 0x20) || c == 0x7F) { 92 hexEscapeChar(os, c); 93 break; 94 } 95 96 // Plain ASCII: Write it to stream 97 if (c < 0x7F) { 98 os << c; 99 break; 100 } 101 102 // UTF-8 territory 103 // Check if the encoding is valid and if it is not, hex escape bytes. 104 // Important: We do not check the exact decoded values for validity, only the encoding format 105 // First check that this bytes is a valid lead byte: 106 // This means that it is not encoded as 1111 1XXX 107 // Or as 10XX XXXX 108 if (c < 0xC0 || 109 c >= 0xF8) { 110 hexEscapeChar(os, c); 111 break; 112 } 113 114 auto encBytes = trailingBytes(c); 115 // Are there enough bytes left to avoid accessing out-of-bounds memory? 116 if (idx + encBytes - 1 >= m_str.size()) { 117 hexEscapeChar(os, c); 118 break; 119 } 120 // The header is valid, check data 121 // The next encBytes bytes must together be a valid utf-8 122 // This means: bitpattern 10XX XXXX and the extracted value is sane (ish) 123 bool valid = true; 124 uint32_t value = headerValue(c); 125 for (std::size_t n = 1; n < encBytes; ++n) { 126 uchar nc = m_str[idx + n]; 127 valid &= ((nc & 0xC0) == 0x80); 128 value = (value << 6) | (nc & 0x3F); 129 } 130 131 if ( 132 // Wrong bit pattern of following bytes 133 (!valid) || 134 // Overlong encodings 135 (value < 0x80) || 136 (0x80 <= value && value < 0x800 && encBytes > 2) || 137 (0x800 < value && value < 0x10000 && encBytes > 3) || 138 // Encoded value out of range 139 (value >= 0x110000) 140 ) { 141 hexEscapeChar(os, c); 142 break; 143 } 144 145 // If we got here, this is in fact a valid(ish) utf-8 sequence 146 for (std::size_t n = 0; n < encBytes; ++n) { 147 os << m_str[idx + n]; 148 } 149 idx += encBytes - 1; 150 break; 151 } 152 } 153 } 154 operator <<(std::ostream & os,XmlEncode const & xmlEncode)155 std::ostream& operator << ( std::ostream& os, XmlEncode const& xmlEncode ) { 156 xmlEncode.encodeTo( os ); 157 return os; 158 } 159 ScopedElement(XmlWriter * writer)160 XmlWriter::ScopedElement::ScopedElement( XmlWriter* writer ) 161 : m_writer( writer ) 162 {} 163 ScopedElement(ScopedElement && other)164 XmlWriter::ScopedElement::ScopedElement( ScopedElement&& other ) noexcept 165 : m_writer( other.m_writer ){ 166 other.m_writer = nullptr; 167 } operator =(ScopedElement && other)168 XmlWriter::ScopedElement& XmlWriter::ScopedElement::operator=( ScopedElement&& other ) noexcept { 169 if ( m_writer ) { 170 m_writer->endElement(); 171 } 172 m_writer = other.m_writer; 173 other.m_writer = nullptr; 174 return *this; 175 } 176 177 ~ScopedElement()178 XmlWriter::ScopedElement::~ScopedElement() { 179 if( m_writer ) 180 m_writer->endElement(); 181 } 182 writeText(std::string const & text,bool indent)183 XmlWriter::ScopedElement& XmlWriter::ScopedElement::writeText( std::string const& text, bool indent ) { 184 m_writer->writeText( text, indent ); 185 return *this; 186 } 187 XmlWriter(std::ostream & os)188 XmlWriter::XmlWriter( std::ostream& os ) : m_os( os ) 189 { 190 writeDeclaration(); 191 } 192 ~XmlWriter()193 XmlWriter::~XmlWriter() { 194 while( !m_tags.empty() ) 195 endElement(); 196 } 197 startElement(std::string const & name)198 XmlWriter& XmlWriter::startElement( std::string const& name ) { 199 ensureTagClosed(); 200 newlineIfNecessary(); 201 m_os << m_indent << '<' << name; 202 m_tags.push_back( name ); 203 m_indent += " "; 204 m_tagIsOpen = true; 205 return *this; 206 } 207 scopedElement(std::string const & name)208 XmlWriter::ScopedElement XmlWriter::scopedElement( std::string const& name ) { 209 ScopedElement scoped( this ); 210 startElement( name ); 211 return scoped; 212 } 213 endElement()214 XmlWriter& XmlWriter::endElement() { 215 newlineIfNecessary(); 216 m_indent = m_indent.substr( 0, m_indent.size()-2 ); 217 if( m_tagIsOpen ) { 218 m_os << "/>"; 219 m_tagIsOpen = false; 220 } 221 else { 222 m_os << m_indent << "</" << m_tags.back() << ">"; 223 } 224 m_os << std::endl; 225 m_tags.pop_back(); 226 return *this; 227 } 228 writeAttribute(std::string const & name,std::string const & attribute)229 XmlWriter& XmlWriter::writeAttribute( std::string const& name, std::string const& attribute ) { 230 if( !name.empty() && !attribute.empty() ) 231 m_os << ' ' << name << "=\"" << XmlEncode( attribute, XmlEncode::ForAttributes ) << '"'; 232 return *this; 233 } 234 writeAttribute(std::string const & name,bool attribute)235 XmlWriter& XmlWriter::writeAttribute( std::string const& name, bool attribute ) { 236 m_os << ' ' << name << "=\"" << ( attribute ? "true" : "false" ) << '"'; 237 return *this; 238 } 239 writeText(std::string const & text,bool indent)240 XmlWriter& XmlWriter::writeText( std::string const& text, bool indent ) { 241 if( !text.empty() ){ 242 bool tagWasOpen = m_tagIsOpen; 243 ensureTagClosed(); 244 if( tagWasOpen && indent ) 245 m_os << m_indent; 246 m_os << XmlEncode( text ); 247 m_needsNewline = true; 248 } 249 return *this; 250 } 251 writeComment(std::string const & text)252 XmlWriter& XmlWriter::writeComment( std::string const& text ) { 253 ensureTagClosed(); 254 m_os << m_indent << "<!--" << text << "-->"; 255 m_needsNewline = true; 256 return *this; 257 } 258 writeStylesheetRef(std::string const & url)259 void XmlWriter::writeStylesheetRef( std::string const& url ) { 260 m_os << "<?xml-stylesheet type=\"text/xsl\" href=\"" << url << "\"?>\n"; 261 } 262 writeBlankLine()263 XmlWriter& XmlWriter::writeBlankLine() { 264 ensureTagClosed(); 265 m_os << '\n'; 266 return *this; 267 } 268 ensureTagClosed()269 void XmlWriter::ensureTagClosed() { 270 if( m_tagIsOpen ) { 271 m_os << ">" << std::endl; 272 m_tagIsOpen = false; 273 } 274 } 275 writeDeclaration()276 void XmlWriter::writeDeclaration() { 277 m_os << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; 278 } 279 newlineIfNecessary()280 void XmlWriter::newlineIfNecessary() { 281 if( m_needsNewline ) { 282 m_os << std::endl; 283 m_needsNewline = false; 284 } 285 } 286 } 287