1 /* 2 * Created by Phil on 19/07/2017. 3 * 4 * Distributed under the Boost Software License, Version 1.0. (See accompanying 5 * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 */ 7 8 #include "catch_xmlwriter.h" 9 10 #include "catch_enforce.h" 11 12 #include <iomanip> 13 #include <type_traits> 14 15 namespace Catch { 16 17 namespace { 18 trailingBytes(unsigned char c)19 size_t trailingBytes(unsigned char c) { 20 if ((c & 0xE0) == 0xC0) { 21 return 2; 22 } 23 if ((c & 0xF0) == 0xE0) { 24 return 3; 25 } 26 if ((c & 0xF8) == 0xF0) { 27 return 4; 28 } 29 CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered"); 30 } 31 headerValue(unsigned char c)32 uint32_t headerValue(unsigned char c) { 33 if ((c & 0xE0) == 0xC0) { 34 return c & 0x1F; 35 } 36 if ((c & 0xF0) == 0xE0) { 37 return c & 0x0F; 38 } 39 if ((c & 0xF8) == 0xF0) { 40 return c & 0x07; 41 } 42 CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered"); 43 } 44 hexEscapeChar(std::ostream & os,unsigned char c)45 void hexEscapeChar(std::ostream& os, unsigned char c) { 46 std::ios_base::fmtflags f(os.flags()); 47 os << "\\x" 48 << std::uppercase << std::hex << std::setfill('0') << std::setw(2) 49 << static_cast<int>(c); 50 os.flags(f); 51 } 52 shouldNewline(XmlFormatting fmt)53 bool shouldNewline(XmlFormatting fmt) { 54 return !!(static_cast<std::underlying_type<XmlFormatting>::type>(fmt & XmlFormatting::Newline)); 55 } 56 shouldIndent(XmlFormatting fmt)57 bool shouldIndent(XmlFormatting fmt) { 58 return !!(static_cast<std::underlying_type<XmlFormatting>::type>(fmt & XmlFormatting::Indent)); 59 } 60 61 } // anonymous namespace 62 operator |(XmlFormatting lhs,XmlFormatting rhs)63 XmlFormatting operator | (XmlFormatting lhs, XmlFormatting rhs) { 64 return static_cast<XmlFormatting>( 65 static_cast<std::underlying_type<XmlFormatting>::type>(lhs) | 66 static_cast<std::underlying_type<XmlFormatting>::type>(rhs) 67 ); 68 } 69 operator &(XmlFormatting lhs,XmlFormatting rhs)70 XmlFormatting operator & (XmlFormatting lhs, XmlFormatting rhs) { 71 return static_cast<XmlFormatting>( 72 static_cast<std::underlying_type<XmlFormatting>::type>(lhs) & 73 static_cast<std::underlying_type<XmlFormatting>::type>(rhs) 74 ); 75 } 76 77 XmlEncode(std::string const & str,ForWhat forWhat)78 XmlEncode::XmlEncode( std::string const& str, ForWhat forWhat ) 79 : m_str( str ), 80 m_forWhat( forWhat ) 81 {} 82 encodeTo(std::ostream & os) const83 void XmlEncode::encodeTo( std::ostream& os ) const { 84 // Apostrophe escaping not necessary if we always use " to write attributes 85 // (see: http://www.w3.org/TR/xml/#syntax) 86 87 for( std::size_t idx = 0; idx < m_str.size(); ++ idx ) { 88 unsigned char c = m_str[idx]; 89 switch (c) { 90 case '<': os << "<"; break; 91 case '&': os << "&"; break; 92 93 case '>': 94 // See: http://www.w3.org/TR/xml/#syntax 95 if (idx > 2 && m_str[idx - 1] == ']' && m_str[idx - 2] == ']') 96 os << ">"; 97 else 98 os << c; 99 break; 100 101 case '\"': 102 if (m_forWhat == ForAttributes) 103 os << """; 104 else 105 os << c; 106 break; 107 108 default: 109 // Check for control characters and invalid utf-8 110 111 // Escape control characters in standard ascii 112 // see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0 113 if (c < 0x09 || (c > 0x0D && c < 0x20) || c == 0x7F) { 114 hexEscapeChar(os, c); 115 break; 116 } 117 118 // Plain ASCII: Write it to stream 119 if (c < 0x7F) { 120 os << c; 121 break; 122 } 123 124 // UTF-8 territory 125 // Check if the encoding is valid and if it is not, hex escape bytes. 126 // Important: We do not check the exact decoded values for validity, only the encoding format 127 // First check that this bytes is a valid lead byte: 128 // This means that it is not encoded as 1111 1XXX 129 // Or as 10XX XXXX 130 if (c < 0xC0 || 131 c >= 0xF8) { 132 hexEscapeChar(os, c); 133 break; 134 } 135 136 auto encBytes = trailingBytes(c); 137 // Are there enough bytes left to avoid accessing out-of-bounds memory? 138 if (idx + encBytes - 1 >= m_str.size()) { 139 hexEscapeChar(os, c); 140 break; 141 } 142 // The header is valid, check data 143 // The next encBytes bytes must together be a valid utf-8 144 // This means: bitpattern 10XX XXXX and the extracted value is sane (ish) 145 bool valid = true; 146 uint32_t value = headerValue(c); 147 for (std::size_t n = 1; n < encBytes; ++n) { 148 unsigned char nc = m_str[idx + n]; 149 valid &= ((nc & 0xC0) == 0x80); 150 value = (value << 6) | (nc & 0x3F); 151 } 152 153 if ( 154 // Wrong bit pattern of following bytes 155 (!valid) || 156 // Overlong encodings 157 (value < 0x80) || 158 (0x80 <= value && value < 0x800 && encBytes > 2) || 159 (0x800 < value && value < 0x10000 && encBytes > 3) || 160 // Encoded value out of range 161 (value >= 0x110000) 162 ) { 163 hexEscapeChar(os, c); 164 break; 165 } 166 167 // If we got here, this is in fact a valid(ish) utf-8 sequence 168 for (std::size_t n = 0; n < encBytes; ++n) { 169 os << m_str[idx + n]; 170 } 171 idx += encBytes - 1; 172 break; 173 } 174 } 175 } 176 operator <<(std::ostream & os,XmlEncode const & xmlEncode)177 std::ostream& operator << ( std::ostream& os, XmlEncode const& xmlEncode ) { 178 xmlEncode.encodeTo( os ); 179 return os; 180 } 181 ScopedElement(XmlWriter * writer,XmlFormatting fmt)182 XmlWriter::ScopedElement::ScopedElement( XmlWriter* writer, XmlFormatting fmt ) 183 : m_writer( writer ), 184 m_fmt(fmt) 185 {} 186 ScopedElement(ScopedElement && other)187 XmlWriter::ScopedElement::ScopedElement( ScopedElement&& other ) noexcept 188 : m_writer( other.m_writer ), 189 m_fmt(other.m_fmt) 190 { 191 other.m_writer = nullptr; 192 other.m_fmt = XmlFormatting::None; 193 } operator =(ScopedElement && other)194 XmlWriter::ScopedElement& XmlWriter::ScopedElement::operator=( ScopedElement&& other ) noexcept { 195 if ( m_writer ) { 196 m_writer->endElement(); 197 } 198 m_writer = other.m_writer; 199 other.m_writer = nullptr; 200 m_fmt = other.m_fmt; 201 other.m_fmt = XmlFormatting::None; 202 return *this; 203 } 204 205 ~ScopedElement()206 XmlWriter::ScopedElement::~ScopedElement() { 207 if (m_writer) { 208 m_writer->endElement(m_fmt); 209 } 210 } 211 writeText(std::string const & text,XmlFormatting fmt)212 XmlWriter::ScopedElement& XmlWriter::ScopedElement::writeText( std::string const& text, XmlFormatting fmt ) { 213 m_writer->writeText( text, fmt ); 214 return *this; 215 } 216 XmlWriter(std::ostream & os)217 XmlWriter::XmlWriter( std::ostream& os ) : m_os( os ) 218 { 219 writeDeclaration(); 220 } 221 ~XmlWriter()222 XmlWriter::~XmlWriter() { 223 while (!m_tags.empty()) { 224 endElement(); 225 } 226 newlineIfNecessary(); 227 } 228 startElement(std::string const & name,XmlFormatting fmt)229 XmlWriter& XmlWriter::startElement( std::string const& name, XmlFormatting fmt ) { 230 ensureTagClosed(); 231 newlineIfNecessary(); 232 if (shouldIndent(fmt)) { 233 m_os << m_indent; 234 m_indent += " "; 235 } 236 m_os << '<' << name; 237 m_tags.push_back( name ); 238 m_tagIsOpen = true; 239 applyFormatting(fmt); 240 return *this; 241 } 242 scopedElement(std::string const & name,XmlFormatting fmt)243 XmlWriter::ScopedElement XmlWriter::scopedElement( std::string const& name, XmlFormatting fmt ) { 244 ScopedElement scoped( this, fmt ); 245 startElement( name, fmt ); 246 return scoped; 247 } 248 endElement(XmlFormatting fmt)249 XmlWriter& XmlWriter::endElement(XmlFormatting fmt) { 250 m_indent = m_indent.substr(0, m_indent.size() - 2); 251 252 if( m_tagIsOpen ) { 253 m_os << "/>"; 254 m_tagIsOpen = false; 255 } else { 256 newlineIfNecessary(); 257 if (shouldIndent(fmt)) { 258 m_os << m_indent; 259 } 260 m_os << "</" << m_tags.back() << ">"; 261 } 262 m_os << std::flush; 263 applyFormatting(fmt); 264 m_tags.pop_back(); 265 return *this; 266 } 267 writeAttribute(std::string const & name,std::string const & attribute)268 XmlWriter& XmlWriter::writeAttribute( std::string const& name, std::string const& attribute ) { 269 if( !name.empty() && !attribute.empty() ) 270 m_os << ' ' << name << "=\"" << XmlEncode( attribute, XmlEncode::ForAttributes ) << '"'; 271 return *this; 272 } 273 writeAttribute(std::string const & name,bool attribute)274 XmlWriter& XmlWriter::writeAttribute( std::string const& name, bool attribute ) { 275 m_os << ' ' << name << "=\"" << ( attribute ? "true" : "false" ) << '"'; 276 return *this; 277 } 278 writeText(std::string const & text,XmlFormatting fmt)279 XmlWriter& XmlWriter::writeText( std::string const& text, XmlFormatting fmt) { 280 if( !text.empty() ){ 281 bool tagWasOpen = m_tagIsOpen; 282 ensureTagClosed(); 283 if (tagWasOpen && shouldIndent(fmt)) { 284 m_os << m_indent; 285 } 286 m_os << XmlEncode( text ); 287 applyFormatting(fmt); 288 } 289 return *this; 290 } 291 writeComment(std::string const & text,XmlFormatting fmt)292 XmlWriter& XmlWriter::writeComment( std::string const& text, XmlFormatting fmt) { 293 ensureTagClosed(); 294 if (shouldIndent(fmt)) { 295 m_os << m_indent; 296 } 297 m_os << "<!--" << text << "-->"; 298 applyFormatting(fmt); 299 return *this; 300 } 301 writeStylesheetRef(std::string const & url)302 void XmlWriter::writeStylesheetRef( std::string const& url ) { 303 m_os << "<?xml-stylesheet type=\"text/xsl\" href=\"" << url << "\"?>\n"; 304 } 305 writeBlankLine()306 XmlWriter& XmlWriter::writeBlankLine() { 307 ensureTagClosed(); 308 m_os << '\n'; 309 return *this; 310 } 311 ensureTagClosed()312 void XmlWriter::ensureTagClosed() { 313 if( m_tagIsOpen ) { 314 m_os << '>' << std::flush; 315 newlineIfNecessary(); 316 m_tagIsOpen = false; 317 } 318 } 319 applyFormatting(XmlFormatting fmt)320 void XmlWriter::applyFormatting(XmlFormatting fmt) { 321 m_needsNewline = shouldNewline(fmt); 322 } 323 writeDeclaration()324 void XmlWriter::writeDeclaration() { 325 m_os << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; 326 } 327 newlineIfNecessary()328 void XmlWriter::newlineIfNecessary() { 329 if( m_needsNewline ) { 330 m_os << std::endl; 331 m_needsNewline = false; 332 } 333 } 334 } 335