1 /*============================================================================= 2 Copyright (c) 2002 2004 2006 Joel de Guzman 3 Copyright (c) 2004 Eric Niebler 4 http://spirit.sourceforge.net/ 5 6 Use, modification and distribution is subject to the Boost Software 7 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at 8 http://www.boost.org/LICENSE_1_0.txt) 9 =============================================================================*/ 10 #include "files.hpp" 11 #include <fstream> 12 #include <iterator> 13 #include <vector> 14 #include <boost/filesystem/fstream.hpp> 15 #include <boost/range/algorithm/transform.hpp> 16 #include <boost/range/algorithm/upper_bound.hpp> 17 #include <boost/unordered_map.hpp> 18 #include "for.hpp" 19 20 namespace quickbook 21 { 22 namespace 23 { 24 boost::unordered_map<fs::path, file_ptr> files; 25 } 26 27 // Read the first few bytes in a file to see it starts with a byte order 28 // mark. If it doesn't, then write the characters we've already read in. 29 // Although, given how UTF-8 works, if we've read anything in, the files 30 // probably broken. 31 32 template <typename InputIterator, typename OutputIterator> check_bom(InputIterator & begin,InputIterator end,OutputIterator out,char const * chars,int length)33 bool check_bom( 34 InputIterator& begin, 35 InputIterator end, 36 OutputIterator out, 37 char const* chars, 38 int length) 39 { 40 char const* ptr = chars; 41 42 while (begin != end && *begin == *ptr) { 43 ++begin; 44 ++ptr; 45 --length; 46 if (length == 0) return true; 47 } 48 49 // Failed to match, so write the skipped characters to storage: 50 while (chars != ptr) 51 *out++ = *chars++; 52 53 return false; 54 } 55 56 template <typename InputIterator, typename OutputIterator> read_bom(InputIterator & begin,InputIterator end,OutputIterator out)57 std::string read_bom( 58 InputIterator& begin, InputIterator end, OutputIterator out) 59 { 60 if (begin == end) return ""; 61 62 const char* utf8 = "\xef\xbb\xbf"; 63 const char* utf32be = "\0\0\xfe\xff"; 64 const char* utf32le = "\xff\xfe\0\0"; 65 66 unsigned char c = *begin; 67 switch (c) { 68 case 0xEF: { // UTF-8 69 return check_bom(begin, end, out, utf8, 3) ? "UTF-8" : ""; 70 } 71 case 0xFF: // UTF-16/UTF-32 little endian 72 return !check_bom(begin, end, out, utf32le, 2) 73 ? "" 74 : check_bom(begin, end, out, utf32le + 2, 2) ? "UTF-32" 75 : "UTF-16"; 76 case 0: // UTF-32 big endian 77 return check_bom(begin, end, out, utf32be, 4) ? "UTF-32" : ""; 78 case 0xFE: // UTF-16 big endian 79 return check_bom(begin, end, out, utf32be + 2, 2) ? "UTF-16" : ""; 80 default: 81 return ""; 82 } 83 } 84 85 // Copy a string, converting mac and windows style newlines to unix 86 // newlines. 87 88 template <typename InputIterator, typename OutputIterator> normalize(InputIterator begin,InputIterator end,OutputIterator out)89 void normalize(InputIterator begin, InputIterator end, OutputIterator out) 90 { 91 std::string encoding = read_bom(begin, end, out); 92 93 if (encoding != "UTF-8" && encoding != "") 94 throw load_error(encoding + " is not supported. Please use UTF-8."); 95 96 while (begin != end) { 97 if (*begin == '\r') { 98 *out++ = '\n'; 99 ++begin; 100 if (begin != end && *begin == '\n') ++begin; 101 } 102 else { 103 *out++ = *begin++; 104 } 105 } 106 } 107 load(fs::path const & filename,unsigned qbk_version)108 file_ptr load(fs::path const& filename, unsigned qbk_version) 109 { 110 boost::unordered_map<fs::path, file_ptr>::iterator pos = 111 files.find(filename); 112 113 if (pos == files.end()) { 114 fs::ifstream in(filename, std::ios_base::in); 115 116 if (!in) throw load_error("Could not open input file."); 117 118 // Turn off white space skipping on the stream 119 in.unsetf(std::ios::skipws); 120 121 std::string source; 122 normalize( 123 std::istream_iterator<char>(in), std::istream_iterator<char>(), 124 std::back_inserter(source)); 125 126 if (in.bad()) throw load_error("Error reading input file."); 127 128 bool inserted; 129 130 boost::tie(pos, inserted) = files.emplace( 131 filename, new file(filename, source, qbk_version)); 132 133 assert(inserted); 134 } 135 136 return pos->second; 137 } 138 operator <<(std::ostream & out,file_position const & x)139 std::ostream& operator<<(std::ostream& out, file_position const& x) 140 { 141 return out << "line: " << x.line << ", column: " << x.column; 142 } 143 relative_position(string_iterator begin,string_iterator iterator)144 file_position relative_position( 145 string_iterator begin, string_iterator iterator) 146 { 147 file_position pos; 148 string_iterator line_begin = begin; 149 150 while (begin != iterator) { 151 if (*begin == '\r') { 152 ++begin; 153 ++pos.line; 154 line_begin = begin; 155 } 156 else if (*begin == '\n') { 157 ++begin; 158 ++pos.line; 159 line_begin = begin; 160 if (begin == iterator) break; 161 if (*begin == '\r') { 162 ++begin; 163 line_begin = begin; 164 } 165 } 166 else { 167 ++begin; 168 } 169 } 170 171 pos.column = iterator - line_begin + 1; 172 return pos; 173 } 174 position_of(string_iterator iterator) const175 file_position file::position_of(string_iterator iterator) const 176 { 177 return relative_position(source().begin(), iterator); 178 } 179 180 // Mapped files. 181 182 struct mapped_file_section 183 { 184 enum section_types 185 { 186 normal, 187 empty, 188 indented 189 }; 190 191 std::string::size_type original_pos; 192 std::string::size_type our_pos; 193 section_types section_type; 194 mapped_file_sectionquickbook::mapped_file_section195 explicit mapped_file_section( 196 std::string::size_type original_pos_, 197 std::string::size_type our_pos_, 198 section_types section_type_ = normal) 199 : original_pos(original_pos_) 200 , our_pos(our_pos_) 201 , section_type(section_type_) 202 { 203 } 204 }; 205 206 struct mapped_section_original_cmp 207 { operator ()quickbook::mapped_section_original_cmp208 bool operator()( 209 mapped_file_section const& x, mapped_file_section const& y) 210 { 211 return x.original_pos < y.original_pos; 212 } 213 operator ()quickbook::mapped_section_original_cmp214 bool operator()( 215 mapped_file_section const& x, std::string::size_type const& y) 216 { 217 return x.original_pos < y; 218 } 219 operator ()quickbook::mapped_section_original_cmp220 bool operator()( 221 std::string::size_type const& x, mapped_file_section const& y) 222 { 223 return x < y.original_pos; 224 } 225 }; 226 227 struct mapped_section_pos_cmp 228 { operator ()quickbook::mapped_section_pos_cmp229 bool operator()( 230 mapped_file_section const& x, mapped_file_section const& y) 231 { 232 return x.our_pos < y.our_pos; 233 } 234 operator ()quickbook::mapped_section_pos_cmp235 bool operator()( 236 mapped_file_section const& x, std::string::size_type const& y) 237 { 238 return x.our_pos < y; 239 } 240 operator ()quickbook::mapped_section_pos_cmp241 bool operator()( 242 std::string::size_type const& x, mapped_file_section const& y) 243 { 244 return x < y.our_pos; 245 } 246 }; 247 248 struct mapped_file : file 249 { mapped_filequickbook::mapped_file250 explicit mapped_file(file_ptr original_) 251 : file(*original_, std::string()) 252 , original(original_) 253 , mapped_sections() 254 { 255 } 256 257 file_ptr original; 258 std::vector<mapped_file_section> mapped_sections; 259 add_empty_mapped_file_sectionquickbook::mapped_file260 void add_empty_mapped_file_section(string_iterator pos) 261 { 262 std::string::size_type original_pos = 263 pos - original->source().begin(); 264 265 if (mapped_sections.empty() || 266 mapped_sections.back().section_type != 267 mapped_file_section::empty || 268 mapped_sections.back().original_pos != original_pos) { 269 mapped_sections.push_back(mapped_file_section( 270 original_pos, source().size(), mapped_file_section::empty)); 271 } 272 } 273 add_mapped_file_sectionquickbook::mapped_file274 void add_mapped_file_section(string_iterator pos) 275 { 276 mapped_sections.push_back(mapped_file_section( 277 pos - original->source().begin(), source().size())); 278 } 279 add_indented_mapped_file_sectionquickbook::mapped_file280 void add_indented_mapped_file_section(string_iterator pos) 281 { 282 mapped_sections.push_back(mapped_file_section( 283 pos - original->source().begin(), source().size(), 284 mapped_file_section::indented)); 285 } 286 to_original_posquickbook::mapped_file287 std::string::size_type to_original_pos( 288 std::vector<mapped_file_section>::const_iterator section, 289 std::string::size_type pos) const 290 { 291 switch (section->section_type) { 292 case mapped_file_section::normal: 293 return pos - section->our_pos + section->original_pos; 294 295 case mapped_file_section::empty: 296 return section->original_pos; 297 298 case mapped_file_section::indented: { 299 // Will contain the start of the current line. 300 quickbook::string_view::size_type our_line = section->our_pos; 301 302 // Will contain the number of lines in the block before 303 // the current line. 304 unsigned newline_count = 0; 305 306 for (quickbook::string_view::size_type i = section->our_pos; 307 i != pos; ++i) { 308 if (source()[i] == '\n') { 309 our_line = i + 1; 310 ++newline_count; 311 } 312 } 313 314 // The start of the line in the original source. 315 quickbook::string_view::size_type original_line = 316 section->original_pos; 317 318 while (newline_count > 0) { 319 if (original->source()[original_line] == '\n') 320 --newline_count; 321 ++original_line; 322 } 323 324 // The start of line content (i.e. after indentation). 325 our_line = skip_indentation(source(), our_line); 326 327 // The position is in the middle of indentation, so 328 // just return the start of the whitespace, which should 329 // be good enough. 330 if (our_line > pos) return original_line; 331 332 original_line = 333 skip_indentation(original->source(), original_line); 334 335 // Confirm that we are actually in the same position. 336 assert(original->source()[original_line] == source()[our_line]); 337 338 // Calculate the position 339 return original_line + (pos - our_line); 340 } 341 default: 342 assert(false); 343 return section->original_pos; 344 } 345 } 346 find_sectionquickbook::mapped_file347 std::vector<mapped_file_section>::const_iterator find_section( 348 string_iterator pos) const 349 { 350 std::vector<mapped_file_section>::const_iterator section = 351 boost::upper_bound( 352 mapped_sections, 353 std::string::size_type(pos - source().begin()), 354 mapped_section_pos_cmp()); 355 assert(section != mapped_sections.begin()); 356 --section; 357 358 return section; 359 } 360 361 virtual file_position position_of(string_iterator) const; 362 363 private: skip_indentationquickbook::mapped_file364 static std::string::size_type skip_indentation( 365 quickbook::string_view src, std::string::size_type i) 366 { 367 while (i != src.size() && (src[i] == ' ' || src[i] == '\t')) 368 ++i; 369 return i; 370 } 371 }; 372 373 namespace 374 { 375 std::list<mapped_file> mapped_files; 376 } 377 378 struct mapped_file_builder_data 379 { mapped_file_builder_dataquickbook::mapped_file_builder_data380 mapped_file_builder_data() { reset(); } resetquickbook::mapped_file_builder_data381 void reset() { new_file.reset(); } 382 383 boost::intrusive_ptr<mapped_file> new_file; 384 }; 385 mapped_file_builder()386 mapped_file_builder::mapped_file_builder() : data(0) {} ~mapped_file_builder()387 mapped_file_builder::~mapped_file_builder() { delete data; } 388 start(file_ptr f)389 void mapped_file_builder::start(file_ptr f) 390 { 391 if (!data) { 392 data = new mapped_file_builder_data; 393 } 394 395 assert(!data->new_file); 396 data->new_file = new mapped_file(f); 397 } 398 release()399 file_ptr mapped_file_builder::release() 400 { 401 file_ptr r = data->new_file; 402 data->reset(); 403 return r; 404 } 405 clear()406 void mapped_file_builder::clear() { data->reset(); } 407 empty() const408 bool mapped_file_builder::empty() const 409 { 410 return data->new_file->source().empty(); 411 } 412 get_pos() const413 mapped_file_builder::pos_type mapped_file_builder::get_pos() const 414 { 415 return data->new_file->source().size(); 416 } 417 add_at_pos(quickbook::string_view x,iterator pos)418 void mapped_file_builder::add_at_pos(quickbook::string_view x, iterator pos) 419 { 420 data->new_file->add_empty_mapped_file_section(pos); 421 data->new_file->source_.append(x.begin(), x.end()); 422 } 423 add(quickbook::string_view x)424 void mapped_file_builder::add(quickbook::string_view x) 425 { 426 data->new_file->add_mapped_file_section(x.begin()); 427 data->new_file->source_.append(x.begin(), x.end()); 428 } 429 add(mapped_file_builder const & x)430 void mapped_file_builder::add(mapped_file_builder const& x) 431 { 432 add(x, 0, x.data->new_file->source_.size()); 433 } 434 add(mapped_file_builder const & x,pos_type begin,pos_type end)435 void mapped_file_builder::add( 436 mapped_file_builder const& x, pos_type begin, pos_type end) 437 { 438 assert(data->new_file->original == x.data->new_file->original); 439 assert(begin <= x.data->new_file->source_.size()); 440 assert(end <= x.data->new_file->source_.size()); 441 442 if (begin != end) { 443 std::vector<mapped_file_section>::const_iterator i = 444 x.data->new_file->find_section( 445 x.data->new_file->source().begin() + begin); 446 447 std::string::size_type size = data->new_file->source_.size(); 448 449 data->new_file->mapped_sections.push_back(mapped_file_section( 450 x.data->new_file->to_original_pos(i, begin), size, 451 i->section_type)); 452 453 for (++i; i != x.data->new_file->mapped_sections.end() && 454 i->our_pos < end; 455 ++i) { 456 data->new_file->mapped_sections.push_back(mapped_file_section( 457 i->original_pos, i->our_pos - begin + size, 458 i->section_type)); 459 } 460 461 data->new_file->source_.append( 462 x.data->new_file->source_.begin() + begin, 463 x.data->new_file->source_.begin() + end); 464 } 465 } 466 indentation_count(quickbook::string_view x)467 quickbook::string_view::size_type indentation_count( 468 quickbook::string_view x) 469 { 470 unsigned count = 0; 471 472 QUICKBOOK_FOR (auto c, x) { 473 switch (c) { 474 case ' ': 475 ++count; 476 break; 477 case '\t': 478 // hardcoded tab to 4 for now 479 count = count - (count % 4) + 4; 480 break; 481 default: 482 assert(false); 483 } 484 } 485 486 return count; 487 } 488 unindent_and_add(quickbook::string_view x)489 void mapped_file_builder::unindent_and_add(quickbook::string_view x) 490 { 491 // I wanted to do everything using a string_ref, but unfortunately 492 // they don't have all the overloads used in here. So... 493 std::string const program(x.begin(), x.end()); 494 495 // Erase leading blank lines and newlines: 496 std::string::size_type text_start = 497 program.find_first_not_of(" \t\r\n"); 498 if (text_start == std::string::npos) return; 499 500 text_start = program.find_last_of("\r\n", text_start); 501 text_start = text_start == std::string::npos ? 0 : text_start + 1; 502 503 assert(text_start < program.size()); 504 505 // Get the first line indentation 506 std::string::size_type indent = 507 program.find_first_not_of(" \t", text_start) - text_start; 508 quickbook::string_view::size_type full_indent = indentation_count( 509 quickbook::string_view(&program[text_start], indent)); 510 511 std::string::size_type pos = text_start; 512 513 // Calculate the minimum indent from the rest of the lines 514 // Detecting a mix of spaces and tabs. 515 while (std::string::npos != 516 (pos = program.find_first_of("\r\n", pos))) { 517 pos = program.find_first_not_of("\r\n", pos); 518 if (std::string::npos == pos) break; 519 520 std::string::size_type n = program.find_first_not_of(" \t", pos); 521 if (n == std::string::npos) break; 522 523 char ch = program[n]; 524 if (ch == '\r' || ch == '\n') continue; // ignore empty lines 525 526 indent = (std::min)(indent, n - pos); 527 full_indent = (std::min)( 528 full_indent, indentation_count(quickbook::string_view( 529 &program[pos], n - pos))); 530 } 531 532 // Detect if indentation is mixed. 533 bool mixed_indentation = false; 534 quickbook::string_view first_indent(&program[text_start], indent); 535 pos = text_start; 536 537 while (std::string::npos != 538 (pos = program.find_first_of("\r\n", pos))) { 539 pos = program.find_first_not_of("\r\n", pos); 540 if (std::string::npos == pos) break; 541 542 std::string::size_type n = program.find_first_not_of(" \t", pos); 543 if (n == std::string::npos || n - pos < indent) continue; 544 545 if (quickbook::string_view(&program[pos], indent) != first_indent) { 546 mixed_indentation = true; 547 break; 548 } 549 } 550 551 // Trim white spaces from column 0..indent 552 std::string unindented_program; 553 std::string::size_type copy_start = text_start; 554 pos = text_start; 555 556 do { 557 if (std::string::npos == 558 (pos = program.find_first_not_of("\r\n", pos))) 559 break; 560 561 unindented_program.append( 562 program.begin() + copy_start, program.begin() + pos); 563 copy_start = pos; 564 565 // Find the end of the indentation. 566 std::string::size_type next = program.find_first_not_of(" \t", pos); 567 if (next == std::string::npos) next = program.size(); 568 569 if (mixed_indentation) { 570 string_view::size_type length = indentation_count( 571 quickbook::string_view(&program[pos], next - pos)); 572 573 if (length > full_indent) { 574 std::string new_indentation(length - full_indent, ' '); 575 unindented_program.append(new_indentation); 576 } 577 578 copy_start = next; 579 } 580 else { 581 copy_start = (std::min)(pos + indent, next); 582 } 583 584 pos = next; 585 } while (std::string::npos != 586 (pos = program.find_first_of("\r\n", pos))); 587 588 unindented_program.append(program.begin() + copy_start, program.end()); 589 590 data->new_file->add_indented_mapped_file_section(x.begin()); 591 data->new_file->source_.append(unindented_program); 592 } 593 position_of(string_iterator pos) const594 file_position mapped_file::position_of(string_iterator pos) const 595 { 596 return original->position_of( 597 original->source().begin() + 598 to_original_pos(find_section(pos), pos - source().begin())); 599 } 600 } 601