/*============================================================================= Copyright (c) 2002 2004 2006 Joel de Guzman Copyright (c) 2004 Eric Niebler http://spirit.sourceforge.net/ Use, modification and distribution is subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) =============================================================================*/ #include "files.hpp" #include <fstream> #include <iterator> #include <vector> #include <boost/filesystem/fstream.hpp> #include <boost/range/algorithm/transform.hpp> #include <boost/range/algorithm/upper_bound.hpp> #include <boost/unordered_map.hpp> #include "for.hpp" namespace quickbook { namespace { boost::unordered_map<fs::path, file_ptr> files; } // Read the first few bytes in a file to see it starts with a byte order // mark. If it doesn't, then write the characters we've already read in. // Although, given how UTF-8 works, if we've read anything in, the files // probably broken. template <typename InputIterator, typename OutputIterator> bool check_bom( InputIterator& begin, InputIterator end, OutputIterator out, char const* chars, int length) { char const* ptr = chars; while (begin != end && *begin == *ptr) { ++begin; ++ptr; --length; if (length == 0) return true; } // Failed to match, so write the skipped characters to storage: while (chars != ptr) *out++ = *chars++; return false; } template <typename InputIterator, typename OutputIterator> std::string read_bom( InputIterator& begin, InputIterator end, OutputIterator out) { if (begin == end) return ""; const char* utf8 = "\xef\xbb\xbf"; const char* utf32be = "\0\0\xfe\xff"; const char* utf32le = "\xff\xfe\0\0"; unsigned char c = *begin; switch (c) { case 0xEF: { // UTF-8 return check_bom(begin, end, out, utf8, 3) ? "UTF-8" : ""; } case 0xFF: // UTF-16/UTF-32 little endian return !check_bom(begin, end, out, utf32le, 2) ? "" : check_bom(begin, end, out, utf32le + 2, 2) ? "UTF-32" : "UTF-16"; case 0: // UTF-32 big endian return check_bom(begin, end, out, utf32be, 4) ? "UTF-32" : ""; case 0xFE: // UTF-16 big endian return check_bom(begin, end, out, utf32be + 2, 2) ? "UTF-16" : ""; default: return ""; } } // Copy a string, converting mac and windows style newlines to unix // newlines. template <typename InputIterator, typename OutputIterator> void normalize(InputIterator begin, InputIterator end, OutputIterator out) { std::string encoding = read_bom(begin, end, out); if (encoding != "UTF-8" && encoding != "") throw load_error(encoding + " is not supported. Please use UTF-8."); while (begin != end) { if (*begin == '\r') { *out++ = '\n'; ++begin; if (begin != end && *begin == '\n') ++begin; } else { *out++ = *begin++; } } } file_ptr load(fs::path const& filename, unsigned qbk_version) { boost::unordered_map<fs::path, file_ptr>::iterator pos = files.find(filename); if (pos == files.end()) { fs::ifstream in(filename, std::ios_base::in); if (!in) throw load_error("Could not open input file."); // Turn off white space skipping on the stream in.unsetf(std::ios::skipws); std::string source; normalize( std::istream_iterator<char>(in), std::istream_iterator<char>(), std::back_inserter(source)); if (in.bad()) throw load_error("Error reading input file."); bool inserted; boost::tie(pos, inserted) = files.emplace( filename, new file(filename, source, qbk_version)); assert(inserted); } return pos->second; } std::ostream& operator<<(std::ostream& out, file_position const& x) { return out << "line: " << x.line << ", column: " << x.column; } file_position relative_position( string_iterator begin, string_iterator iterator) { file_position pos; string_iterator line_begin = begin; while (begin != iterator) { if (*begin == '\r') { ++begin; ++pos.line; line_begin = begin; } else if (*begin == '\n') { ++begin; ++pos.line; line_begin = begin; if (begin == iterator) break; if (*begin == '\r') { ++begin; line_begin = begin; } } else { ++begin; } } pos.column = iterator - line_begin + 1; return pos; } file_position file::position_of(string_iterator iterator) const { return relative_position(source().begin(), iterator); } // Mapped files. struct mapped_file_section { enum section_types { normal, empty, indented }; std::string::size_type original_pos; std::string::size_type our_pos; section_types section_type; explicit mapped_file_section( std::string::size_type original_pos_, std::string::size_type our_pos_, section_types section_type_ = normal) : original_pos(original_pos_) , our_pos(our_pos_) , section_type(section_type_) { } }; struct mapped_section_original_cmp { bool operator()( mapped_file_section const& x, mapped_file_section const& y) { return x.original_pos < y.original_pos; } bool operator()( mapped_file_section const& x, std::string::size_type const& y) { return x.original_pos < y; } bool operator()( std::string::size_type const& x, mapped_file_section const& y) { return x < y.original_pos; } }; struct mapped_section_pos_cmp { bool operator()( mapped_file_section const& x, mapped_file_section const& y) { return x.our_pos < y.our_pos; } bool operator()( mapped_file_section const& x, std::string::size_type const& y) { return x.our_pos < y; } bool operator()( std::string::size_type const& x, mapped_file_section const& y) { return x < y.our_pos; } }; struct mapped_file : file { explicit mapped_file(file_ptr original_) : file(*original_, std::string()) , original(original_) , mapped_sections() { } file_ptr original; std::vector<mapped_file_section> mapped_sections; void add_empty_mapped_file_section(string_iterator pos) { std::string::size_type original_pos = pos - original->source().begin(); if (mapped_sections.empty() || mapped_sections.back().section_type != mapped_file_section::empty || mapped_sections.back().original_pos != original_pos) { mapped_sections.push_back(mapped_file_section( original_pos, source().size(), mapped_file_section::empty)); } } void add_mapped_file_section(string_iterator pos) { mapped_sections.push_back(mapped_file_section( pos - original->source().begin(), source().size())); } void add_indented_mapped_file_section(string_iterator pos) { mapped_sections.push_back(mapped_file_section( pos - original->source().begin(), source().size(), mapped_file_section::indented)); } std::string::size_type to_original_pos( std::vector<mapped_file_section>::const_iterator section, std::string::size_type pos) const { switch (section->section_type) { case mapped_file_section::normal: return pos - section->our_pos + section->original_pos; case mapped_file_section::empty: return section->original_pos; case mapped_file_section::indented: { // Will contain the start of the current line. quickbook::string_view::size_type our_line = section->our_pos; // Will contain the number of lines in the block before // the current line. unsigned newline_count = 0; for (quickbook::string_view::size_type i = section->our_pos; i != pos; ++i) { if (source()[i] == '\n') { our_line = i + 1; ++newline_count; } } // The start of the line in the original source. quickbook::string_view::size_type original_line = section->original_pos; while (newline_count > 0) { if (original->source()[original_line] == '\n') --newline_count; ++original_line; } // The start of line content (i.e. after indentation). our_line = skip_indentation(source(), our_line); // The position is in the middle of indentation, so // just return the start of the whitespace, which should // be good enough. if (our_line > pos) return original_line; original_line = skip_indentation(original->source(), original_line); // Confirm that we are actually in the same position. assert(original->source()[original_line] == source()[our_line]); // Calculate the position return original_line + (pos - our_line); } default: assert(false); return section->original_pos; } } std::vector<mapped_file_section>::const_iterator find_section( string_iterator pos) const { std::vector<mapped_file_section>::const_iterator section = boost::upper_bound( mapped_sections, std::string::size_type(pos - source().begin()), mapped_section_pos_cmp()); assert(section != mapped_sections.begin()); --section; return section; } virtual file_position position_of(string_iterator) const; private: static std::string::size_type skip_indentation( quickbook::string_view src, std::string::size_type i) { while (i != src.size() && (src[i] == ' ' || src[i] == '\t')) ++i; return i; } }; namespace { std::list<mapped_file> mapped_files; } struct mapped_file_builder_data { mapped_file_builder_data() { reset(); } void reset() { new_file.reset(); } boost::intrusive_ptr<mapped_file> new_file; }; mapped_file_builder::mapped_file_builder() : data(0) {} mapped_file_builder::~mapped_file_builder() { delete data; } void mapped_file_builder::start(file_ptr f) { if (!data) { data = new mapped_file_builder_data; } assert(!data->new_file); data->new_file = new mapped_file(f); } file_ptr mapped_file_builder::release() { file_ptr r = data->new_file; data->reset(); return r; } void mapped_file_builder::clear() { data->reset(); } bool mapped_file_builder::empty() const { return data->new_file->source().empty(); } mapped_file_builder::pos_type mapped_file_builder::get_pos() const { return data->new_file->source().size(); } void mapped_file_builder::add_at_pos(quickbook::string_view x, iterator pos) { data->new_file->add_empty_mapped_file_section(pos); data->new_file->source_.append(x.begin(), x.end()); } void mapped_file_builder::add(quickbook::string_view x) { data->new_file->add_mapped_file_section(x.begin()); data->new_file->source_.append(x.begin(), x.end()); } void mapped_file_builder::add(mapped_file_builder const& x) { add(x, 0, x.data->new_file->source_.size()); } void mapped_file_builder::add( mapped_file_builder const& x, pos_type begin, pos_type end) { assert(data->new_file->original == x.data->new_file->original); assert(begin <= x.data->new_file->source_.size()); assert(end <= x.data->new_file->source_.size()); if (begin != end) { std::vector<mapped_file_section>::const_iterator i = x.data->new_file->find_section( x.data->new_file->source().begin() + begin); std::string::size_type size = data->new_file->source_.size(); data->new_file->mapped_sections.push_back(mapped_file_section( x.data->new_file->to_original_pos(i, begin), size, i->section_type)); for (++i; i != x.data->new_file->mapped_sections.end() && i->our_pos < end; ++i) { data->new_file->mapped_sections.push_back(mapped_file_section( i->original_pos, i->our_pos - begin + size, i->section_type)); } data->new_file->source_.append( x.data->new_file->source_.begin() + begin, x.data->new_file->source_.begin() + end); } } quickbook::string_view::size_type indentation_count( quickbook::string_view x) { unsigned count = 0; QUICKBOOK_FOR (auto c, x) { switch (c) { case ' ': ++count; break; case '\t': // hardcoded tab to 4 for now count = count - (count % 4) + 4; break; default: assert(false); } } return count; } void mapped_file_builder::unindent_and_add(quickbook::string_view x) { // I wanted to do everything using a string_ref, but unfortunately // they don't have all the overloads used in here. So... std::string const program(x.begin(), x.end()); // Erase leading blank lines and newlines: std::string::size_type text_start = program.find_first_not_of(" \t\r\n"); if (text_start == std::string::npos) return; text_start = program.find_last_of("\r\n", text_start); text_start = text_start == std::string::npos ? 0 : text_start + 1; assert(text_start < program.size()); // Get the first line indentation std::string::size_type indent = program.find_first_not_of(" \t", text_start) - text_start; quickbook::string_view::size_type full_indent = indentation_count( quickbook::string_view(&program[text_start], indent)); std::string::size_type pos = text_start; // Calculate the minimum indent from the rest of the lines // Detecting a mix of spaces and tabs. while (std::string::npos != (pos = program.find_first_of("\r\n", pos))) { pos = program.find_first_not_of("\r\n", pos); if (std::string::npos == pos) break; std::string::size_type n = program.find_first_not_of(" \t", pos); if (n == std::string::npos) break; char ch = program[n]; if (ch == '\r' || ch == '\n') continue; // ignore empty lines indent = (std::min)(indent, n - pos); full_indent = (std::min)( full_indent, indentation_count(quickbook::string_view( &program[pos], n - pos))); } // Detect if indentation is mixed. bool mixed_indentation = false; quickbook::string_view first_indent(&program[text_start], indent); pos = text_start; while (std::string::npos != (pos = program.find_first_of("\r\n", pos))) { pos = program.find_first_not_of("\r\n", pos); if (std::string::npos == pos) break; std::string::size_type n = program.find_first_not_of(" \t", pos); if (n == std::string::npos || n - pos < indent) continue; if (quickbook::string_view(&program[pos], indent) != first_indent) { mixed_indentation = true; break; } } // Trim white spaces from column 0..indent std::string unindented_program; std::string::size_type copy_start = text_start; pos = text_start; do { if (std::string::npos == (pos = program.find_first_not_of("\r\n", pos))) break; unindented_program.append( program.begin() + copy_start, program.begin() + pos); copy_start = pos; // Find the end of the indentation. std::string::size_type next = program.find_first_not_of(" \t", pos); if (next == std::string::npos) next = program.size(); if (mixed_indentation) { string_view::size_type length = indentation_count( quickbook::string_view(&program[pos], next - pos)); if (length > full_indent) { std::string new_indentation(length - full_indent, ' '); unindented_program.append(new_indentation); } copy_start = next; } else { copy_start = (std::min)(pos + indent, next); } pos = next; } while (std::string::npos != (pos = program.find_first_of("\r\n", pos))); unindented_program.append(program.begin() + copy_start, program.end()); data->new_file->add_indented_mapped_file_section(x.begin()); data->new_file->source_.append(unindented_program); } file_position mapped_file::position_of(string_iterator pos) const { return original->position_of( original->source().begin() + to_original_pos(find_section(pos), pos - source().begin())); } }