• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*=============================================================================
2     Copyright (c) 2002 2004 2006 Joel de Guzman
3     Copyright (c) 2004 Eric Niebler
4     http://spirit.sourceforge.net/
5 
6     Use, modification and distribution is subject to the Boost Software
7     License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
8     http://www.boost.org/LICENSE_1_0.txt)
9 =============================================================================*/
10 #include "files.hpp"
11 #include <fstream>
12 #include <iterator>
13 #include <vector>
14 #include <boost/filesystem/fstream.hpp>
15 #include <boost/range/algorithm/transform.hpp>
16 #include <boost/range/algorithm/upper_bound.hpp>
17 #include <boost/unordered_map.hpp>
18 #include "for.hpp"
19 
20 namespace quickbook
21 {
namespace
{
    // Files that have already been read, keyed by path. load() looks a
    // path up here first and only reads the file when there is no entry.
    boost::unordered_map<fs::path, file_ptr> files;
}
26 
27     // Read the first few bytes in a file to see it starts with a byte order
28     // mark. If it doesn't, then write the characters we've already read in.
// Although, given how UTF-8 works, if we've read anything in, the file is
// probably broken.
31 
// Tries to consume the byte sequence `chars` (of `length` bytes) from the
// front of [begin, end).
//
// `begin` is advanced past every byte that matched. Returns true as soon as
// all `length` bytes have matched. If a byte differs, or the input runs out
// first, the bytes that were consumed are written to `out` so that they are
// not lost, and false is returned.
template <typename InputIterator, typename OutputIterator>
bool check_bom(
    InputIterator& begin,
    InputIterator end,
    OutputIterator out,
    char const* chars,
    int length)
{
    char const* cursor = chars;
    int remaining = length;

    while (begin != end && *begin == *cursor) {
        ++begin;
        ++cursor;
        if (--remaining == 0) return true;
    }

    // No match: replay the bytes that were already taken from the input.
    for (char const* replay = chars; replay != cursor; ++replay) {
        *out++ = *replay;
    }

    return false;
}
55 
// Looks for a byte order mark at the front of [begin, end).
//
// Returns "UTF-8", "UTF-16" or "UTF-32" when the corresponding mark was
// found and consumed, and an empty string otherwise. Bytes that were read
// while testing for a mark that turned out not to be there are written to
// `out` by check_bom.
template <typename InputIterator, typename OutputIterator>
std::string read_bom(
    InputIterator& begin, InputIterator end, OutputIterator out)
{
    if (begin == end) return "";

    char const* const utf8_mark = "\xef\xbb\xbf";
    char const* const utf32be_mark = "\0\0\xfe\xff";
    char const* const utf32le_mark = "\xff\xfe\0\0";

    // The first byte decides which mark could possibly follow.
    unsigned char const first = *begin;

    if (first == 0xEF) { // UTF-8
        if (check_bom(begin, end, out, utf8_mark, 3)) return "UTF-8";
        return "";
    }

    if (first == 0xFF) { // UTF-16/UTF-32 little endian
        if (!check_bom(begin, end, out, utf32le_mark, 2)) return "";
        // The UTF-32 mark is the UTF-16 mark followed by two zero bytes.
        if (check_bom(begin, end, out, utf32le_mark + 2, 2)) return "UTF-32";
        return "UTF-16";
    }

    if (first == 0) { // UTF-32 big endian
        if (check_bom(begin, end, out, utf32be_mark, 4)) return "UTF-32";
        return "";
    }

    if (first == 0xFE) { // UTF-16 big endian
        if (check_bom(begin, end, out, utf32be_mark + 2, 2)) return "UTF-16";
        return "";
    }

    return "";
}
84 
85     // Copy a string, converting mac and windows style newlines to unix
86     // newlines.
87 
88     template <typename InputIterator, typename OutputIterator>
normalize(InputIterator begin,InputIterator end,OutputIterator out)89     void normalize(InputIterator begin, InputIterator end, OutputIterator out)
90     {
91         std::string encoding = read_bom(begin, end, out);
92 
93         if (encoding != "UTF-8" && encoding != "")
94             throw load_error(encoding + " is not supported. Please use UTF-8.");
95 
96         while (begin != end) {
97             if (*begin == '\r') {
98                 *out++ = '\n';
99                 ++begin;
100                 if (begin != end && *begin == '\n') ++begin;
101             }
102             else {
103                 *out++ = *begin++;
104             }
105         }
106     }
107 
load(fs::path const & filename,unsigned qbk_version)108     file_ptr load(fs::path const& filename, unsigned qbk_version)
109     {
110         boost::unordered_map<fs::path, file_ptr>::iterator pos =
111             files.find(filename);
112 
113         if (pos == files.end()) {
114             fs::ifstream in(filename, std::ios_base::in);
115 
116             if (!in) throw load_error("Could not open input file.");
117 
118             // Turn off white space skipping on the stream
119             in.unsetf(std::ios::skipws);
120 
121             std::string source;
122             normalize(
123                 std::istream_iterator<char>(in), std::istream_iterator<char>(),
124                 std::back_inserter(source));
125 
126             if (in.bad()) throw load_error("Error reading input file.");
127 
128             bool inserted;
129 
130             boost::tie(pos, inserted) = files.emplace(
131                 filename, new file(filename, source, qbk_version));
132 
133             assert(inserted);
134         }
135 
136         return pos->second;
137     }
138 
operator <<(std::ostream & out,file_position const & x)139     std::ostream& operator<<(std::ostream& out, file_position const& x)
140     {
141         return out << "line: " << x.line << ", column: " << x.column;
142     }
143 
relative_position(string_iterator begin,string_iterator iterator)144     file_position relative_position(
145         string_iterator begin, string_iterator iterator)
146     {
147         file_position pos;
148         string_iterator line_begin = begin;
149 
150         while (begin != iterator) {
151             if (*begin == '\r') {
152                 ++begin;
153                 ++pos.line;
154                 line_begin = begin;
155             }
156             else if (*begin == '\n') {
157                 ++begin;
158                 ++pos.line;
159                 line_begin = begin;
160                 if (begin == iterator) break;
161                 if (*begin == '\r') {
162                     ++begin;
163                     line_begin = begin;
164                 }
165             }
166             else {
167                 ++begin;
168             }
169         }
170 
171         pos.column = iterator - line_begin + 1;
172         return pos;
173     }
174 
position_of(string_iterator iterator) const175     file_position file::position_of(string_iterator iterator) const
176     {
177         return relative_position(source().begin(), iterator);
178     }
179 
180     // Mapped files.
181 
// One entry in a mapped file's section table. It pairs an offset in the
// mapped file's own text (`our_pos`) with an offset in the original file's
// text (`original_pos`); `section_type` selects how positions after the
// start of the section are translated.
struct mapped_file_section
{
    enum section_types
    {
        normal,  // offsets advance in step with the original
        empty,   // every position maps to `original_pos`
        indented // text whose indentation was changed; mapped line by line
    };

    std::string::size_type original_pos;
    std::string::size_type our_pos;
    section_types section_type;

    explicit mapped_file_section(
        std::string::size_type original_offset,
        std::string::size_type our_offset,
        section_types type = normal)
        : original_pos(original_offset)
        , our_pos(our_offset)
        , section_type(type)
    {
    }
};
205 
206     struct mapped_section_original_cmp
207     {
operator ()quickbook::mapped_section_original_cmp208         bool operator()(
209             mapped_file_section const& x, mapped_file_section const& y)
210         {
211             return x.original_pos < y.original_pos;
212         }
213 
operator ()quickbook::mapped_section_original_cmp214         bool operator()(
215             mapped_file_section const& x, std::string::size_type const& y)
216         {
217             return x.original_pos < y;
218         }
219 
operator ()quickbook::mapped_section_original_cmp220         bool operator()(
221             std::string::size_type const& x, mapped_file_section const& y)
222         {
223             return x < y.original_pos;
224         }
225     };
226 
227     struct mapped_section_pos_cmp
228     {
operator ()quickbook::mapped_section_pos_cmp229         bool operator()(
230             mapped_file_section const& x, mapped_file_section const& y)
231         {
232             return x.our_pos < y.our_pos;
233         }
234 
operator ()quickbook::mapped_section_pos_cmp235         bool operator()(
236             mapped_file_section const& x, std::string::size_type const& y)
237         {
238             return x.our_pos < y;
239         }
240 
operator ()quickbook::mapped_section_pos_cmp241         bool operator()(
242             std::string::size_type const& x, mapped_file_section const& y)
243         {
244             return x < y.our_pos;
245         }
246     };
247 
248     struct mapped_file : file
249     {
mapped_filequickbook::mapped_file250         explicit mapped_file(file_ptr original_)
251             : file(*original_, std::string())
252             , original(original_)
253             , mapped_sections()
254         {
255         }
256 
257         file_ptr original;
258         std::vector<mapped_file_section> mapped_sections;
259 
add_empty_mapped_file_sectionquickbook::mapped_file260         void add_empty_mapped_file_section(string_iterator pos)
261         {
262             std::string::size_type original_pos =
263                 pos - original->source().begin();
264 
265             if (mapped_sections.empty() ||
266                 mapped_sections.back().section_type !=
267                     mapped_file_section::empty ||
268                 mapped_sections.back().original_pos != original_pos) {
269                 mapped_sections.push_back(mapped_file_section(
270                     original_pos, source().size(), mapped_file_section::empty));
271             }
272         }
273 
add_mapped_file_sectionquickbook::mapped_file274         void add_mapped_file_section(string_iterator pos)
275         {
276             mapped_sections.push_back(mapped_file_section(
277                 pos - original->source().begin(), source().size()));
278         }
279 
add_indented_mapped_file_sectionquickbook::mapped_file280         void add_indented_mapped_file_section(string_iterator pos)
281         {
282             mapped_sections.push_back(mapped_file_section(
283                 pos - original->source().begin(), source().size(),
284                 mapped_file_section::indented));
285         }
286 
to_original_posquickbook::mapped_file287         std::string::size_type to_original_pos(
288             std::vector<mapped_file_section>::const_iterator section,
289             std::string::size_type pos) const
290         {
291             switch (section->section_type) {
292             case mapped_file_section::normal:
293                 return pos - section->our_pos + section->original_pos;
294 
295             case mapped_file_section::empty:
296                 return section->original_pos;
297 
298             case mapped_file_section::indented: {
299                 // Will contain the start of the current line.
300                 quickbook::string_view::size_type our_line = section->our_pos;
301 
302                 // Will contain the number of lines in the block before
303                 // the current line.
304                 unsigned newline_count = 0;
305 
306                 for (quickbook::string_view::size_type i = section->our_pos;
307                      i != pos; ++i) {
308                     if (source()[i] == '\n') {
309                         our_line = i + 1;
310                         ++newline_count;
311                     }
312                 }
313 
314                 // The start of the line in the original source.
315                 quickbook::string_view::size_type original_line =
316                     section->original_pos;
317 
318                 while (newline_count > 0) {
319                     if (original->source()[original_line] == '\n')
320                         --newline_count;
321                     ++original_line;
322                 }
323 
324                 // The start of line content (i.e. after indentation).
325                 our_line = skip_indentation(source(), our_line);
326 
327                 // The position is in the middle of indentation, so
328                 // just return the start of the whitespace, which should
329                 // be good enough.
330                 if (our_line > pos) return original_line;
331 
332                 original_line =
333                     skip_indentation(original->source(), original_line);
334 
335                 // Confirm that we are actually in the same position.
336                 assert(original->source()[original_line] == source()[our_line]);
337 
338                 // Calculate the position
339                 return original_line + (pos - our_line);
340             }
341             default:
342                 assert(false);
343                 return section->original_pos;
344             }
345         }
346 
find_sectionquickbook::mapped_file347         std::vector<mapped_file_section>::const_iterator find_section(
348             string_iterator pos) const
349         {
350             std::vector<mapped_file_section>::const_iterator section =
351                 boost::upper_bound(
352                     mapped_sections,
353                     std::string::size_type(pos - source().begin()),
354                     mapped_section_pos_cmp());
355             assert(section != mapped_sections.begin());
356             --section;
357 
358             return section;
359         }
360 
361         virtual file_position position_of(string_iterator) const;
362 
363       private:
skip_indentationquickbook::mapped_file364         static std::string::size_type skip_indentation(
365             quickbook::string_view src, std::string::size_type i)
366         {
367             while (i != src.size() && (src[i] == ' ' || src[i] == '\t'))
368                 ++i;
369             return i;
370         }
371     };
372 
namespace
{
    // NOTE(review): nothing in the visible part of this file refers to this
    // list; confirm whether it is still needed.
    std::list<mapped_file> mapped_files;
}
377 
378     struct mapped_file_builder_data
379     {
mapped_file_builder_dataquickbook::mapped_file_builder_data380         mapped_file_builder_data() { reset(); }
resetquickbook::mapped_file_builder_data381         void reset() { new_file.reset(); }
382 
383         boost::intrusive_ptr<mapped_file> new_file;
384     };
385 
mapped_file_builder()386     mapped_file_builder::mapped_file_builder() : data(0) {}
~mapped_file_builder()387     mapped_file_builder::~mapped_file_builder() { delete data; }
388 
start(file_ptr f)389     void mapped_file_builder::start(file_ptr f)
390     {
391         if (!data) {
392             data = new mapped_file_builder_data;
393         }
394 
395         assert(!data->new_file);
396         data->new_file = new mapped_file(f);
397     }
398 
release()399     file_ptr mapped_file_builder::release()
400     {
401         file_ptr r = data->new_file;
402         data->reset();
403         return r;
404     }
405 
clear()406     void mapped_file_builder::clear() { data->reset(); }
407 
empty() const408     bool mapped_file_builder::empty() const
409     {
410         return data->new_file->source().empty();
411     }
412 
get_pos() const413     mapped_file_builder::pos_type mapped_file_builder::get_pos() const
414     {
415         return data->new_file->source().size();
416     }
417 
add_at_pos(quickbook::string_view x,iterator pos)418     void mapped_file_builder::add_at_pos(quickbook::string_view x, iterator pos)
419     {
420         data->new_file->add_empty_mapped_file_section(pos);
421         data->new_file->source_.append(x.begin(), x.end());
422     }
423 
add(quickbook::string_view x)424     void mapped_file_builder::add(quickbook::string_view x)
425     {
426         data->new_file->add_mapped_file_section(x.begin());
427         data->new_file->source_.append(x.begin(), x.end());
428     }
429 
add(mapped_file_builder const & x)430     void mapped_file_builder::add(mapped_file_builder const& x)
431     {
432         add(x, 0, x.data->new_file->source_.size());
433     }
434 
add(mapped_file_builder const & x,pos_type begin,pos_type end)435     void mapped_file_builder::add(
436         mapped_file_builder const& x, pos_type begin, pos_type end)
437     {
438         assert(data->new_file->original == x.data->new_file->original);
439         assert(begin <= x.data->new_file->source_.size());
440         assert(end <= x.data->new_file->source_.size());
441 
442         if (begin != end) {
443             std::vector<mapped_file_section>::const_iterator i =
444                 x.data->new_file->find_section(
445                     x.data->new_file->source().begin() + begin);
446 
447             std::string::size_type size = data->new_file->source_.size();
448 
449             data->new_file->mapped_sections.push_back(mapped_file_section(
450                 x.data->new_file->to_original_pos(i, begin), size,
451                 i->section_type));
452 
453             for (++i; i != x.data->new_file->mapped_sections.end() &&
454                       i->our_pos < end;
455                  ++i) {
456                 data->new_file->mapped_sections.push_back(mapped_file_section(
457                     i->original_pos, i->our_pos - begin + size,
458                     i->section_type));
459             }
460 
461             data->new_file->source_.append(
462                 x.data->new_file->source_.begin() + begin,
463                 x.data->new_file->source_.begin() + end);
464         }
465     }
466 
indentation_count(quickbook::string_view x)467     quickbook::string_view::size_type indentation_count(
468         quickbook::string_view x)
469     {
470         unsigned count = 0;
471 
472         QUICKBOOK_FOR (auto c, x) {
473             switch (c) {
474             case ' ':
475                 ++count;
476                 break;
477             case '\t':
478                 // hardcoded tab to 4 for now
479                 count = count - (count % 4) + 4;
480                 break;
481             default:
482                 assert(false);
483             }
484         }
485 
486         return count;
487     }
488 
unindent_and_add(quickbook::string_view x)489     void mapped_file_builder::unindent_and_add(quickbook::string_view x)
490     {
491         // I wanted to do everything using a string_ref, but unfortunately
492         // they don't have all the overloads used in here. So...
493         std::string const program(x.begin(), x.end());
494 
495         // Erase leading blank lines and newlines:
496         std::string::size_type text_start =
497             program.find_first_not_of(" \t\r\n");
498         if (text_start == std::string::npos) return;
499 
500         text_start = program.find_last_of("\r\n", text_start);
501         text_start = text_start == std::string::npos ? 0 : text_start + 1;
502 
503         assert(text_start < program.size());
504 
505         // Get the first line indentation
506         std::string::size_type indent =
507             program.find_first_not_of(" \t", text_start) - text_start;
508         quickbook::string_view::size_type full_indent = indentation_count(
509             quickbook::string_view(&program[text_start], indent));
510 
511         std::string::size_type pos = text_start;
512 
513         // Calculate the minimum indent from the rest of the lines
514         // Detecting a mix of spaces and tabs.
515         while (std::string::npos !=
516                (pos = program.find_first_of("\r\n", pos))) {
517             pos = program.find_first_not_of("\r\n", pos);
518             if (std::string::npos == pos) break;
519 
520             std::string::size_type n = program.find_first_not_of(" \t", pos);
521             if (n == std::string::npos) break;
522 
523             char ch = program[n];
524             if (ch == '\r' || ch == '\n') continue; // ignore empty lines
525 
526             indent = (std::min)(indent, n - pos);
527             full_indent = (std::min)(
528                 full_indent, indentation_count(quickbook::string_view(
529                                  &program[pos], n - pos)));
530         }
531 
532         // Detect if indentation is mixed.
533         bool mixed_indentation = false;
534         quickbook::string_view first_indent(&program[text_start], indent);
535         pos = text_start;
536 
537         while (std::string::npos !=
538                (pos = program.find_first_of("\r\n", pos))) {
539             pos = program.find_first_not_of("\r\n", pos);
540             if (std::string::npos == pos) break;
541 
542             std::string::size_type n = program.find_first_not_of(" \t", pos);
543             if (n == std::string::npos || n - pos < indent) continue;
544 
545             if (quickbook::string_view(&program[pos], indent) != first_indent) {
546                 mixed_indentation = true;
547                 break;
548             }
549         }
550 
551         // Trim white spaces from column 0..indent
552         std::string unindented_program;
553         std::string::size_type copy_start = text_start;
554         pos = text_start;
555 
556         do {
557             if (std::string::npos ==
558                 (pos = program.find_first_not_of("\r\n", pos)))
559                 break;
560 
561             unindented_program.append(
562                 program.begin() + copy_start, program.begin() + pos);
563             copy_start = pos;
564 
565             // Find the end of the indentation.
566             std::string::size_type next = program.find_first_not_of(" \t", pos);
567             if (next == std::string::npos) next = program.size();
568 
569             if (mixed_indentation) {
570                 string_view::size_type length = indentation_count(
571                     quickbook::string_view(&program[pos], next - pos));
572 
573                 if (length > full_indent) {
574                     std::string new_indentation(length - full_indent, ' ');
575                     unindented_program.append(new_indentation);
576                 }
577 
578                 copy_start = next;
579             }
580             else {
581                 copy_start = (std::min)(pos + indent, next);
582             }
583 
584             pos = next;
585         } while (std::string::npos !=
586                  (pos = program.find_first_of("\r\n", pos)));
587 
588         unindented_program.append(program.begin() + copy_start, program.end());
589 
590         data->new_file->add_indented_mapped_file_section(x.begin());
591         data->new_file->source_.append(unindented_program);
592     }
593 
position_of(string_iterator pos) const594     file_position mapped_file::position_of(string_iterator pos) const
595     {
596         return original->position_of(
597             original->source().begin() +
598             to_original_pos(find_section(pos), pos - source().begin()));
599     }
600 }
601