• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2008 John Maddock
2 //
3 // Use, modification and distribution are subject to the
4 // Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt
6 // or copy at http://www.boost.org/LICENSE_1_0.txt)
7 
8 #include "auto_index.hpp"
9 
10 bool need_defaults = true;
11 
install_default_scanners()12 void install_default_scanners()
13 {
14    need_defaults = false;
15    //
16    // Set the default scanners if they're not defined already:
17    //
18    file_scanner s;
19    s.type = "class_name";
20    if(file_scanner_set.find(s) == file_scanner_set.end())
21    {
22       add_file_scanner(
23          "class_name",  // Index type
24          // Header file scanner regex:
25          // possibly leading whitespace:
26          "^[[:space:]]*"
27          // possible template declaration:
28          "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
29          // class or struct:
30          "(class|struct)[[:space:]]*"
31          // leading declspec macros etc:
32          "("
33             "\\<\\w+\\>"
34             "("
35                "[[:blank:]]*\\([^)]*\\)"
36             ")?"
37             "[[:space:]]*"
38          ")*"
39          // the class name
40          "(\\<\\w*\\>)[[:space:]]*"
41          // template specialisation parameters
42          "(<[^;:{]+>)?[[:space:]]*"
43          // terminate in { or :
44          "(\\{|:[^;\\{()]*\\{)",
45 
46          "(?:class|struct)[^;{]+\\\\<\\5\\\\>[^;{]+\\\\{",  // Format string to create indexing regex.
47          "\\5",   // Format string to create index term.
48          "",  // Filter regex for section id's.
49          ""   // Filter regex for filenames.
50          );
51    }
52 
53    s.type = "typedef_name";
54    if(file_scanner_set.find(s) == file_scanner_set.end())
55    {
56       add_file_scanner(
57          "typedef_name",  // Index type
58          "typedef[^;{}#]+?(\\w+)\\s*;", // scanner regex
59          "typedef[^;]+\\\\<\\1\\\\>\\\\s*;",  // Format string to create indexing regex.
60          "\\1",   // Format string to create index term.
61          "",  // Filter regex for section id's.
62          ""   // Filter regex for filenames.
63          );
64    }
65 
66    s.type = "macro_name";
67    if(file_scanner_set.find(s) == file_scanner_set.end())
68    {
69       add_file_scanner(
70          "macro_name",  // Index type
71          "^\\s*#\\s*define\\s+(\\w+)", // scanner regex
72          "\\\\<\\1\\\\>",  // Format string to create indexing regex.
73          "\\1",   // Format string to create index term.
74          "",  // Filter regex for section id's.
75          ""   // Filter regex for filenames.
76          );
77    }
78 
79    s.type = "function_name";
80    if(file_scanner_set.find(s) == file_scanner_set.end())
81    {
82       add_file_scanner(
83          "function_name",  // Index type
84          "\\w++(?:\\s*+<[^>]++>)?[\\s&*]+?(\\w+)\\s*(?:BOOST_[[:upper:]_]+\\s*)?\\([^;{}]*\\)\\s*[;{]", // scanner regex
85          "\\\\<\\\\w+\\\\>(?:\\\\s+<[^>]*>)*[\\\\s&*]+\\\\<\\1\\\\>\\\\s*\\\\([^;{]*\\\\)",  // Format string to create indexing regex.
86          "\\1",   // Format string to create index term.
87          "",  // Filter regex for section id's.
88          ""   // Filter regex for filenames.
89          );
90    }
91 }
92 
//
// Helper to dump file contents into a std::string:
//
// s  - receives everything that can still be read from the stream; any
//      previous content is discarded.
// is - the stream to drain.  If its badbit is already set nothing is read
//      and s is left empty.  On return the stream has been read to the end.
//
void load_file(std::string& s, std::istream& is)
{
   s.erase();
   if(is.bad()) return;
   //
   // in_avail() is only a hint and may be zero or -1 ("nothing available").
   // Handing a negative value to reserve() would wrap round to an enormous
   // size and throw std::length_error, so only act on a positive hint:
   //
   const std::streamsize hint = is.rdbuf()->in_avail();
   if(hint > 0)
      s.reserve(static_cast<std::string::size_type>(hint));
   //
   // Read in blocks rather than a character at a time; the string grows
   // geometrically by itself.  The last read() comes up short, which sets
   // eofbit and failbit, but gcount() still reports the characters stored.
   //
   char buffer[4096];
   while(is.read(buffer, sizeof(buffer)) || (is.gcount() > 0))
      s.append(buffer, static_cast<std::string::size_type>(is.gcount()));
}
//
// Helper to convert a string from an external source into valid XML:
// the characters & < > and " are replaced by the entities &amp; &lt; &gt;
// and &quot; respectively, everything else is copied through unchanged.
//
std::string escape_to_xml(const std::string& in)
{
   std::string escaped;
   for(std::string::const_iterator p = in.begin(); p != in.end(); ++p)
   {
      const char ch = *p;
      if(ch == '&')
         escaped += "&amp;";
      else if(ch == '<')
         escaped += "&lt;";
      else if(ch == '>')
         escaped += "&gt;";
      else if(ch == '"')
         escaped += "&quot;";
      else
         escaped += ch;
   }
   return escaped;
}
137 //
138 // Scan a source file for things to index:
139 //
scan_file(const std::string & file)140 void scan_file(const std::string& file)
141 {
142    if(need_defaults)
143       install_default_scanners();
144    if(verbose)
145       std::cout << "Scanning file... " << file << std::endl;
146    std::string text;
147    std::ifstream is(file.c_str());
148    if(!is.peek() || !is.good())
149       throw std::runtime_error(std::string("Unable to read from file: ") + file);
150    load_file(text, is);
151 
152    for(file_scanner_set_type::iterator pscan = file_scanner_set.begin(); pscan != file_scanner_set.end(); ++pscan)
153    {
154       bool need_debug = false;
155       if(!debug.empty() && regex_match(pscan->type, ::debug))
156       {
157          need_debug = true;
158          std::cout << "Processing scanner " << pscan->type << " on file " << file << std::endl;
159          std::cout << "Scanner regex:" << pscan->scanner << std::endl;
160          std::cout << "Scanner formatter (search regex):" << pscan->format_string << std::endl;
161          std::cout << "Scanner formatter (index term):" << pscan->term_formatter << std::endl;
162          std::cout << "Scanner file name filter:" << pscan->file_name_filter << std::endl;
163          std::cout << "Scanner section id filter:" << pscan->section_filter << std::endl;
164       }
165       if(!pscan->file_name_filter.empty())
166       {
167          if(!regex_match(file, pscan->file_name_filter))
168          {
169             if(need_debug)
170             {
171                std::cout << "File failed to match file name filter, this file will be skipped..." << std::endl;
172             }
173             continue;  // skip this file
174          }
175       }
176       if(verbose && !need_debug)
177          std::cout << "Scanning for type \"" << (*pscan).type << "\" ... " << std::endl;
178       boost::sregex_iterator i(text.begin(), text.end(), (*pscan).scanner), j;
179       while(i != j)
180       {
181          try
182          {
183             index_info info;
184             info.term = escape_to_xml(i->format(pscan->term_formatter));
185             info.search_text = i->format(pscan->format_string);
186             info.category = pscan->type;
187             if(!pscan->section_filter.empty())
188                info.search_id = pscan->section_filter;
189             std::pair<std::set<index_info>::iterator, bool> pos = index_terms.insert(info);
190             if(pos.second)
191             {
192                if(verbose || need_debug)
193                   std::cout << "Indexing " << info.term << " as type " << info.category << std::endl;
194                if(need_debug)
195                   std::cout << "Search regex will be: \"" << info.search_text << "\"" <<
196                   " ID constraint is: \"" << info.search_id << "\""
197                   << "Found text was: " << i->str() << std::endl;
198                if(pos.first->search_text != info.search_text)
199                {
200                   //
201                   // Merge the search terms:
202                   //
203                   const_cast<boost::regex&>(pos.first->search_text) =
204                      "(?:" + pos.first->search_text.str() + ")|(?:" + info.search_text.str() + ")";
205                }
206                if(pos.first->search_id != info.search_id)
207                {
208                   //
209                   // Merge the ID constraints:
210                   //
211                   const_cast<boost::regex&>(pos.first->search_id) =
212                      "(?:" + pos.first->search_id.str() + ")|(?:" + info.search_id.str() + ")";
213                }
214             }
215          }
216          catch(const boost::regex_error& e)
217          {
218             std::cerr << "Unable to create regular expression from found index term:\""
219                << i->format(pscan->term_formatter) << "\" In file " << file << std::endl;
220             std::cerr << e.what() << std::endl;
221          }
222          catch(const std::exception& e)
223          {
224             std::cerr << "Unable to create index term:\""
225                << i->format(pscan->term_formatter) << "\" In file " << file << std::endl;
226             std::cerr << e.what() << std::endl;
227             throw;
228          }
229          ++i;
230       }
231    }
232 }
233 //
234 // Scan a whole directory for files to search:
235 //
scan_dir(const std::string & dir,const std::string & mask,bool recurse)236 void scan_dir(const std::string& dir, const std::string& mask, bool recurse)
237 {
238    using namespace boost::filesystem;
239    boost::regex e(mask);
240    directory_iterator i(dir), j;
241 
242    while(i != j)
243    {
244       if(regex_match(i->path().filename().string(), e))
245       {
246          scan_file(i->path().string());
247       }
248       else if(recurse && is_directory(i->status()))
249       {
250          scan_dir(i->path().string(), mask, recurse);
251       }
252       ++i;
253    }
254 }
//
// Remove quotes from a string:
// if s is at least two characters long and both starts and ends with a
// double quote, the enclosing pair is stripped; otherwise s is returned
// unchanged.
//
std::string unquote(const std::string& s)
{
   const std::string::size_type len = s.size();
   if((len < 2) || (s[0] != '"') || (s[len - 1] != '"'))
      return s;
   return s.substr(1, len - 2);
}
268 //
269 // Load and process a script file:
270 //
process_script(const std::string & script)271 void process_script(const std::string& script)
272 {
273    static const boost::regex comment_parser(
274       "\\s*(?:#.*)?$"
275       );
276    static const boost::regex scan_parser(
277       "!scan[[:space:]]+"
278       "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*"
279       );
280    static const boost::regex scan_dir_parser(
281       "!scan-path[[:space:]]+"
282       "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
283       "[[:space:]]+"
284       "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
285       "(?:"
286          "[[:space:]]+"
287          "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
288       ")?\\s*"
289       );
290    static const boost::regex entry_parser(
291       "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
292       "(?:"
293          "[[:space:]]+"
294          "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
295          "(?:"
296             "[[:space:]]+"
297             "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
298             "(?:"
299                "[[:space:]]+"
300                "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
301             ")?"
302          ")?"
303       ")?"
304       "[[:space:]]*");
305    static const boost::regex rewrite_parser(
306       "!(rewrite-name|rewrite-id)\\s+"
307       "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+"
308       "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*"
309       );
310    static const boost::regex debug_parser(
311       "!debug\\s+"
312       "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*"
313       );
314    static const boost::regex define_scanner_parser(
315       "!define-scanner\\s+"
316       "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+"  // type, index 1
317       "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+"  // scanner regex, index 2
318       "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+"  // format string, index 3
319       "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"  // format string for name, index 4
320       "(?:"
321          "\\s+([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // id-filter, index 5
322          "(?:"
323             "\\s+([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // filename-filter, index 6
324          ")?"
325       ")?"
326       "\\s*"
327       );
328    static const boost::regex error_parser("!.*");
329 
330    if(verbose)
331       std::cout << "Processing script " << script << std::endl;
332    boost::smatch what;
333    std::string line;
334    std::ifstream is(script.c_str());
335    if(is.bad() || !exists(boost::filesystem::path(script)))
336    {
337       throw std::runtime_error(std::string("Could not open script file: ") + script);
338    }
339    while(std::getline(is, line).good())
340    {
341       if(regex_match(line, what, comment_parser))
342       {
343          // Nothing to do here...
344       }
345       else if(regex_match(line, what, scan_parser))
346       {
347          std::string f = unquote(what[1].str());
348          if(!boost::filesystem::path(f).is_complete())
349          {
350             if(prefix.size())
351             {
352                boost::filesystem::path base(prefix);
353                base /= f;
354                f = base.string();
355             }
356             else
357             {
358                boost::filesystem::path base(script);
359                base.remove_filename();
360                base /= f;
361                f = base.string();
362             }
363          }
364          if(!exists(boost::filesystem::path(f)))
365             throw std::runtime_error("Error the file requested for scanning does not exist: " + f);
366          scan_file(f);
367       }
368       else if(regex_match(line, what, debug_parser))
369       {
370          debug = unquote(what[1].str());
371       }
372       else if(regex_match(line, what, define_scanner_parser))
373       {
374          add_file_scanner(unquote(what.str(1)), unquote(what.str(2)), unquote(what.str(3)),
375             unquote(what.str(4)), unquote(what.str(5)), unquote(what.str(6)));
376       }
377       else if(regex_match(line, what, scan_dir_parser))
378       {
379          std::string d = unquote(what[1].str());
380          std::string m = unquote(what[2].str());
381          bool r = unquote(what[3].str()) == "true";
382          if(!boost::filesystem::path(d).is_complete())
383          {
384             if(prefix.size())
385             {
386                boost::filesystem::path base(prefix);
387                base /= d;
388                d = base.string();
389             }
390             else
391             {
392                boost::filesystem::path base(script);
393                base.remove_filename();
394                base /= d;
395                d = base.string();
396             }
397          }
398          if(verbose)
399             std::cout << "Scanning directory " << d << std::endl;
400          if(!exists(boost::filesystem::path(d)))
401             throw std::runtime_error("Error the path requested for scanning does not exist: " + d);
402          scan_dir(d, m, r);
403       }
404       else if(regex_match(line, what, rewrite_parser))
405       {
406          bool id = what[1] == "rewrite-id";
407          std::string a = unquote(what[2].str());
408          std::string b = unquote(what[3].str());
409          id_rewrite_list.push_back(id_rewrite_rule(a, b, id));
410       }
411       else if(line.compare(0, 9, "!exclude ") == 0)
412       {
413          static const boost::regex delim("([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")");
414          boost::sregex_token_iterator i(line.begin() + 9, line.end(), delim, 0), j;
415          while(i != j)
416          {
417             index_info info;
418             info.term = escape_to_xml(unquote(*i));
419             // Erase all entries that have a category in our scanner set,
420             // plus any entry with no category at all:
421             index_terms.erase(info);
422             for(file_scanner_set_type::iterator pscan = file_scanner_set.begin(); pscan != file_scanner_set.end(); ++pscan)
423             {
424                info.category = (*pscan).type;
425                index_terms.erase(info);
426             }
427             ++i;
428          }
429       }
430       else if(regex_match(line, error_parser))
431       {
432          std::cerr << "Error: Unable to process line: " << line << std::endl;
433       }
434       else if(regex_match(line, what, entry_parser))
435       {
436          try{
437             // what[1] is the Index entry
438             // what[2] is the regex to search for (optional)
439             // what[3] is a section id that must be matched
440             // in order for the term to be indexed (optional)
441             // what[4] is the index category to place the term in (optional).
442             index_info info;
443             info.term = escape_to_xml(unquote(what.str(1)));
444             std::string s = unquote(what.str(2));
445             if(s.size())
446                info.search_text = boost::regex(s, boost::regex::icase|boost::regex::perl);
447             else
448                info.search_text = boost::regex("\\<" + what.str(1) + "\\>", boost::regex::icase|boost::regex::perl);
449 
450             s = unquote(what.str(3));
451             if(s.size())
452                info.search_id = s;
453             if(what[4].matched)
454                info.category = unquote(what.str(4));
455             std::pair<std::set<index_info>::iterator, bool> pos = index_terms.insert(info);
456             if(pos.second)
457             {
458                if(pos.first->search_text != info.search_text)
459                {
460                   //
461                   // Merge the search terms:
462                   //
463                   const_cast<boost::regex&>(pos.first->search_text) =
464                      "(?:" + pos.first->search_text.str() + ")|(?:" + info.search_text.str() + ")";
465                }
466                if(pos.first->search_id != info.search_id)
467                {
468                   //
469                   // Merge the ID constraints:
470                   //
471                   const_cast<boost::regex&>(pos.first->search_id) =
472                      "(?:" + pos.first->search_id.str() + ")|(?:" + info.search_id.str() + ")";
473                }
474             }
475          }
476          catch(const boost::regex_error&)
477          {
478             std::cerr << "Unable to process regular expression in script line:\n  \""
479                << line << "\"" << std::endl;
480             throw;
481          }
482          catch(const std::exception&)
483          {
484             std::cerr << "Unable to process script line:\n  \""
485                << line << "\"" << std::endl;
486             throw;
487          }
488       }
489       else
490       {
491          std::cerr << "Error: Unable to process line: " << line << std::endl;
492       }
493    }
494 }
495 
496