• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *          Copyright Andrey Semashev 2007 - 2015.
3  * Distributed under the Boost Software License, Version 1.0.
4  *    (See accompanying file LICENSE_1_0.txt or copy at
5  *          http://www.boost.org/LICENSE_1_0.txt)
6  */
7 /*!
8  * \file   named_scope_format_parser.cpp
9  * \author Andrey Semashev
10  * \date   14.11.2012
11  *
12  * \brief  This header is the Boost.Log library implementation, see the library documentation
13  *         at http://www.boost.org/doc/libs/release/libs/log/doc/html/index.html.
14  */
15 
16 #include <boost/log/detail/config.hpp>
17 #include <cstddef>
18 #include <cstring>
19 #include <string>
20 #include <vector>
21 #include <limits>
22 #include <algorithm>
23 #include <boost/cstdint.hpp>
24 #include <boost/move/core.hpp>
25 #include <boost/move/utility_core.hpp>
26 #include <boost/spirit/include/karma_uint.hpp>
27 #include <boost/spirit/include/karma_generate.hpp>
28 #include <boost/log/attributes/named_scope.hpp>
29 #include <boost/log/expressions/formatters/named_scope.hpp>
30 #include <boost/log/utility/formatting_ostream.hpp>
31 #include <boost/log/detail/header.hpp>
32 
33 namespace karma = boost::spirit::karma;
34 
35 namespace boost {
36 
37 BOOST_LOG_OPEN_NAMESPACE
38 
39 namespace expressions {
40 
41 namespace aux {
42 
43 BOOST_LOG_ANONYMOUS_NAMESPACE {
44 
//! Advances \a p past a run of space characters and returns the first position that does not hold a space (or \a end if there is none)
BOOST_FORCEINLINE const char* skip_spaces(const char* p, const char* end)
{
    for (; p < end; ++p)
    {
        if (*p != ' ')
            break;
    }

    return p;
}
52 
//! Tells whether the character may appear in a function/type/namespace name, i.e. it is an ASCII letter, a decimal digit or an underscore
BOOST_FORCEINLINE bool is_name_character(char c)
{
    if (c == '_')
        return true;
    if (c >= '0' && c <= '9')
        return true;
    if (c >= 'A' && c <= 'Z')
        return true;

    return c >= 'a' && c <= 'z';
}
58 
//! Tells whether the characters starting at \a p spell the 'operator' keyword. At least eight characters must be readable at \a p.
BOOST_FORCEINLINE bool is_operator_keyword(const char* p)
{
    static const char keyword[] = "operator";
    // The terminating zero of the array is not part of the comparison
    return std::memcmp(p, keyword, sizeof(keyword) - 1u) == 0;
}
64 
65 //! The function tries to parse operator signature
66 bool detect_operator(const char* begin, const char* end, const char* operator_keyword, const char*& operator_end)
67 {
68     if (end - operator_keyword < 9 || !is_operator_keyword(operator_keyword))
69         return false;
70     // Check that it's not a function name ending with 'operator', like detect_operator
71     if (operator_keyword > begin && is_name_character(*(operator_keyword - 1)))
72         return false;
73 
74     const char* p = skip_spaces(operator_keyword + 8, end);
75     if (p == end)
76         return false;
77 
78     // Check to see where the operator token ends
79     switch (*p)
80     {
81     case '(':
82         // Handle operator()
83         p = skip_spaces(++p, end);
84         if (p < end && *p == ')')
85         {
86             operator_end = p + 1;
87             return true;
88         }
89 
90         return false;
91 
92     case '[':
93         // Handle operator[]
94         p = skip_spaces(++p, end);
95         if (p < end && *p == ']')
96         {
97             operator_end = p + 1;
98             return true;
99         }
100 
101         return false;
102 
103     case '>':
104     case '<':
105         // Handle operator<=, operator>=, operator<<, operator>>, operator<<=, operator>>=
106         if (end - p >= 3 && (p[0] == p[1] && p[2] == '='))
107             operator_end = p + 3;
108         else if (end - p >= 2 && (p[0] == p[1] || p[1] == '='))
109             operator_end = p + 2;
110         else
111             operator_end = p + 1;
112 
113         return true;
114 
115     case '-':
116         // Handle operator->, operator->*
117         if (end - p >= 2 && p[1] == '>')
118         {
119             if (end - p >= 3 && p[2] == '*')
120                 operator_end = p + 3;
121             else
122                 operator_end = p + 2;
123 
124             return true;
125         }
126         // Fall through to other cases involving '-'
127         BOOST_FALLTHROUGH;
128 
129     case '=':
130     case '|':
131     case '&':
132     case '+':
133         // Handle operator=, operator==, operator+=, operator++, operator||, operator&&, etc.
134         if (end - p >= 2 && (p[0] == p[1] || p[1] == '='))
135             operator_end = p + 2;
136         else
137             operator_end = p + 1;
138 
139         return true;
140 
141     case '*':
142     case '/':
143     case '%':
144     case '^':
145         // Handle operator*, operator*=, etc.
146         if (end - p >= 2 && p[1] == '=')
147             operator_end = p + 2;
148         else
149             operator_end = p + 1;
150 
151         return true;
152 
153     case ',':
154     case '~':
155     case '!':
156         // Handle operator,, operator~, etc.
157         operator_end = p + 1;
158         return true;
159 
160     case '"':
161         // Handle operator""
162         if (end - p >= 2 && p[0] == p[1])
163         {
164             p = skip_spaces(p + 2, end);
165             // Skip through the literal suffix
166             while (p < end && is_name_character(*p))
167                 ++p;
168             operator_end = p;
169             return true;
170         }
171 
172         return false;
173 
174     default:
175         // Handle type conversion operators. We can't find the end of the type reliably here.
176         operator_end = p;
177         return true;
178     }
179 }
180 
181 //! The function skips all template parameters
182 inline const char* skip_template_parameters(const char* begin, const char* end)
183 {
184     unsigned int depth = 1;
185     const char* p = begin;
186     while (depth > 0 && p != end)
187     {
188         switch (*p)
189         {
190         case '>':
191             --depth;
192             break;
193 
194         case '<':
195             ++depth;
196             break;
197 
198         case 'o':
199             {
200                 // Skip operators (e.g. when an operator is a non-type template parameter)
201                 const char* operator_end;
202                 if (detect_operator(begin, end, p, operator_end))
203                 {
204                     p = operator_end;
205                     continue;
206                 }
207             }
208             break;
209 
210         default:
211             break;
212         }
213 
214         ++p;
215     }
216 
217     return p;
218 }
219 
220 //! The function seeks for the opening parenthesis and also tries to find the function name beginning
221 inline const char* find_opening_parenthesis(const char* begin, const char* end, const char*& first_name_begin, const char*& last_name_begin)
222 {
223     enum sequence_state
224     {
225         not_started,      // no significant (non-space) characters have been encountered so far
226         started,          // some name has started; the name is a contiguous sequence of characters that may constitute a function or scope name
227         continued,        // the previous characters were the scope operator ("::"), so the name is not finished yet
228         ended,            // the name has ended; in particular, this means that there were significant characters previously in the string
229         operator_detected // operator has been found in the string, don't parse for scopes anymore; this is needed for conversion operators
230     };
231     sequence_state state = not_started;
232 
233     const char* p = begin;
234     while (p != end)
235     {
236         char c = *p;
237         switch (c)
238         {
239         case '(':
240             if (state == not_started)
241             {
242                 // If the opening brace is the first meaningful character in the string then this can't be a function signature.
243                 // Pretend we didn't find the paranthesis to fail the parsing process.
244                 return end;
245             }
246             return p;
247 
248         case '<':
249             if (state == not_started)
250             {
251                 // Template parameters cannot start as the first meaningful character in the signature.
252                 // Pretend we didn't find the paranthesis to fail the parsing process.
253                 return end;
254             }
255             p = skip_template_parameters(p + 1, end);
256             if (state != operator_detected)
257                 state = ended;
258             continue;
259 
260         case ' ':
261             if (state == started)
262                 state = ended;
263             break;
264 
265         case ':':
266             ++p;
267             if (p != end && *p == ':')
268             {
269                 if (state == not_started)
270                 {
271                     // Include the starting "::" in the full name
272                     first_name_begin = p - 1;
273                 }
274                 if (state != operator_detected)
275                     state = continued;
276                 ++p;
277             }
278             else if (state != operator_detected)
279             {
280                 // Weird case, a single colon. Maybe, some compilers would put things like "public:" in front of the signature.
281                 state = ended;
282             }
283             continue;
284 
285         case 'o':
286             {
287                 const char* operator_end;
288                 if (detect_operator(begin, end, p, operator_end))
289                 {
290                     if (state == not_started || state == ended)
291                         first_name_begin = p;
292                     last_name_begin = p;
293                     p = operator_end;
294                     state = operator_detected;
295                     continue;
296                 }
297             }
298             // Fall through to process this character as other characters
299             BOOST_FALLTHROUGH;
300 
301         default:
302             if (state != operator_detected)
303             {
304                 if (is_name_character(c) || c == '~') // check for '~' in case of a destructor
305                 {
306                     if (state != started)
307                     {
308                         if (state == not_started || state == ended)
309                             first_name_begin = p;
310                         last_name_begin = p;
311                         state = started;
312                     }
313                 }
314                 else
315                 {
316                     state = ended;
317                 }
318             }
319             break;
320         }
321 
322         ++p;
323     }
324 
325     return p;
326 }
327 
328 //! The function seeks for the closing parenthesis
329 inline const char* find_closing_parenthesis(const char* begin, const char* end, char& first_char)
330 {
331     bool found_first_meaningful_char = false;
332     unsigned int depth = 1;
333     const char* p = begin;
334     while (p != end)
335     {
336         char c = *p;
337         switch (c)
338         {
339         case ')':
340             --depth;
341             if (depth == 0)
342                 return p;
343             break;
344 
345         case '(':
346             ++depth;
347             break;
348 
349         case '<':
350             p = skip_template_parameters(p + 1, end);
351             continue;
352 
353         case 'o':
354             {
355                 const char* operator_end;
356                 if (detect_operator(begin, end, p, operator_end))
357                 {
358                     p = operator_end;
359                     continue;
360                 }
361             }
362             // Fall through to process this character as other characters
363             BOOST_FALLTHROUGH;
364 
365         default:
366             if (!found_first_meaningful_char && c != ' ')
367             {
368                 found_first_meaningful_char = true;
369                 first_char = c;
370             }
371             break;
372         }
373 
374         ++p;
375     }
376 
377     return p;
378 }
379 
380 bool parse_function_name(const char*& begin, const char*& end, bool include_scope)
381 {
382     // The algorithm tries to match several patterns to recognize function signatures. The most obvious is:
383     //
384     // A B(C)
385     //
386     // or just:
387     //
388     // B(C)
389     //
390     // in case of constructors, destructors and type conversion operators. The algorithm looks for the opening parenthesis and while doing that
391     // it detects the beginning of B. As a result B is the function name.
392     //
393     // The first significant complication is function and array return types, in which case the syntax becomes nested:
394     //
395     // A (*B(C))(D)
396     // A (&B(C))[D]
397     //
398     // In addition to that MSVC adds calling convention, such as __cdecl, to function types. In order to detect these cases the algorithm
399     // seeks for the closing parenthesis after the opening one. If there is an opening parenthesis or square bracket after the closing parenthesis
400     // then this is a function or array return type. The case of arrays is additionally complicated by GCC output:
401     //
402     // A B(C) [D]
403     //
404     // where D is template parameters description and is not part of the signature. To discern this special case from the array return type, the algorithm
405     // checks for the first significant character within the parenthesis. This character is '&' in case of arrays and something else otherwise.
406     //
407     // Speaking of template parameters, the parsing algorithm ignores them completely, assuming they are part of the name being parsed. This includes
408     // any possible parenthesis, nested template parameters and even operators, which may be present there as non-type template parameters.
409     //
410     // Operators pose another problem. This is especially the case for type conversion operators, and even more so for conversion operators to
411     // function types. In this latter case at least MSVC is known to produce incomprehensible strings which we cannot parse. In other cases it is
412     // too difficult to parse the type correctly. So we cheat a little. Whenever we find "operator", we know that we've found the function name
413     // already, and the name ends at the opening parenthesis. For other operators we are able to parse them correctly but that doesn't really matter.
414     //
415     // Note that the algorithm should be tolerant to different flavors of the input strings from different compilers, so we can't rely on spaces
416     // delimiting function names and other elements. Also, the algorithm should behave well in case of the fallback string generated by
417     // BOOST_CURRENT_FUNCTION (which is "(unknown)" currently). In case of any parsing failure the algorithm should return false, in which case the
418     // full original string will be used as the output.
419 
420     const char* b = begin;
421     const char* e = end;
422     while (b != e)
423     {
424         // Find the opening parenthesis. While looking for it, also find the function name.
425         // first_name_begin is the beginning of the function scope, last_name_begin is the actual function name.
426         const char* first_name_begin = NULL, *last_name_begin = NULL;
427         const char* paren_open = find_opening_parenthesis(b, e, first_name_begin, last_name_begin);
428         if (paren_open == e)
429             return false;
430         // Find the closing parenthesis. Also peek at the first character in the parenthesis, which we'll use to detect array return types.
431         char first_char_in_parenthesis = 0;
432         const char* paren_close = find_closing_parenthesis(paren_open + 1, e, first_char_in_parenthesis);
433         if (paren_close == e)
434             return false;
435 
436         const char* p = skip_spaces(paren_close + 1, e);
437 
438         // Detect function and array return types
439         if (p < e && (*p == '(' || (*p == '[' && first_char_in_parenthesis == '&')))
440         {
441             // This is a function or array return type, the actual function name is within the parenthesis.
442             // Re-parse the string within the parenthesis as a function signature.
443             b = paren_open + 1;
444             e = paren_close;
445             continue;
446         }
447 
448         // We found something that looks like a function signature
449         if (include_scope)
450         {
451             if (!first_name_begin)
452                 return false;
453 
454             begin = first_name_begin;
455         }
456         else
457         {
458             if (!last_name_begin)
459                 return false;
460 
461             begin = last_name_begin;
462         }
463 
464         end = paren_open;
465 
466         return true;
467     }
468 
469     return false;
470 }
471 
472 template< typename CharT >
473 class named_scope_formatter
474 {
475     BOOST_COPYABLE_AND_MOVABLE_ALT(named_scope_formatter)
476 
477 public:
478     typedef void result_type;
479 
480     typedef CharT char_type;
481     typedef std::basic_string< char_type > string_type;
482     typedef basic_formatting_ostream< char_type > stream_type;
483     typedef attributes::named_scope::value_type::value_type value_type;
484 
485     struct literal
486     {
487         typedef void result_type;
488 
489         explicit literal(string_type& lit) { m_literal.swap(lit); }
490 
491         result_type operator() (stream_type& strm, value_type const&) const
492         {
493             strm << m_literal;
494         }
495 
496     private:
497         string_type m_literal;
498     };
499 
500     struct scope_name
501     {
502         typedef void result_type;
503 
504         result_type operator() (stream_type& strm, value_type const& value) const
505         {
506             strm << value.scope_name;
507         }
508     };
509 
510     struct function_name
511     {
512         typedef void result_type;
513 
514         explicit function_name(bool include_scope) : m_include_scope(include_scope)
515         {
516         }
517 
518         result_type operator() (stream_type& strm, value_type const& value) const
519         {
520             if (value.type == attributes::named_scope_entry::function)
521             {
522                 const char* begin = value.scope_name.c_str();
523                 const char* end = begin + value.scope_name.size();
524                 if (parse_function_name(begin, end, m_include_scope))
525                 {
526                     strm.write(begin, end - begin);
527                     return;
528                 }
529             }
530 
531             strm << value.scope_name;
532         }
533 
534     private:
535         const bool m_include_scope;
536     };
537 
538     struct full_file_name
539     {
540         typedef void result_type;
541 
542         result_type operator() (stream_type& strm, value_type const& value) const
543         {
544             strm << value.file_name;
545         }
546     };
547 
548     struct file_name
549     {
550         typedef void result_type;
551 
552         result_type operator() (stream_type& strm, value_type const& value) const
553         {
554             std::size_t n = value.file_name.size(), i = n;
555             for (; i > 0; --i)
556             {
557                 const char c = value.file_name[i - 1];
558 #if defined(BOOST_WINDOWS)
559                 if (c == '\\')
560                     break;
561 #endif
562                 if (c == '/')
563                     break;
564             }
565             strm.write(value.file_name.c_str() + i, n - i);
566         }
567     };
568 
569     struct line_number
570     {
571         typedef void result_type;
572 
573         result_type operator() (stream_type& strm, value_type const& value) const
574         {
575             strm.flush();
576 
577             char_type buf[std::numeric_limits< unsigned int >::digits10 + 2];
578             char_type* p = buf;
579 
580             typedef karma::uint_generator< unsigned int, 10 > uint_gen;
581             karma::generate(p, uint_gen(), value.line);
582 
583             typedef typename stream_type::streambuf_type streambuf_type;
584             static_cast< streambuf_type* >(strm.rdbuf())->append(buf, static_cast< std::size_t >(p - buf));
585         }
586     };
587 
588 private:
589     typedef boost::log::aux::light_function< void (stream_type&, value_type const&) > formatter_type;
590     typedef std::vector< formatter_type > formatters;
591 
592 private:
593     formatters m_formatters;
594 
595 public:
596     BOOST_DEFAULTED_FUNCTION(named_scope_formatter(), {})
597     named_scope_formatter(named_scope_formatter const& that) : m_formatters(that.m_formatters) {}
598     named_scope_formatter(BOOST_RV_REF(named_scope_formatter) that) BOOST_NOEXCEPT { m_formatters.swap(that.m_formatters); }
599 
600     named_scope_formatter& operator= (named_scope_formatter that) BOOST_NOEXCEPT
601     {
602         this->swap(that);
603         return *this;
604     }
605 
606     result_type operator() (stream_type& strm, value_type const& value) const
607     {
608         for (typename formatters::const_iterator it = m_formatters.begin(), end = m_formatters.end(); strm.good() && it != end; ++it)
609         {
610             (*it)(strm, value);
611         }
612     }
613 
614 #if !defined(BOOST_NO_CXX11_RVALUE_REFERENCES)
615     template< typename FunT >
616     void add_formatter(FunT&& fun)
617     {
618         m_formatters.emplace_back(boost::forward< FunT >(fun));
619     }
620 #else
621     template< typename FunT >
622     void add_formatter(FunT const& fun)
623     {
624         m_formatters.push_back(formatter_type(fun));
625     }
626 #endif
627 
628     void swap(named_scope_formatter& that)
629     {
630         m_formatters.swap(that.m_formatters);
631     }
632 };
633 
634 //! Parses the named scope format string and constructs the formatter function
635 template< typename CharT >
636 BOOST_FORCEINLINE boost::log::aux::light_function< void (basic_formatting_ostream< CharT >&, attributes::named_scope::value_type::value_type const&) >
637 do_parse_named_scope_format(const CharT* begin, const CharT* end)
638 {
639     typedef CharT char_type;
640     typedef boost::log::aux::light_function< void (basic_formatting_ostream< char_type >&, attributes::named_scope::value_type::value_type const&) > result_type;
641     typedef named_scope_formatter< char_type > formatter_type;
642     formatter_type fmt;
643 
644     std::basic_string< char_type > literal;
645 
646     while (begin != end)
647     {
648         const char_type* p = std::find(begin, end, static_cast< char_type >('%'));
649         literal.append(begin, p);
650 
651         if ((end - p) >= 2)
652         {
653             switch (p[1])
654             {
655             case '%':
656                 literal.push_back(static_cast< char_type >('%'));
657                 break;
658 
659             case 'n':
660                 if (!literal.empty())
661                     fmt.add_formatter(typename formatter_type::literal(literal));
662                 fmt.add_formatter(typename formatter_type::scope_name());
663                 break;
664 
665             case 'c':
666                 if (!literal.empty())
667                     fmt.add_formatter(typename formatter_type::literal(literal));
668                 fmt.add_formatter(typename formatter_type::function_name(true));
669                 break;
670 
671             case 'C':
672                 if (!literal.empty())
673                     fmt.add_formatter(typename formatter_type::literal(literal));
674                 fmt.add_formatter(typename formatter_type::function_name(false));
675                 break;
676 
677             case 'f':
678                 if (!literal.empty())
679                     fmt.add_formatter(typename formatter_type::literal(literal));
680                 fmt.add_formatter(typename formatter_type::full_file_name());
681                 break;
682 
683             case 'F':
684                 if (!literal.empty())
685                     fmt.add_formatter(typename formatter_type::literal(literal));
686                 fmt.add_formatter(typename formatter_type::file_name());
687                 break;
688 
689             case 'l':
690                 if (!literal.empty())
691                     fmt.add_formatter(typename formatter_type::literal(literal));
692                 fmt.add_formatter(typename formatter_type::line_number());
693                 break;
694 
695             default:
696                 literal.append(p, p + 2);
697                 break;
698             }
699 
700             begin = p + 2;
701         }
702         else
703         {
704             if (p != end)
705                 literal.push_back(static_cast< char_type >('%')); // a single '%' character at the end of the string
706             begin = end;
707         }
708     }
709 
710     if (!literal.empty())
711         fmt.add_formatter(typename formatter_type::literal(literal));
712 
713     return result_type(boost::move(fmt));
714 }
715 
716 } // namespace
717 
718 
719 #ifdef BOOST_LOG_USE_CHAR
720 
721 //! Parses the named scope format string and constructs the formatter function
722 BOOST_LOG_API boost::log::aux::light_function< void (basic_formatting_ostream< char >&, attributes::named_scope::value_type::value_type const&) >
723 parse_named_scope_format(const char* begin, const char* end)
724 {
725     return do_parse_named_scope_format(begin, end);
726 }
727 
728 #endif // BOOST_LOG_USE_CHAR
729 
730 #ifdef BOOST_LOG_USE_WCHAR_T
731 
732 //! Parses the named scope format string and constructs the formatter function
733 BOOST_LOG_API boost::log::aux::light_function< void (basic_formatting_ostream< wchar_t >&, attributes::named_scope::value_type::value_type const&) >
parse_named_scope_format(const wchar_t * begin,const wchar_t * end)734 parse_named_scope_format(const wchar_t* begin, const wchar_t* end)
735 {
736     return do_parse_named_scope_format(begin, end);
737 }
738 
739 #endif // BOOST_LOG_USE_WCHAR_T
740 
741 } // namespace aux
742 
743 } // namespace expressions
744 
745 BOOST_LOG_CLOSE_NAMESPACE // namespace log
746 
747 } // namespace boost
748 
749 #include <boost/log/detail/footer.hpp>
750