• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2004-9 Trustees of Indiana University
2 
3 // Distributed under the Boost Software License, Version 1.0.
4 // (See accompanying file LICENSE_1_0.txt or copy at
5 // http://www.boost.org/LICENSE_1_0.txt)
6 
7 //
8 // read_graphviz_new.cpp -
9 //   Initialize a model of the BGL's MutableGraph concept and an associated
10 //  collection of property maps using a graph expressed in the GraphViz
11 // DOT Language.
12 //
13 //   Based on the grammar found at:
14 //   https://web.archive.org/web/20041213234742/http://www.graphviz.org/cvs/doc/info/lang.html
15 //
16 //   Jeremiah rewrite used grammar found at:
17 //   http://www.graphviz.org/doc/info/lang.html
18 //   and page 34 or http://www.graphviz.org/pdf/dotguide.pdf
19 //
20 //   See documentation for this code at:
21 //     http://www.boost.org/libs/graph/doc/read_graphviz.html
22 //
23 
24 // Author: Jeremiah Willcock
25 //         Ronald Garcia
26 //
27 
28 #define BOOST_GRAPH_SOURCE
29 #include <boost/assert.hpp>
30 #include <boost/ref.hpp>
31 #include <boost/function/function2.hpp>
32 #include <boost/property_map/dynamic_property_map.hpp>
33 #include <boost/graph/graph_traits.hpp>
34 #include <boost/detail/workaround.hpp>
35 #include <boost/algorithm/string/case_conv.hpp>
36 #include <algorithm>
37 #include <exception> // for std::exception
38 #include <string>
39 #include <vector>
40 #include <set>
41 #include <utility>
42 #include <map>
43 #include <iostream>
44 #include <cstdlib>
45 #include <boost/throw_exception.hpp>
46 #include <boost/regex.hpp>
47 #include <boost/function.hpp>
48 #include <boost/bind.hpp>
49 #include <boost/graph/dll_import_export.hpp>
50 #include <boost/graph/graphviz.hpp>
51 
52 namespace boost
53 {
54 
55 namespace read_graphviz_detail
56 {
57     struct token
58     {
59         enum token_type
60         {
61             kw_strict,
62             kw_graph,
63             kw_digraph,
64             kw_node,
65             kw_edge,
66             kw_subgraph,
67             left_brace,
68             right_brace,
69             semicolon,
70             equal,
71             left_bracket,
72             right_bracket,
73             comma,
74             colon,
75             dash_greater,
76             dash_dash,
77             plus,
78             left_paren,
79             right_paren,
80             at,
81             identifier,
82             quoted_string, // Only used internally in tokenizer
83             eof,
84             invalid
85         };
86         token_type type;
87         std::string normalized_value; // May have double-quotes removed and/or
88                                       // some escapes replaced
tokenboost::read_graphviz_detail::token89         token(token_type type, const std::string& normalized_value)
90         : type(type), normalized_value(normalized_value)
91         {
92         }
tokenboost::read_graphviz_detail::token93         token() : type(invalid), normalized_value("") {}
operator <<(std::ostream & o,const token & t)94         friend std::ostream& operator<<(std::ostream& o, const token& t)
95         {
96             switch (t.type)
97             {
98             case token::kw_strict:
99                 o << "<strict>";
100                 break;
101             case token::kw_graph:
102                 o << "<graph>";
103                 break;
104             case token::kw_digraph:
105                 o << "<digraph>";
106                 break;
107             case token::kw_node:
108                 o << "<node>";
109                 break;
110             case token::kw_edge:
111                 o << "<edge>";
112                 break;
113             case token::kw_subgraph:
114                 o << "<subgraph>";
115                 break;
116             case token::left_brace:
117                 o << "<left_brace>";
118                 break;
119             case token::right_brace:
120                 o << "<right_brace>";
121                 break;
122             case token::semicolon:
123                 o << "<semicolon>";
124                 break;
125             case token::equal:
126                 o << "<equal>";
127                 break;
128             case token::left_bracket:
129                 o << "<left_bracket>";
130                 break;
131             case token::right_bracket:
132                 o << "<right_bracket>";
133                 break;
134             case token::comma:
135                 o << "<comma>";
136                 break;
137             case token::colon:
138                 o << "<colon>";
139                 break;
140             case token::dash_greater:
141                 o << "<dash-greater>";
142                 break;
143             case token::dash_dash:
144                 o << "<dash-dash>";
145                 break;
146             case token::plus:
147                 o << "<plus>";
148                 break;
149             case token::left_paren:
150                 o << "<left_paren>";
151                 break;
152             case token::right_paren:
153                 o << "<right_paren>";
154                 break;
155             case token::at:
156                 o << "<at>";
157                 break;
158             case token::identifier:
159                 o << "<identifier>";
160                 break;
161             case token::quoted_string:
162                 o << "<quoted_string>";
163                 break;
164             case token::eof:
165                 o << "<eof>";
166                 break;
167             default:
168                 o << "<invalid type>";
169                 break;
170             }
171             o << " '" << t.normalized_value << "'";
172             return o;
173         }
174     };
175 
lex_error(const std::string & errmsg,char bad_char)176     bad_graphviz_syntax lex_error(const std::string& errmsg, char bad_char)
177     {
178         if (bad_char == '\0')
179         {
180             return bad_graphviz_syntax(errmsg + " (at end of input)");
181         }
182         else
183         {
184             return bad_graphviz_syntax(
185                 errmsg + " (char is '" + bad_char + "')");
186         }
187     }
188 
parse_error(const std::string & errmsg,const token & bad_token)189     bad_graphviz_syntax parse_error(
190         const std::string& errmsg, const token& bad_token)
191     {
192         return bad_graphviz_syntax(errmsg + " (token is \""
193             + boost::lexical_cast< std::string >(bad_token) + "\")");
194     }
195 
196     struct tokenizer
197     {
198         std::string::const_iterator begin, end;
199         std::vector< token > lookahead;
200         // Precomputed regexes
201         boost::regex stuff_to_skip;
202         boost::regex basic_id_token;
203         boost::regex punctuation_token;
204         boost::regex number_token;
205         boost::regex quoted_string_token;
206         boost::regex xml_tag_token;
207         boost::regex cdata;
208 
tokenizerboost::read_graphviz_detail::tokenizer209         tokenizer(const std::string& str) : begin(str.begin()), end(str.end())
210         {
211             std::string end_of_token = "(?=(?:\\W))";
212             std::string whitespace = "(?:\\s+)";
213             std::string slash_slash_comment = "(?://.*?$)";
214             std::string slash_star_comment = "(?:/\\*.*?\\*/)";
215             std::string hash_comment = "(?:^#.*?$)";
216             std::string backslash_newline = "(?:[\\\\][\\n])";
217             stuff_to_skip = "\\A(?:" + whitespace + "|" + slash_slash_comment
218                 + "|" + slash_star_comment + "|" + hash_comment + "|"
219                 + backslash_newline + ")*";
220             basic_id_token = "\\A([[:alpha:]_](?:\\w*))";
221             punctuation_token = "\\A([][{};=,:+()@]|[-][>-])";
222             number_token = "\\A([-]?(?:(?:\\.\\d+)|(?:\\d+(?:\\.\\d*)?)))";
223             quoted_string_token = "\\A(\"(?:[^\"\\\\]|(?:[\\\\].))*\")";
224             xml_tag_token
225                 = "\\A<(/?)(?:[^!?'\"]|(?:'[^']*?')|(?:\"[^\"]*?\"))*?(/?)>";
226             cdata = "\\A\\Q<![CDATA[\\E.*?\\Q]]>\\E";
227         }
228 
skipboost::read_graphviz_detail::tokenizer229         void skip()
230         {
231             boost::match_results< std::string::const_iterator > results;
232 #ifndef NDEBUG
233             bool found =
234 #endif
235                 boost::regex_search(begin, end, results, stuff_to_skip);
236 #ifndef NDEBUG
237             BOOST_ASSERT(found);
238 #endif
239             boost::sub_match< std::string::const_iterator > sm1
240                 = results.suffix();
241             BOOST_ASSERT(sm1.second == end);
242             begin = sm1.first;
243         }
244 
get_token_rawboost::read_graphviz_detail::tokenizer245         token get_token_raw()
246         {
247             if (!lookahead.empty())
248             {
249                 token t = lookahead.front();
250                 lookahead.erase(lookahead.begin());
251                 return t;
252             }
253             skip();
254             if (begin == end)
255                 return token(token::eof, "");
256             // Look for keywords first
257             bool found;
258             boost::match_results< std::string::const_iterator > results;
259             found = boost::regex_search(begin, end, results, basic_id_token);
260             if (found)
261             {
262                 std::string str = results[1].str();
263                 std::string str_lower = boost::algorithm::to_lower_copy(str);
264                 begin = results.suffix().first;
265                 if (str_lower == "strict")
266                 {
267                     return token(token::kw_strict, str);
268                 }
269                 else if (str_lower == "graph")
270                 {
271                     return token(token::kw_graph, str);
272                 }
273                 else if (str_lower == "digraph")
274                 {
275                     return token(token::kw_digraph, str);
276                 }
277                 else if (str_lower == "node")
278                 {
279                     return token(token::kw_node, str);
280                 }
281                 else if (str_lower == "edge")
282                 {
283                     return token(token::kw_edge, str);
284                 }
285                 else if (str_lower == "subgraph")
286                 {
287                     return token(token::kw_subgraph, str);
288                 }
289                 else
290                 {
291                     return token(token::identifier, str);
292                 }
293             }
294             found = boost::regex_search(begin, end, results, punctuation_token);
295             if (found)
296             {
297                 std::string str = results[1].str();
298                 begin = results.suffix().first;
299                 switch (str[0])
300                 {
301                 case '[':
302                     return token(token::left_bracket, str);
303                 case ']':
304                     return token(token::right_bracket, str);
305                 case '{':
306                     return token(token::left_brace, str);
307                 case '}':
308                     return token(token::right_brace, str);
309                 case ';':
310                     return token(token::semicolon, str);
311                 case '=':
312                     return token(token::equal, str);
313                 case ',':
314                     return token(token::comma, str);
315                 case ':':
316                     return token(token::colon, str);
317                 case '+':
318                     return token(token::plus, str);
319                 case '(':
320                     return token(token::left_paren, str);
321                 case ')':
322                     return token(token::right_paren, str);
323                 case '@':
324                     return token(token::at, str);
325                 case '-':
326                 {
327                     switch (str[1])
328                     {
329                     case '-':
330                         return token(token::dash_dash, str);
331                     case '>':
332                         return token(token::dash_greater, str);
333                     default:
334                         BOOST_ASSERT(!"Definition of punctuation_token does "
335                                       "not match switch statement");
336                     }
337                 }
338                 default:
339                     BOOST_ASSERT(!"Definition of punctuation_token does not "
340                                   "match switch statement");
341                 }
342             }
343             found = boost::regex_search(begin, end, results, number_token);
344             if (found)
345             {
346                 std::string str = results[1].str();
347                 begin = results.suffix().first;
348                 return token(token::identifier, str);
349             }
350             found
351                 = boost::regex_search(begin, end, results, quoted_string_token);
352             if (found)
353             {
354                 std::string str = results[1].str();
355                 begin = results.suffix().first;
356                 // Remove the beginning and ending quotes
357                 BOOST_ASSERT(str.size() >= 2);
358                 str.erase(str.begin());
359                 str.erase(str.end() - 1);
360                 // Unescape quotes in the middle, but nothing else (see format
361                 // spec)
362                 for (size_t i = 0; i + 1 < str.size() /* May change */; ++i)
363                 {
364                     if (str[i] == '\\' && str[i + 1] == '"')
365                     {
366                         str.erase(str.begin() + i);
367                         // Don't need to adjust i
368                     }
369                     else if (str[i] == '\\' && str[i + 1] == '\n')
370                     {
371                         str.erase(str.begin() + i);
372                         str.erase(str.begin() + i);
373                         --i; // Invert ++ that will be applied
374                     }
375                 }
376                 return token(token::quoted_string, str);
377             }
378             if (*begin == '<')
379             {
380                 std::string::const_iterator saved_begin = begin;
381                 int counter = 0;
382                 do
383                 {
384                     if (begin == end)
385                         throw_lex_error("Unclosed HTML string");
386                     if (*begin != '<')
387                     {
388                         ++begin;
389                         continue;
390                     }
391                     found = boost::regex_search(
392                         begin, end, results, xml_tag_token);
393                     if (found)
394                     {
395                         begin = results.suffix().first;
396                         if (results[1].str() == "/")
397                         { // Close tag
398                             --counter;
399                         }
400                         else if (results[2].str() == "/")
401                         { // Empty tag
402                         }
403                         else
404                         { // Open tag
405                             ++counter;
406                         }
407                         continue;
408                     }
409                     found = boost::regex_search(begin, end, results, cdata);
410                     if (found)
411                     {
412                         begin = results.suffix().first;
413                         continue;
414                     }
415                     throw_lex_error("Invalid contents in HTML string");
416                 } while (counter > 0);
417                 return token(
418                     token::identifier, std::string(saved_begin, begin));
419             }
420             else
421             {
422                 throw_lex_error("Invalid character");
423                 return token();
424             }
425         }
426 
peek_token_rawboost::read_graphviz_detail::tokenizer427         token peek_token_raw()
428         {
429             if (lookahead.empty())
430             {
431                 token t = get_token_raw();
432                 lookahead.push_back(t);
433             }
434             return lookahead.front();
435         }
436 
get_tokenboost::read_graphviz_detail::tokenizer437         token get_token()
438         { // Handle string concatenation
439             token t = get_token_raw();
440             if (t.type != token::quoted_string)
441                 return t;
442             std::string str = t.normalized_value;
443             while (peek_token_raw().type == token::plus)
444             {
445                 get_token_raw();
446                 token t2 = get_token_raw();
447                 if (t2.type != token::quoted_string)
448                 {
449                     throw_lex_error(
450                         "Must have quoted string after string concatenation");
451                 }
452                 str += t2.normalized_value;
453             }
454             return token(
455                 token::identifier, str); // Note that quoted_string does not get
456                                          // passed to the parser
457         }
458 
throw_lex_errorboost::read_graphviz_detail::tokenizer459         void throw_lex_error(const std::string& errmsg)
460         {
461             boost::throw_exception(
462                 lex_error(errmsg, (begin == end ? '\0' : *begin)));
463         }
464     };
465 
466     struct edge_endpoint
467     {
468         bool is_subgraph;
469         node_and_port node_ep;
470         subgraph_name subgraph_ep;
471 
nodeboost::read_graphviz_detail::edge_endpoint472         static edge_endpoint node(const node_and_port& ep)
473         {
474             edge_endpoint r;
475             r.is_subgraph = false;
476             r.node_ep = ep;
477             return r;
478         }
479 
subgraphboost::read_graphviz_detail::edge_endpoint480         static edge_endpoint subgraph(const subgraph_name& ep)
481         {
482             edge_endpoint r;
483             r.is_subgraph = true;
484             r.subgraph_ep = ep;
485             return r;
486         }
487     };
488 
489     struct node_or_subgraph_ref
490     {
491         bool is_subgraph;
492         std::string
493             name; // Name for subgraphs or nodes, "___root___" for root graph
494     };
495 
noderef(const node_name & n)496     static node_or_subgraph_ref noderef(const node_name& n)
497     {
498         node_or_subgraph_ref r;
499         r.is_subgraph = false;
500         r.name = n;
501         return r;
502     }
503 
subgraphref(const subgraph_name & n)504     static node_or_subgraph_ref subgraphref(const subgraph_name& n)
505     {
506         node_or_subgraph_ref r;
507         r.is_subgraph = true;
508         r.name = n;
509         return r;
510     }
511 
512     typedef std::vector< node_or_subgraph_ref > subgraph_member_list;
513 
514     struct subgraph_info
515     {
516         properties def_node_props;
517         properties def_edge_props;
518         subgraph_member_list members;
519     };
520 
521     struct parser
522     {
523         tokenizer the_tokenizer;
524         std::vector< token > lookahead;
525         parser_result& r;
526         std::map< subgraph_name, subgraph_info > subgraphs;
527         std::string current_subgraph_name;
528         int sgcounter; // Counter for anonymous subgraphs
529         std::set< std::pair< node_name, node_name > >
530             existing_edges; // Used for checking in strict graphs
531 
currentboost::read_graphviz_detail::parser532         subgraph_info& current() { return subgraphs[current_subgraph_name]; }
current_graph_propsboost::read_graphviz_detail::parser533         properties& current_graph_props()
534         {
535             return r.graph_props[current_subgraph_name];
536         }
current_membersboost::read_graphviz_detail::parser537         subgraph_member_list& current_members() { return current().members; }
538 
parserboost::read_graphviz_detail::parser539         parser(const std::string& gr, parser_result& result)
540         : the_tokenizer(gr), lookahead(), r(result), sgcounter(0)
541         {
542             current_subgraph_name = "___root___";
543             current() = subgraph_info(); // Initialize root graph
544             current_graph_props().clear();
545             current_members().clear();
546         }
547 
getboost::read_graphviz_detail::parser548         token get()
549         {
550             if (lookahead.empty())
551             {
552                 token t = the_tokenizer.get_token();
553                 return t;
554             }
555             else
556             {
557                 token t = lookahead.front();
558                 lookahead.erase(lookahead.begin());
559                 return t;
560             }
561         }
562 
peekboost::read_graphviz_detail::parser563         token peek()
564         {
565             if (lookahead.empty())
566             {
567                 lookahead.push_back(the_tokenizer.get_token());
568             }
569             return lookahead.front();
570         }
571 
errorboost::read_graphviz_detail::parser572         void error(const std::string& str)
573         {
574             boost::throw_exception(parse_error(str, peek()));
575         }
576 
parse_graphboost::read_graphviz_detail::parser577         void parse_graph(bool want_directed)
578         {
579             bool is_strict = false;
580             bool is_directed = false;
581             std::string name;
582             if (peek().type == token::kw_strict)
583             {
584                 get();
585                 is_strict = true;
586             }
587             switch (peek().type)
588             {
589             case token::kw_graph:
590                 is_directed = false;
591                 break;
592             case token::kw_digraph:
593                 is_directed = true;
594                 break;
595             default:
596                 error("Wanted \"graph\" or \"digraph\"");
597             }
598             r.graph_is_directed = is_directed; // Used to check edges
599             r.graph_is_strict = is_strict;
600             if (want_directed != r.graph_is_directed)
601             {
602                 if (want_directed)
603                 {
604                     boost::throw_exception(boost::undirected_graph_error());
605                 }
606                 else
607                 {
608                     boost::throw_exception(boost::directed_graph_error());
609                 }
610             }
611             get();
612             switch (peek().type)
613             {
614             case token::identifier:
615                 name = peek().normalized_value;
616                 get();
617                 break;
618             case token::left_brace:
619                 break;
620             default:
621                 error("Wanted a graph name or left brace");
622             }
623             if (peek().type == token::left_brace)
624                 get();
625             else
626                 error("Wanted a left brace to start the graph");
627             parse_stmt_list();
628             if (peek().type == token::right_brace)
629                 get();
630             else
631                 error("Wanted a right brace to end the graph");
632             if (peek().type == token::eof)
633             {
634             }
635             else
636                 error("Wanted end of file");
637         }
638 
parse_stmt_listboost::read_graphviz_detail::parser639         void parse_stmt_list()
640         {
641             while (true)
642             {
643                 if (peek().type == token::right_brace)
644                     return;
645                 parse_stmt();
646                 if (peek().type == token::semicolon)
647                     get();
648             }
649         }
650 
parse_stmtboost::read_graphviz_detail::parser651         void parse_stmt()
652         {
653             switch (peek().type)
654             {
655             case token::kw_node:
656             case token::kw_edge:
657             case token::kw_graph:
658                 parse_attr_stmt();
659                 break;
660             case token::kw_subgraph:
661             case token::left_brace:
662             case token::identifier:
663             {
664                 token id = get();
665                 if (id.type == token::identifier && peek().type == token::equal)
666                 { // Graph property
667                     get();
668                     if (peek().type != token::identifier)
669                         error("Wanted identifier as right side of =");
670                     token id2 = get();
671                     current_graph_props()[id.normalized_value]
672                         = id2.normalized_value;
673                 }
674                 else
675                 {
676                     edge_endpoint ep = parse_endpoint_rest(id);
677                     if (peek().type == token::dash_dash
678                         || peek().type == token::dash_greater)
679                     { // Edge
680                         parse_edge_stmt(ep);
681                     }
682                     else
683                     {
684                         if (!ep.is_subgraph)
685                         { // Only nodes can have attribute lists
686                             // This node already exists because of its first
687                             // mention (properties set to defaults by
688                             // parse_node_and_port, called by
689                             // parse_endpoint_rest)
690                             properties this_node_props;
691                             if (peek().type == token::left_bracket)
692                             {
693                                 parse_attr_list(this_node_props);
694                             }
695                             for (properties::const_iterator i
696                                  = this_node_props.begin();
697                                  i != this_node_props.end(); ++i)
698                             {
699                                 // Override old properties with same names
700                                 r.nodes[ep.node_ep.name][i->first] = i->second;
701                             }
702                             current_members().push_back(
703                                 noderef(ep.node_ep.name));
704                         }
705                         else
706                         {
707                             current_members().push_back(
708                                 subgraphref(ep.subgraph_ep));
709                         }
710                     }
711                 }
712                 break;
713             }
714             default:
715                 error("Invalid start token for statement");
716             }
717         }
718 
parse_attr_stmtboost::read_graphviz_detail::parser719         void parse_attr_stmt()
720         {
721             switch (get().type)
722             {
723             case token::kw_graph:
724                 parse_attr_list(current_graph_props());
725                 break;
726             case token::kw_node:
727                 parse_attr_list(current().def_node_props);
728                 break;
729             case token::kw_edge:
730                 parse_attr_list(current().def_edge_props);
731                 break;
732             default:
733                 BOOST_ASSERT(!"Bad attr_stmt case");
734             }
735         }
736 
parse_endpointboost::read_graphviz_detail::parser737         edge_endpoint parse_endpoint()
738         {
739             switch (peek().type)
740             {
741             case token::kw_subgraph:
742             case token::left_brace:
743             case token::identifier:
744             {
745                 token first = get();
746                 return parse_endpoint_rest(first);
747             }
748             default:
749             {
750                 error("Wanted \"subgraph\", \"{\", or identifier to start node "
751                       "or subgraph");
752                 return edge_endpoint();
753             }
754             }
755         }
756 
parse_endpoint_restboost::read_graphviz_detail::parser757         edge_endpoint parse_endpoint_rest(const token& first_token)
758         {
759             switch (first_token.type)
760             {
761             case token::kw_subgraph:
762             case token::left_brace:
763                 return edge_endpoint::subgraph(parse_subgraph(first_token));
764             default:
765                 return edge_endpoint::node(parse_node_and_port(first_token));
766             }
767         }
768 
parse_subgraphboost::read_graphviz_detail::parser769         subgraph_name parse_subgraph(const token& first_token)
770         {
771             std::string name;
772             bool is_anonymous = true;
773             if (first_token.type == token::kw_subgraph)
774             {
775                 if (peek().type == token::identifier)
776                 {
777                     name = get().normalized_value;
778                     is_anonymous = false;
779                 }
780             }
781             if (is_anonymous)
782             {
783                 name = "___subgraph_"
784                     + boost::lexical_cast< std::string >(++sgcounter);
785             }
786             if (subgraphs.find(name) == subgraphs.end())
787             {
788                 subgraphs[name]
789                     = current(); // Initialize properties and defaults
790                 subgraphs[name].members.clear(); // Except member list
791             }
792             if (first_token.type == token::kw_subgraph
793                 && peek().type != token::left_brace)
794             {
795                 if (is_anonymous)
796                     error("Subgraph reference needs a name");
797                 return name;
798             }
799             subgraph_name old_sg = current_subgraph_name;
800             current_subgraph_name = name;
801             if (peek().type == token::left_brace)
802                 get();
803             else
804                 error("Wanted left brace to start subgraph");
805             parse_stmt_list();
806             if (peek().type == token::right_brace)
807                 get();
808             else
809                 error("Wanted right brace to end subgraph");
810             current_subgraph_name = old_sg;
811             return name;
812         }
813 
parse_node_and_portboost::read_graphviz_detail::parser814         node_and_port parse_node_and_port(const token& name)
815         {
816             // A node ID is a node name, followed optionally by a port angle and
817             // a port location (in either order); a port location is either :id,
818             // :id:id, or :(id,id); the last two forms are treated as equivalent
819             // although I am not sure about that.
820             node_and_port id;
821             id.name = name.normalized_value;
822         parse_more:
823             switch (peek().type)
824             {
825             case token::at:
826             {
827                 get();
828                 if (peek().type != token::identifier)
829                     error("Wanted identifier as port angle");
830                 if (id.angle != "")
831                     error("Duplicate port angle");
832                 id.angle = get().normalized_value;
833                 goto parse_more;
834             }
835             case token::colon:
836             {
837                 get();
838                 if (!id.location.empty())
839                     error("Duplicate port location");
840                 switch (peek().type)
841                 {
842                 case token::identifier:
843                 {
844                     id.location.push_back(get().normalized_value);
845                     switch (peek().type)
846                     {
847                     case token::colon:
848                     {
849                         get();
850                         if (peek().type != token::identifier)
851                             error("Wanted identifier as port location");
852                         id.location.push_back(get().normalized_value);
853                         goto parse_more;
854                     }
855                     default:
856                         goto parse_more;
857                     }
858                 }
859                 case token::left_paren:
860                 {
861                     get();
862                     if (peek().type != token::identifier)
863                         error("Wanted identifier as first element of port "
864                               "location");
865                     id.location.push_back(get().normalized_value);
866                     if (peek().type != token::comma)
867                         error("Wanted comma between parts of port location");
868                     get();
869                     if (peek().type != token::identifier)
870                         error("Wanted identifier as second element of port "
871                               "location");
872                     id.location.push_back(get().normalized_value);
873                     if (peek().type != token::right_paren)
874                         error(
875                             "Wanted right parenthesis to close port location");
876                     get();
877                     goto parse_more;
878                 }
879                 default:
880                     error("Wanted identifier or left parenthesis as start of "
881                           "port location");
882                 }
883             }
884             default:
885                 break;
886             }
887             if (r.nodes.find(id.name) == r.nodes.end())
888             { // First mention
889                 r.nodes[id.name] = current().def_node_props;
890             }
891             return id;
892         }
893 
parse_edge_stmtboost::read_graphviz_detail::parser894         void parse_edge_stmt(const edge_endpoint& lhs)
895         {
896             std::vector< edge_endpoint > nodes_in_chain(1, lhs);
897             while (true)
898             {
899                 bool leave_loop = true;
900                 switch (peek().type)
901                 {
902                 case token::dash_dash:
903                 {
904                     if (r.graph_is_directed)
905                         error("Using -- in directed graph");
906                     get();
907                     nodes_in_chain.push_back(parse_endpoint());
908                     leave_loop = false;
909                     break;
910                 }
911                 case token::dash_greater:
912                 {
913                     if (!r.graph_is_directed)
914                         error("Using -> in undirected graph");
915                     get();
916                     nodes_in_chain.push_back(parse_endpoint());
917                     leave_loop = false;
918                     break;
919                 }
920                 default:
921                     leave_loop = true;
922                     break;
923                 }
924                 if (leave_loop)
925                     break;
926             }
927             properties this_edge_props = current().def_edge_props;
928             if (peek().type == token::left_bracket)
929                 parse_attr_list(this_edge_props);
930             BOOST_ASSERT(nodes_in_chain.size()
931                 >= 2); // Should be in node parser otherwise
932             for (size_t i = 0; i + 1 < nodes_in_chain.size(); ++i)
933             {
934                 do_orig_edge(
935                     nodes_in_chain[i], nodes_in_chain[i + 1], this_edge_props);
936             }
937         }
938 
939         // Do an edge from the file, the edge may need to be expanded if it
940         // connects to a subgraph
do_orig_edgeboost::read_graphviz_detail::parser941         void do_orig_edge(const edge_endpoint& src, const edge_endpoint& tgt,
942             const properties& props)
943         {
944             std::set< node_and_port > sources = get_recursive_members(src);
945             std::set< node_and_port > targets = get_recursive_members(tgt);
946             for (std::set< node_and_port >::const_iterator i = sources.begin();
947                  i != sources.end(); ++i)
948             {
949                 for (std::set< node_and_port >::const_iterator j
950                      = targets.begin();
951                      j != targets.end(); ++j)
952                 {
953                     do_edge(*i, *j, props);
954                 }
955             }
956         }
957 
958         // Get nodes in an edge_endpoint, recursively
get_recursive_membersboost::read_graphviz_detail::parser959         std::set< node_and_port > get_recursive_members(
960             const edge_endpoint& orig_ep)
961         {
962             std::set< node_and_port > result;
963             std::vector< edge_endpoint > worklist(1, orig_ep);
964             std::set< subgraph_name > done;
965             while (!worklist.empty())
966             {
967                 edge_endpoint ep = worklist.back();
968                 worklist.pop_back();
969                 if (ep.is_subgraph)
970                 {
971                     if (done.find(ep.subgraph_ep) == done.end())
972                     {
973                         done.insert(ep.subgraph_ep);
974                         std::map< subgraph_name, subgraph_info >::const_iterator
975                             info_i
976                             = subgraphs.find(ep.subgraph_ep);
977                         if (info_i != subgraphs.end())
978                         {
979                             const subgraph_member_list& members
980                                 = info_i->second.members;
981                             for (subgraph_member_list::const_iterator i
982                                  = members.begin();
983                                  i != members.end(); ++i)
984                             {
985                                 node_or_subgraph_ref ref = *i;
986                                 if (ref.is_subgraph)
987                                 {
988                                     worklist.push_back(
989                                         edge_endpoint::subgraph(ref.name));
990                                 }
991                                 else
992                                 {
993                                     node_and_port np;
994                                     np.name = ref.name;
995                                     worklist.push_back(edge_endpoint::node(np));
996                                 }
997                             }
998                         }
999                     }
1000                 }
1001                 else
1002                 {
1003                     result.insert(ep.node_ep);
1004                 }
1005             }
1006             return result;
1007         }
1008 
1009         // Do a fixed-up edge, with only nodes as endpoints
do_edgeboost::read_graphviz_detail::parser1010         void do_edge(const node_and_port& src, const node_and_port& tgt,
1011             const properties& props)
1012         {
1013             if (r.graph_is_strict)
1014             {
1015                 if (src.name == tgt.name)
1016                     return;
1017                 std::pair< node_name, node_name > tag(src.name, tgt.name);
1018                 if (existing_edges.find(tag) != existing_edges.end())
1019                 {
1020                     return; // Parallel edge
1021                 }
1022                 existing_edges.insert(tag);
1023             }
1024             edge_info e;
1025             e.source = src;
1026             e.target = tgt;
1027             e.props = props;
1028             r.edges.push_back(e);
1029         }
1030 
parse_attr_listboost::read_graphviz_detail::parser1031         void parse_attr_list(properties& props)
1032         {
1033             while (true)
1034             {
1035                 if (peek().type == token::left_bracket)
1036                     get();
1037                 else
1038                     error("Wanted left bracket to start attribute list");
1039                 while (true)
1040                 {
1041                     switch (peek().type)
1042                     {
1043                     case token::right_bracket:
1044                         break;
1045                     case token::identifier:
1046                     {
1047                         std::string lhs = get().normalized_value;
1048                         std::string rhs = "true";
1049                         if (peek().type == token::equal)
1050                         {
1051                             get();
1052                             if (peek().type != token::identifier)
1053                                 error(
1054                                     "Wanted identifier as value of attribute");
1055                             rhs = get().normalized_value;
1056                         }
1057                         props[lhs] = rhs;
1058                         break;
1059                     }
1060                     default:
1061                         error("Wanted identifier as name of attribute");
1062                     }
1063                     if (peek().type == token::comma
1064                         || peek().type == token::semicolon)
1065                         get();
1066                     else if (peek().type == token::right_bracket)
1067                         break;
1068                 }
1069                 if (peek().type == token::right_bracket)
1070                     get();
1071                 else
1072                     error("Wanted right bracket to end attribute list");
1073                 if (peek().type != token::left_bracket)
1074                     break;
1075             }
1076         }
1077     };
1078 
parse_graphviz_from_string(const std::string & str,parser_result & result,bool want_directed)1079     void parse_graphviz_from_string(
1080         const std::string& str, parser_result& result, bool want_directed)
1081     {
1082         parser p(str, result);
1083         p.parse_graph(want_directed);
1084     }
1085 
1086     // Some debugging stuff
operator <<(std::ostream & o,const node_and_port & n)1087     std::ostream& operator<<(std::ostream& o, const node_and_port& n)
1088     {
1089         o << n.name;
1090         for (size_t i = 0; i < n.location.size(); ++i)
1091         {
1092             o << ":" << n.location[i];
1093         }
1094         if (!n.angle.empty())
1095             o << "@" << n.angle;
1096         return o;
1097     }
1098 
1099     // Can't be operator<< because properties is just an std::map
props_to_string(const properties & props)1100     std::string props_to_string(const properties& props)
1101     {
1102         std::string result = "[";
1103         for (properties::const_iterator i = props.begin(); i != props.end();
1104              ++i)
1105         {
1106             if (i != props.begin())
1107                 result += ", ";
1108             result += i->first + "=" + i->second;
1109         }
1110         result += "]";
1111         return result;
1112     }
1113 
translate_results_to_graph(const parser_result & r,::boost::detail::graph::mutate_graph * mg)1114     void translate_results_to_graph(
1115         const parser_result& r, ::boost::detail::graph::mutate_graph* mg)
1116     {
1117         typedef boost::detail::graph::edge_t edge;
1118         for (std::map< node_name, properties >::const_iterator i
1119              = r.nodes.begin();
1120              i != r.nodes.end(); ++i)
1121         {
1122             // std::cerr << i->first << " " << props_to_string(i->second) <<
1123             // std::endl;
1124             mg->do_add_vertex(i->first);
1125             for (properties::const_iterator j = i->second.begin();
1126                  j != i->second.end(); ++j)
1127             {
1128                 mg->set_node_property(j->first, i->first, j->second);
1129             }
1130         }
1131         for (std::vector< edge_info >::const_iterator i = r.edges.begin();
1132              i != r.edges.end(); ++i)
1133         {
1134             const edge_info& ei = *i;
1135             // std::cerr << ei.source << " -> " << ei.target << " " <<
1136             // props_to_string(ei.props) << std::endl;
1137             edge e = edge::new_edge();
1138             mg->do_add_edge(e, ei.source.name, ei.target.name);
1139             for (properties::const_iterator j = ei.props.begin();
1140                  j != ei.props.end(); ++j)
1141             {
1142                 mg->set_edge_property(j->first, e, j->second);
1143             }
1144         }
1145         std::map< subgraph_name, properties >::const_iterator root_graph_props_i
1146             = r.graph_props.find("___root___");
1147         BOOST_ASSERT(
1148             root_graph_props_i != r.graph_props.end()); // Should not happen
1149         const properties& root_graph_props = root_graph_props_i->second;
1150         // std::cerr << "ending graph " << props_to_string(root_graph_props) <<
1151         // std::endl;
1152         for (properties::const_iterator i = root_graph_props.begin();
1153              i != root_graph_props.end(); ++i)
1154         {
1155             mg->set_graph_property(i->first, i->second);
1156         }
1157         mg->finish_building_graph();
1158     }
1159 
1160 } // end namespace read_graphviz_detail
1161 
1162 namespace detail
1163 {
1164     namespace graph
1165     {
1166 
read_graphviz_new(const std::string & str,boost::detail::graph::mutate_graph * mg)1167         BOOST_GRAPH_DECL bool read_graphviz_new(
1168             const std::string& str, boost::detail::graph::mutate_graph* mg)
1169         {
1170             read_graphviz_detail::parser_result parsed_file;
1171             read_graphviz_detail::parse_graphviz_from_string(
1172                 str, parsed_file, mg->is_directed());
1173             read_graphviz_detail::translate_results_to_graph(parsed_file, mg);
1174             return true;
1175         }
1176 
1177     } // end namespace graph
1178 } // end namespace detail
1179 
1180 } // end namespace boost
1181 
1182 // GraphViz format notes (tested using "dot version 1.13 (v16) (Mon August 23,
1183 // 2004)", grammar from references in read_graphviz_new.hpp):
1184 
1185 // Subgraphs don't nest (all a0 subgraphs are the same thing), but a node or
1186 // subgraph can have multiple parents (sources online say that the layout
1187 // algorithms can't handle non-tree structures of clusters, but it seems to
1188 // read them the same from the file).  The containment relation is required to
1189 // be a DAG, though; it appears that a node or subgraph can't be moved into an
1190 // ancestor of a subgraph where it already was (we don't enforce that but do a
1191 // DFS when finding members to prevent cycles).  Nodes get their properties by
1192 // when they are first mentioned, and can only have them overridden after that
1193 // by explicit properties on that particular node.  Closing and reopening the
1194 // same subgraph name adds to its members, and graph properties and node/edge
1195 // defaults are preserved in that subgraph.  The members of a subgraph used in
1196 // an edge are gathered when the edge is read, even if new members are added to
1197 // the subgraph later.  Ports are allowed in a lot more places in the grammar
1198 // than Dot uses.  For example, declaring a node seems to ignore ports, and I
1199 // don't think it's possible to set properties on a particular port.  Adding an
1200 // edge between two ports on the same node seems to make Dot unhappy (crashed
1201 // for me).
1202 
1203 // Test graph for GraphViz behavior on strange combinations of subgraphs and
1204 // such.  I don't have anywhere else to put this file.
1205 
1206 #if 0
1207 dIGRaph foo {
1208   node [color=blue]
1209   subgraph a -> b
1210   subgraph a {c}
1211   subgraph a -> d
1212   subgraph a {node [color=red]; e}
1213   subgraph a -> f
1214   subgraph a {g} -> h
1215   subgraph a {node [color=green]; i} -> j
1216   subgraph a {node [color=yellow]}
1217 
1218   subgraph a0 {node [color=black]; subgraph a1 {node [color=white]}}
1219   node [color=pink] zz
1220   subgraph a0 {x1}
1221   subgraph a0 {subgraph a1 {x2}}
1222 
1223   subgraph a0 -> x3
1224   subgraph a0 {subgraph a1 -> x3}
1225   x3
1226   subgraph a0 {subgraph a0 {node [color=xxx]; x2} x7}
1227   x2 [color=yyy]
1228   subgraph cluster_ax {foo; subgraph a0}
1229   subgraph a0 {foo2}
1230   subgraph cluster_ax {foo3}
1231   // subgraph a0 -> subgraph a0
1232 
1233   bgcolor=yellow
1234   subgraph cluster_a2 {y1}
1235   // y1:n -> y1:(s,3)@se
1236   y1@se [color=green]
1237   y1@n [color=red]
1238 }
1239 #endif
1240