1 // Copyright 2004-9 Trustees of Indiana University 2 3 // Distributed under the Boost Software License, Version 1.0. 4 // (See accompanying file LICENSE_1_0.txt or copy at 5 // http://www.boost.org/LICENSE_1_0.txt) 6 7 // 8 // read_graphviz_new.cpp - 9 // Initialize a model of the BGL's MutableGraph concept and an associated 10 // collection of property maps using a graph expressed in the GraphViz 11 // DOT Language. 12 // 13 // Based on the grammar found at: 14 // https://web.archive.org/web/20041213234742/http://www.graphviz.org/cvs/doc/info/lang.html 15 // 16 // Jeremiah rewrite used grammar found at: 17 // http://www.graphviz.org/doc/info/lang.html 18 // and page 34 or http://www.graphviz.org/pdf/dotguide.pdf 19 // 20 // See documentation for this code at: 21 // http://www.boost.org/libs/graph/doc/read_graphviz.html 22 // 23 24 // Author: Jeremiah Willcock 25 // Ronald Garcia 26 // 27 28 #define BOOST_GRAPH_SOURCE 29 #include <boost/assert.hpp> 30 #include <boost/ref.hpp> 31 #include <boost/function/function2.hpp> 32 #include <boost/property_map/dynamic_property_map.hpp> 33 #include <boost/graph/graph_traits.hpp> 34 #include <boost/detail/workaround.hpp> 35 #include <boost/algorithm/string/case_conv.hpp> 36 #include <algorithm> 37 #include <exception> // for std::exception 38 #include <string> 39 #include <vector> 40 #include <set> 41 #include <utility> 42 #include <map> 43 #include <iostream> 44 #include <cstdlib> 45 #include <boost/throw_exception.hpp> 46 #include <boost/regex.hpp> 47 #include <boost/function.hpp> 48 #include <boost/bind.hpp> 49 #include <boost/graph/dll_import_export.hpp> 50 #include <boost/graph/graphviz.hpp> 51 52 namespace boost 53 { 54 55 namespace read_graphviz_detail 56 { 57 struct token 58 { 59 enum token_type 60 { 61 kw_strict, 62 kw_graph, 63 kw_digraph, 64 kw_node, 65 kw_edge, 66 kw_subgraph, 67 left_brace, 68 right_brace, 69 semicolon, 70 equal, 71 left_bracket, 72 right_bracket, 73 comma, 74 colon, 75 dash_greater, 76 dash_dash, 77 plus, 78 left_paren, 79 right_paren, 80 at, 81 identifier, 82 quoted_string, // Only used internally in tokenizer 83 eof, 84 invalid 85 }; 86 token_type type; 87 std::string normalized_value; // May have double-quotes removed and/or 88 // some escapes replaced tokenboost::read_graphviz_detail::token89 token(token_type type, const std::string& normalized_value) 90 : type(type), normalized_value(normalized_value) 91 { 92 } tokenboost::read_graphviz_detail::token93 token() : type(invalid), normalized_value("") {} operator <<(std::ostream & o,const token & t)94 friend std::ostream& operator<<(std::ostream& o, const token& t) 95 { 96 switch (t.type) 97 { 98 case token::kw_strict: 99 o << "<strict>"; 100 break; 101 case token::kw_graph: 102 o << "<graph>"; 103 break; 104 case token::kw_digraph: 105 o << "<digraph>"; 106 break; 107 case token::kw_node: 108 o << "<node>"; 109 break; 110 case token::kw_edge: 111 o << "<edge>"; 112 break; 113 case token::kw_subgraph: 114 o << "<subgraph>"; 115 break; 116 case token::left_brace: 117 o << "<left_brace>"; 118 break; 119 case token::right_brace: 120 o << "<right_brace>"; 121 break; 122 case token::semicolon: 123 o << "<semicolon>"; 124 break; 125 case token::equal: 126 o << "<equal>"; 127 break; 128 case token::left_bracket: 129 o << "<left_bracket>"; 130 break; 131 case token::right_bracket: 132 o << "<right_bracket>"; 133 break; 134 case token::comma: 135 o << "<comma>"; 136 break; 137 case token::colon: 138 o << "<colon>"; 139 break; 140 case token::dash_greater: 141 o << "<dash-greater>"; 142 break; 143 case token::dash_dash: 144 o << "<dash-dash>"; 145 break; 146 case token::plus: 147 o << "<plus>"; 148 break; 149 case token::left_paren: 150 o << "<left_paren>"; 151 break; 152 case token::right_paren: 153 o << "<right_paren>"; 154 break; 155 case token::at: 156 o << "<at>"; 157 break; 158 case token::identifier: 159 o << "<identifier>"; 160 break; 161 case token::quoted_string: 162 o << "<quoted_string>"; 163 break; 164 case token::eof: 165 o << "<eof>"; 166 break; 167 default: 168 o << "<invalid type>"; 169 break; 170 } 171 o << " '" << t.normalized_value << "'"; 172 return o; 173 } 174 }; 175 lex_error(const std::string & errmsg,char bad_char)176 bad_graphviz_syntax lex_error(const std::string& errmsg, char bad_char) 177 { 178 if (bad_char == '\0') 179 { 180 return bad_graphviz_syntax(errmsg + " (at end of input)"); 181 } 182 else 183 { 184 return bad_graphviz_syntax( 185 errmsg + " (char is '" + bad_char + "')"); 186 } 187 } 188 parse_error(const std::string & errmsg,const token & bad_token)189 bad_graphviz_syntax parse_error( 190 const std::string& errmsg, const token& bad_token) 191 { 192 return bad_graphviz_syntax(errmsg + " (token is \"" 193 + boost::lexical_cast< std::string >(bad_token) + "\")"); 194 } 195 196 struct tokenizer 197 { 198 std::string::const_iterator begin, end; 199 std::vector< token > lookahead; 200 // Precomputed regexes 201 boost::regex stuff_to_skip; 202 boost::regex basic_id_token; 203 boost::regex punctuation_token; 204 boost::regex number_token; 205 boost::regex quoted_string_token; 206 boost::regex xml_tag_token; 207 boost::regex cdata; 208 tokenizerboost::read_graphviz_detail::tokenizer209 tokenizer(const std::string& str) : begin(str.begin()), end(str.end()) 210 { 211 std::string end_of_token = "(?=(?:\\W))"; 212 std::string whitespace = "(?:\\s+)"; 213 std::string slash_slash_comment = "(?://.*?$)"; 214 std::string slash_star_comment = "(?:/\\*.*?\\*/)"; 215 std::string hash_comment = "(?:^#.*?$)"; 216 std::string backslash_newline = "(?:[\\\\][\\n])"; 217 stuff_to_skip = "\\A(?:" + whitespace + "|" + slash_slash_comment 218 + "|" + slash_star_comment + "|" + hash_comment + "|" 219 + backslash_newline + ")*"; 220 basic_id_token = "\\A([[:alpha:]_](?:\\w*))"; 221 punctuation_token = "\\A([][{};=,:+()@]|[-][>-])"; 222 number_token = "\\A([-]?(?:(?:\\.\\d+)|(?:\\d+(?:\\.\\d*)?)))"; 223 quoted_string_token = "\\A(\"(?:[^\"\\\\]|(?:[\\\\].))*\")"; 224 xml_tag_token 225 = "\\A<(/?)(?:[^!?'\"]|(?:'[^']*?')|(?:\"[^\"]*?\"))*?(/?)>"; 226 cdata = "\\A\\Q<![CDATA[\\E.*?\\Q]]>\\E"; 227 } 228 skipboost::read_graphviz_detail::tokenizer229 void skip() 230 { 231 boost::match_results< std::string::const_iterator > results; 232 #ifndef NDEBUG 233 bool found = 234 #endif 235 boost::regex_search(begin, end, results, stuff_to_skip); 236 #ifndef NDEBUG 237 BOOST_ASSERT(found); 238 #endif 239 boost::sub_match< std::string::const_iterator > sm1 240 = results.suffix(); 241 BOOST_ASSERT(sm1.second == end); 242 begin = sm1.first; 243 } 244 get_token_rawboost::read_graphviz_detail::tokenizer245 token get_token_raw() 246 { 247 if (!lookahead.empty()) 248 { 249 token t = lookahead.front(); 250 lookahead.erase(lookahead.begin()); 251 return t; 252 } 253 skip(); 254 if (begin == end) 255 return token(token::eof, ""); 256 // Look for keywords first 257 bool found; 258 boost::match_results< std::string::const_iterator > results; 259 found = boost::regex_search(begin, end, results, basic_id_token); 260 if (found) 261 { 262 std::string str = results[1].str(); 263 std::string str_lower = boost::algorithm::to_lower_copy(str); 264 begin = results.suffix().first; 265 if (str_lower == "strict") 266 { 267 return token(token::kw_strict, str); 268 } 269 else if (str_lower == "graph") 270 { 271 return token(token::kw_graph, str); 272 } 273 else if (str_lower == "digraph") 274 { 275 return token(token::kw_digraph, str); 276 } 277 else if (str_lower == "node") 278 { 279 return token(token::kw_node, str); 280 } 281 else if (str_lower == "edge") 282 { 283 return token(token::kw_edge, str); 284 } 285 else if (str_lower == "subgraph") 286 { 287 return token(token::kw_subgraph, str); 288 } 289 else 290 { 291 return token(token::identifier, str); 292 } 293 } 294 found = boost::regex_search(begin, end, results, punctuation_token); 295 if (found) 296 { 297 std::string str = results[1].str(); 298 begin = results.suffix().first; 299 switch (str[0]) 300 { 301 case '[': 302 return token(token::left_bracket, str); 303 case ']': 304 return token(token::right_bracket, str); 305 case '{': 306 return token(token::left_brace, str); 307 case '}': 308 return token(token::right_brace, str); 309 case ';': 310 return token(token::semicolon, str); 311 case '=': 312 return token(token::equal, str); 313 case ',': 314 return token(token::comma, str); 315 case ':': 316 return token(token::colon, str); 317 case '+': 318 return token(token::plus, str); 319 case '(': 320 return token(token::left_paren, str); 321 case ')': 322 return token(token::right_paren, str); 323 case '@': 324 return token(token::at, str); 325 case '-': 326 { 327 switch (str[1]) 328 { 329 case '-': 330 return token(token::dash_dash, str); 331 case '>': 332 return token(token::dash_greater, str); 333 default: 334 BOOST_ASSERT(!"Definition of punctuation_token does " 335 "not match switch statement"); 336 } 337 } 338 default: 339 BOOST_ASSERT(!"Definition of punctuation_token does not " 340 "match switch statement"); 341 } 342 } 343 found = boost::regex_search(begin, end, results, number_token); 344 if (found) 345 { 346 std::string str = results[1].str(); 347 begin = results.suffix().first; 348 return token(token::identifier, str); 349 } 350 found 351 = boost::regex_search(begin, end, results, quoted_string_token); 352 if (found) 353 { 354 std::string str = results[1].str(); 355 begin = results.suffix().first; 356 // Remove the beginning and ending quotes 357 BOOST_ASSERT(str.size() >= 2); 358 str.erase(str.begin()); 359 str.erase(str.end() - 1); 360 // Unescape quotes in the middle, but nothing else (see format 361 // spec) 362 for (size_t i = 0; i + 1 < str.size() /* May change */; ++i) 363 { 364 if (str[i] == '\\' && str[i + 1] == '"') 365 { 366 str.erase(str.begin() + i); 367 // Don't need to adjust i 368 } 369 else if (str[i] == '\\' && str[i + 1] == '\n') 370 { 371 str.erase(str.begin() + i); 372 str.erase(str.begin() + i); 373 --i; // Invert ++ that will be applied 374 } 375 } 376 return token(token::quoted_string, str); 377 } 378 if (*begin == '<') 379 { 380 std::string::const_iterator saved_begin = begin; 381 int counter = 0; 382 do 383 { 384 if (begin == end) 385 throw_lex_error("Unclosed HTML string"); 386 if (*begin != '<') 387 { 388 ++begin; 389 continue; 390 } 391 found = boost::regex_search( 392 begin, end, results, xml_tag_token); 393 if (found) 394 { 395 begin = results.suffix().first; 396 if (results[1].str() == "/") 397 { // Close tag 398 --counter; 399 } 400 else if (results[2].str() == "/") 401 { // Empty tag 402 } 403 else 404 { // Open tag 405 ++counter; 406 } 407 continue; 408 } 409 found = boost::regex_search(begin, end, results, cdata); 410 if (found) 411 { 412 begin = results.suffix().first; 413 continue; 414 } 415 throw_lex_error("Invalid contents in HTML string"); 416 } while (counter > 0); 417 return token( 418 token::identifier, std::string(saved_begin, begin)); 419 } 420 else 421 { 422 throw_lex_error("Invalid character"); 423 return token(); 424 } 425 } 426 peek_token_rawboost::read_graphviz_detail::tokenizer427 token peek_token_raw() 428 { 429 if (lookahead.empty()) 430 { 431 token t = get_token_raw(); 432 lookahead.push_back(t); 433 } 434 return lookahead.front(); 435 } 436 get_tokenboost::read_graphviz_detail::tokenizer437 token get_token() 438 { // Handle string concatenation 439 token t = get_token_raw(); 440 if (t.type != token::quoted_string) 441 return t; 442 std::string str = t.normalized_value; 443 while (peek_token_raw().type == token::plus) 444 { 445 get_token_raw(); 446 token t2 = get_token_raw(); 447 if (t2.type != token::quoted_string) 448 { 449 throw_lex_error( 450 "Must have quoted string after string concatenation"); 451 } 452 str += t2.normalized_value; 453 } 454 return token( 455 token::identifier, str); // Note that quoted_string does not get 456 // passed to the parser 457 } 458 throw_lex_errorboost::read_graphviz_detail::tokenizer459 void throw_lex_error(const std::string& errmsg) 460 { 461 boost::throw_exception( 462 lex_error(errmsg, (begin == end ? '\0' : *begin))); 463 } 464 }; 465 466 struct edge_endpoint 467 { 468 bool is_subgraph; 469 node_and_port node_ep; 470 subgraph_name subgraph_ep; 471 nodeboost::read_graphviz_detail::edge_endpoint472 static edge_endpoint node(const node_and_port& ep) 473 { 474 edge_endpoint r; 475 r.is_subgraph = false; 476 r.node_ep = ep; 477 return r; 478 } 479 subgraphboost::read_graphviz_detail::edge_endpoint480 static edge_endpoint subgraph(const subgraph_name& ep) 481 { 482 edge_endpoint r; 483 r.is_subgraph = true; 484 r.subgraph_ep = ep; 485 return r; 486 } 487 }; 488 489 struct node_or_subgraph_ref 490 { 491 bool is_subgraph; 492 std::string 493 name; // Name for subgraphs or nodes, "___root___" for root graph 494 }; 495 noderef(const node_name & n)496 static node_or_subgraph_ref noderef(const node_name& n) 497 { 498 node_or_subgraph_ref r; 499 r.is_subgraph = false; 500 r.name = n; 501 return r; 502 } 503 subgraphref(const subgraph_name & n)504 static node_or_subgraph_ref subgraphref(const subgraph_name& n) 505 { 506 node_or_subgraph_ref r; 507 r.is_subgraph = true; 508 r.name = n; 509 return r; 510 } 511 512 typedef std::vector< node_or_subgraph_ref > subgraph_member_list; 513 514 struct subgraph_info 515 { 516 properties def_node_props; 517 properties def_edge_props; 518 subgraph_member_list members; 519 }; 520 521 struct parser 522 { 523 tokenizer the_tokenizer; 524 std::vector< token > lookahead; 525 parser_result& r; 526 std::map< subgraph_name, subgraph_info > subgraphs; 527 std::string current_subgraph_name; 528 int sgcounter; // Counter for anonymous subgraphs 529 std::set< std::pair< node_name, node_name > > 530 existing_edges; // Used for checking in strict graphs 531 currentboost::read_graphviz_detail::parser532 subgraph_info& current() { return subgraphs[current_subgraph_name]; } current_graph_propsboost::read_graphviz_detail::parser533 properties& current_graph_props() 534 { 535 return r.graph_props[current_subgraph_name]; 536 } current_membersboost::read_graphviz_detail::parser537 subgraph_member_list& current_members() { return current().members; } 538 parserboost::read_graphviz_detail::parser539 parser(const std::string& gr, parser_result& result) 540 : the_tokenizer(gr), lookahead(), r(result), sgcounter(0) 541 { 542 current_subgraph_name = "___root___"; 543 current() = subgraph_info(); // Initialize root graph 544 current_graph_props().clear(); 545 current_members().clear(); 546 } 547 getboost::read_graphviz_detail::parser548 token get() 549 { 550 if (lookahead.empty()) 551 { 552 token t = the_tokenizer.get_token(); 553 return t; 554 } 555 else 556 { 557 token t = lookahead.front(); 558 lookahead.erase(lookahead.begin()); 559 return t; 560 } 561 } 562 peekboost::read_graphviz_detail::parser563 token peek() 564 { 565 if (lookahead.empty()) 566 { 567 lookahead.push_back(the_tokenizer.get_token()); 568 } 569 return lookahead.front(); 570 } 571 errorboost::read_graphviz_detail::parser572 void error(const std::string& str) 573 { 574 boost::throw_exception(parse_error(str, peek())); 575 } 576 parse_graphboost::read_graphviz_detail::parser577 void parse_graph(bool want_directed) 578 { 579 bool is_strict = false; 580 bool is_directed = false; 581 std::string name; 582 if (peek().type == token::kw_strict) 583 { 584 get(); 585 is_strict = true; 586 } 587 switch (peek().type) 588 { 589 case token::kw_graph: 590 is_directed = false; 591 break; 592 case token::kw_digraph: 593 is_directed = true; 594 break; 595 default: 596 error("Wanted \"graph\" or \"digraph\""); 597 } 598 r.graph_is_directed = is_directed; // Used to check edges 599 r.graph_is_strict = is_strict; 600 if (want_directed != r.graph_is_directed) 601 { 602 if (want_directed) 603 { 604 boost::throw_exception(boost::undirected_graph_error()); 605 } 606 else 607 { 608 boost::throw_exception(boost::directed_graph_error()); 609 } 610 } 611 get(); 612 switch (peek().type) 613 { 614 case token::identifier: 615 name = peek().normalized_value; 616 get(); 617 break; 618 case token::left_brace: 619 break; 620 default: 621 error("Wanted a graph name or left brace"); 622 } 623 if (peek().type == token::left_brace) 624 get(); 625 else 626 error("Wanted a left brace to start the graph"); 627 parse_stmt_list(); 628 if (peek().type == token::right_brace) 629 get(); 630 else 631 error("Wanted a right brace to end the graph"); 632 if (peek().type == token::eof) 633 { 634 } 635 else 636 error("Wanted end of file"); 637 } 638 parse_stmt_listboost::read_graphviz_detail::parser639 void parse_stmt_list() 640 { 641 while (true) 642 { 643 if (peek().type == token::right_brace) 644 return; 645 parse_stmt(); 646 if (peek().type == token::semicolon) 647 get(); 648 } 649 } 650 parse_stmtboost::read_graphviz_detail::parser651 void parse_stmt() 652 { 653 switch (peek().type) 654 { 655 case token::kw_node: 656 case token::kw_edge: 657 case token::kw_graph: 658 parse_attr_stmt(); 659 break; 660 case token::kw_subgraph: 661 case token::left_brace: 662 case token::identifier: 663 { 664 token id = get(); 665 if (id.type == token::identifier && peek().type == token::equal) 666 { // Graph property 667 get(); 668 if (peek().type != token::identifier) 669 error("Wanted identifier as right side of ="); 670 token id2 = get(); 671 current_graph_props()[id.normalized_value] 672 = id2.normalized_value; 673 } 674 else 675 { 676 edge_endpoint ep = parse_endpoint_rest(id); 677 if (peek().type == token::dash_dash 678 || peek().type == token::dash_greater) 679 { // Edge 680 parse_edge_stmt(ep); 681 } 682 else 683 { 684 if (!ep.is_subgraph) 685 { // Only nodes can have attribute lists 686 // This node already exists because of its first 687 // mention (properties set to defaults by 688 // parse_node_and_port, called by 689 // parse_endpoint_rest) 690 properties this_node_props; 691 if (peek().type == token::left_bracket) 692 { 693 parse_attr_list(this_node_props); 694 } 695 for (properties::const_iterator i 696 = this_node_props.begin(); 697 i != this_node_props.end(); ++i) 698 { 699 // Override old properties with same names 700 r.nodes[ep.node_ep.name][i->first] = i->second; 701 } 702 current_members().push_back( 703 noderef(ep.node_ep.name)); 704 } 705 else 706 { 707 current_members().push_back( 708 subgraphref(ep.subgraph_ep)); 709 } 710 } 711 } 712 break; 713 } 714 default: 715 error("Invalid start token for statement"); 716 } 717 } 718 parse_attr_stmtboost::read_graphviz_detail::parser719 void parse_attr_stmt() 720 { 721 switch (get().type) 722 { 723 case token::kw_graph: 724 parse_attr_list(current_graph_props()); 725 break; 726 case token::kw_node: 727 parse_attr_list(current().def_node_props); 728 break; 729 case token::kw_edge: 730 parse_attr_list(current().def_edge_props); 731 break; 732 default: 733 BOOST_ASSERT(!"Bad attr_stmt case"); 734 } 735 } 736 parse_endpointboost::read_graphviz_detail::parser737 edge_endpoint parse_endpoint() 738 { 739 switch (peek().type) 740 { 741 case token::kw_subgraph: 742 case token::left_brace: 743 case token::identifier: 744 { 745 token first = get(); 746 return parse_endpoint_rest(first); 747 } 748 default: 749 { 750 error("Wanted \"subgraph\", \"{\", or identifier to start node " 751 "or subgraph"); 752 return edge_endpoint(); 753 } 754 } 755 } 756 parse_endpoint_restboost::read_graphviz_detail::parser757 edge_endpoint parse_endpoint_rest(const token& first_token) 758 { 759 switch (first_token.type) 760 { 761 case token::kw_subgraph: 762 case token::left_brace: 763 return edge_endpoint::subgraph(parse_subgraph(first_token)); 764 default: 765 return edge_endpoint::node(parse_node_and_port(first_token)); 766 } 767 } 768 parse_subgraphboost::read_graphviz_detail::parser769 subgraph_name parse_subgraph(const token& first_token) 770 { 771 std::string name; 772 bool is_anonymous = true; 773 if (first_token.type == token::kw_subgraph) 774 { 775 if (peek().type == token::identifier) 776 { 777 name = get().normalized_value; 778 is_anonymous = false; 779 } 780 } 781 if (is_anonymous) 782 { 783 name = "___subgraph_" 784 + boost::lexical_cast< std::string >(++sgcounter); 785 } 786 if (subgraphs.find(name) == subgraphs.end()) 787 { 788 subgraphs[name] 789 = current(); // Initialize properties and defaults 790 subgraphs[name].members.clear(); // Except member list 791 } 792 if (first_token.type == token::kw_subgraph 793 && peek().type != token::left_brace) 794 { 795 if (is_anonymous) 796 error("Subgraph reference needs a name"); 797 return name; 798 } 799 subgraph_name old_sg = current_subgraph_name; 800 current_subgraph_name = name; 801 if (peek().type == token::left_brace) 802 get(); 803 else 804 error("Wanted left brace to start subgraph"); 805 parse_stmt_list(); 806 if (peek().type == token::right_brace) 807 get(); 808 else 809 error("Wanted right brace to end subgraph"); 810 current_subgraph_name = old_sg; 811 return name; 812 } 813 parse_node_and_portboost::read_graphviz_detail::parser814 node_and_port parse_node_and_port(const token& name) 815 { 816 // A node ID is a node name, followed optionally by a port angle and 817 // a port location (in either order); a port location is either :id, 818 // :id:id, or :(id,id); the last two forms are treated as equivalent 819 // although I am not sure about that. 820 node_and_port id; 821 id.name = name.normalized_value; 822 parse_more: 823 switch (peek().type) 824 { 825 case token::at: 826 { 827 get(); 828 if (peek().type != token::identifier) 829 error("Wanted identifier as port angle"); 830 if (id.angle != "") 831 error("Duplicate port angle"); 832 id.angle = get().normalized_value; 833 goto parse_more; 834 } 835 case token::colon: 836 { 837 get(); 838 if (!id.location.empty()) 839 error("Duplicate port location"); 840 switch (peek().type) 841 { 842 case token::identifier: 843 { 844 id.location.push_back(get().normalized_value); 845 switch (peek().type) 846 { 847 case token::colon: 848 { 849 get(); 850 if (peek().type != token::identifier) 851 error("Wanted identifier as port location"); 852 id.location.push_back(get().normalized_value); 853 goto parse_more; 854 } 855 default: 856 goto parse_more; 857 } 858 } 859 case token::left_paren: 860 { 861 get(); 862 if (peek().type != token::identifier) 863 error("Wanted identifier as first element of port " 864 "location"); 865 id.location.push_back(get().normalized_value); 866 if (peek().type != token::comma) 867 error("Wanted comma between parts of port location"); 868 get(); 869 if (peek().type != token::identifier) 870 error("Wanted identifier as second element of port " 871 "location"); 872 id.location.push_back(get().normalized_value); 873 if (peek().type != token::right_paren) 874 error( 875 "Wanted right parenthesis to close port location"); 876 get(); 877 goto parse_more; 878 } 879 default: 880 error("Wanted identifier or left parenthesis as start of " 881 "port location"); 882 } 883 } 884 default: 885 break; 886 } 887 if (r.nodes.find(id.name) == r.nodes.end()) 888 { // First mention 889 r.nodes[id.name] = current().def_node_props; 890 } 891 return id; 892 } 893 parse_edge_stmtboost::read_graphviz_detail::parser894 void parse_edge_stmt(const edge_endpoint& lhs) 895 { 896 std::vector< edge_endpoint > nodes_in_chain(1, lhs); 897 while (true) 898 { 899 bool leave_loop = true; 900 switch (peek().type) 901 { 902 case token::dash_dash: 903 { 904 if (r.graph_is_directed) 905 error("Using -- in directed graph"); 906 get(); 907 nodes_in_chain.push_back(parse_endpoint()); 908 leave_loop = false; 909 break; 910 } 911 case token::dash_greater: 912 { 913 if (!r.graph_is_directed) 914 error("Using -> in undirected graph"); 915 get(); 916 nodes_in_chain.push_back(parse_endpoint()); 917 leave_loop = false; 918 break; 919 } 920 default: 921 leave_loop = true; 922 break; 923 } 924 if (leave_loop) 925 break; 926 } 927 properties this_edge_props = current().def_edge_props; 928 if (peek().type == token::left_bracket) 929 parse_attr_list(this_edge_props); 930 BOOST_ASSERT(nodes_in_chain.size() 931 >= 2); // Should be in node parser otherwise 932 for (size_t i = 0; i + 1 < nodes_in_chain.size(); ++i) 933 { 934 do_orig_edge( 935 nodes_in_chain[i], nodes_in_chain[i + 1], this_edge_props); 936 } 937 } 938 939 // Do an edge from the file, the edge may need to be expanded if it 940 // connects to a subgraph do_orig_edgeboost::read_graphviz_detail::parser941 void do_orig_edge(const edge_endpoint& src, const edge_endpoint& tgt, 942 const properties& props) 943 { 944 std::set< node_and_port > sources = get_recursive_members(src); 945 std::set< node_and_port > targets = get_recursive_members(tgt); 946 for (std::set< node_and_port >::const_iterator i = sources.begin(); 947 i != sources.end(); ++i) 948 { 949 for (std::set< node_and_port >::const_iterator j 950 = targets.begin(); 951 j != targets.end(); ++j) 952 { 953 do_edge(*i, *j, props); 954 } 955 } 956 } 957 958 // Get nodes in an edge_endpoint, recursively get_recursive_membersboost::read_graphviz_detail::parser959 std::set< node_and_port > get_recursive_members( 960 const edge_endpoint& orig_ep) 961 { 962 std::set< node_and_port > result; 963 std::vector< edge_endpoint > worklist(1, orig_ep); 964 std::set< subgraph_name > done; 965 while (!worklist.empty()) 966 { 967 edge_endpoint ep = worklist.back(); 968 worklist.pop_back(); 969 if (ep.is_subgraph) 970 { 971 if (done.find(ep.subgraph_ep) == done.end()) 972 { 973 done.insert(ep.subgraph_ep); 974 std::map< subgraph_name, subgraph_info >::const_iterator 975 info_i 976 = subgraphs.find(ep.subgraph_ep); 977 if (info_i != subgraphs.end()) 978 { 979 const subgraph_member_list& members 980 = info_i->second.members; 981 for (subgraph_member_list::const_iterator i 982 = members.begin(); 983 i != members.end(); ++i) 984 { 985 node_or_subgraph_ref ref = *i; 986 if (ref.is_subgraph) 987 { 988 worklist.push_back( 989 edge_endpoint::subgraph(ref.name)); 990 } 991 else 992 { 993 node_and_port np; 994 np.name = ref.name; 995 worklist.push_back(edge_endpoint::node(np)); 996 } 997 } 998 } 999 } 1000 } 1001 else 1002 { 1003 result.insert(ep.node_ep); 1004 } 1005 } 1006 return result; 1007 } 1008 1009 // Do a fixed-up edge, with only nodes as endpoints do_edgeboost::read_graphviz_detail::parser1010 void do_edge(const node_and_port& src, const node_and_port& tgt, 1011 const properties& props) 1012 { 1013 if (r.graph_is_strict) 1014 { 1015 if (src.name == tgt.name) 1016 return; 1017 std::pair< node_name, node_name > tag(src.name, tgt.name); 1018 if (existing_edges.find(tag) != existing_edges.end()) 1019 { 1020 return; // Parallel edge 1021 } 1022 existing_edges.insert(tag); 1023 } 1024 edge_info e; 1025 e.source = src; 1026 e.target = tgt; 1027 e.props = props; 1028 r.edges.push_back(e); 1029 } 1030 parse_attr_listboost::read_graphviz_detail::parser1031 void parse_attr_list(properties& props) 1032 { 1033 while (true) 1034 { 1035 if (peek().type == token::left_bracket) 1036 get(); 1037 else 1038 error("Wanted left bracket to start attribute list"); 1039 while (true) 1040 { 1041 switch (peek().type) 1042 { 1043 case token::right_bracket: 1044 break; 1045 case token::identifier: 1046 { 1047 std::string lhs = get().normalized_value; 1048 std::string rhs = "true"; 1049 if (peek().type == token::equal) 1050 { 1051 get(); 1052 if (peek().type != token::identifier) 1053 error( 1054 "Wanted identifier as value of attribute"); 1055 rhs = get().normalized_value; 1056 } 1057 props[lhs] = rhs; 1058 break; 1059 } 1060 default: 1061 error("Wanted identifier as name of attribute"); 1062 } 1063 if (peek().type == token::comma 1064 || peek().type == token::semicolon) 1065 get(); 1066 else if (peek().type == token::right_bracket) 1067 break; 1068 } 1069 if (peek().type == token::right_bracket) 1070 get(); 1071 else 1072 error("Wanted right bracket to end attribute list"); 1073 if (peek().type != token::left_bracket) 1074 break; 1075 } 1076 } 1077 }; 1078 parse_graphviz_from_string(const std::string & str,parser_result & result,bool want_directed)1079 void parse_graphviz_from_string( 1080 const std::string& str, parser_result& result, bool want_directed) 1081 { 1082 parser p(str, result); 1083 p.parse_graph(want_directed); 1084 } 1085 1086 // Some debugging stuff operator <<(std::ostream & o,const node_and_port & n)1087 std::ostream& operator<<(std::ostream& o, const node_and_port& n) 1088 { 1089 o << n.name; 1090 for (size_t i = 0; i < n.location.size(); ++i) 1091 { 1092 o << ":" << n.location[i]; 1093 } 1094 if (!n.angle.empty()) 1095 o << "@" << n.angle; 1096 return o; 1097 } 1098 1099 // Can't be operator<< because properties is just an std::map props_to_string(const properties & props)1100 std::string props_to_string(const properties& props) 1101 { 1102 std::string result = "["; 1103 for (properties::const_iterator i = props.begin(); i != props.end(); 1104 ++i) 1105 { 1106 if (i != props.begin()) 1107 result += ", "; 1108 result += i->first + "=" + i->second; 1109 } 1110 result += "]"; 1111 return result; 1112 } 1113 translate_results_to_graph(const parser_result & r,::boost::detail::graph::mutate_graph * mg)1114 void translate_results_to_graph( 1115 const parser_result& r, ::boost::detail::graph::mutate_graph* mg) 1116 { 1117 typedef boost::detail::graph::edge_t edge; 1118 for (std::map< node_name, properties >::const_iterator i 1119 = r.nodes.begin(); 1120 i != r.nodes.end(); ++i) 1121 { 1122 // std::cerr << i->first << " " << props_to_string(i->second) << 1123 // std::endl; 1124 mg->do_add_vertex(i->first); 1125 for (properties::const_iterator j = i->second.begin(); 1126 j != i->second.end(); ++j) 1127 { 1128 mg->set_node_property(j->first, i->first, j->second); 1129 } 1130 } 1131 for (std::vector< edge_info >::const_iterator i = r.edges.begin(); 1132 i != r.edges.end(); ++i) 1133 { 1134 const edge_info& ei = *i; 1135 // std::cerr << ei.source << " -> " << ei.target << " " << 1136 // props_to_string(ei.props) << std::endl; 1137 edge e = edge::new_edge(); 1138 mg->do_add_edge(e, ei.source.name, ei.target.name); 1139 for (properties::const_iterator j = ei.props.begin(); 1140 j != ei.props.end(); ++j) 1141 { 1142 mg->set_edge_property(j->first, e, j->second); 1143 } 1144 } 1145 std::map< subgraph_name, properties >::const_iterator root_graph_props_i 1146 = r.graph_props.find("___root___"); 1147 BOOST_ASSERT( 1148 root_graph_props_i != r.graph_props.end()); // Should not happen 1149 const properties& root_graph_props = root_graph_props_i->second; 1150 // std::cerr << "ending graph " << props_to_string(root_graph_props) << 1151 // std::endl; 1152 for (properties::const_iterator i = root_graph_props.begin(); 1153 i != root_graph_props.end(); ++i) 1154 { 1155 mg->set_graph_property(i->first, i->second); 1156 } 1157 mg->finish_building_graph(); 1158 } 1159 1160 } // end namespace read_graphviz_detail 1161 1162 namespace detail 1163 { 1164 namespace graph 1165 { 1166 read_graphviz_new(const std::string & str,boost::detail::graph::mutate_graph * mg)1167 BOOST_GRAPH_DECL bool read_graphviz_new( 1168 const std::string& str, boost::detail::graph::mutate_graph* mg) 1169 { 1170 read_graphviz_detail::parser_result parsed_file; 1171 read_graphviz_detail::parse_graphviz_from_string( 1172 str, parsed_file, mg->is_directed()); 1173 read_graphviz_detail::translate_results_to_graph(parsed_file, mg); 1174 return true; 1175 } 1176 1177 } // end namespace graph 1178 } // end namespace detail 1179 1180 } // end namespace boost 1181 1182 // GraphViz format notes (tested using "dot version 1.13 (v16) (Mon August 23, 1183 // 2004)", grammar from references in read_graphviz_new.hpp): 1184 1185 // Subgraphs don't nest (all a0 subgraphs are the same thing), but a node or 1186 // subgraph can have multiple parents (sources online say that the layout 1187 // algorithms can't handle non-tree structures of clusters, but it seems to 1188 // read them the same from the file). The containment relation is required to 1189 // be a DAG, though; it appears that a node or subgraph can't be moved into an 1190 // ancestor of a subgraph where it already was (we don't enforce that but do a 1191 // DFS when finding members to prevent cycles). Nodes get their properties by 1192 // when they are first mentioned, and can only have them overridden after that 1193 // by explicit properties on that particular node. Closing and reopening the 1194 // same subgraph name adds to its members, and graph properties and node/edge 1195 // defaults are preserved in that subgraph. The members of a subgraph used in 1196 // an edge are gathered when the edge is read, even if new members are added to 1197 // the subgraph later. Ports are allowed in a lot more places in the grammar 1198 // than Dot uses. For example, declaring a node seems to ignore ports, and I 1199 // don't think it's possible to set properties on a particular port. Adding an 1200 // edge between two ports on the same node seems to make Dot unhappy (crashed 1201 // for me). 1202 1203 // Test graph for GraphViz behavior on strange combinations of subgraphs and 1204 // such. I don't have anywhere else to put this file. 1205 1206 #if 0 1207 dIGRaph foo { 1208 node [color=blue] 1209 subgraph a -> b 1210 subgraph a {c} 1211 subgraph a -> d 1212 subgraph a {node [color=red]; e} 1213 subgraph a -> f 1214 subgraph a {g} -> h 1215 subgraph a {node [color=green]; i} -> j 1216 subgraph a {node [color=yellow]} 1217 1218 subgraph a0 {node [color=black]; subgraph a1 {node [color=white]}} 1219 node [color=pink] zz 1220 subgraph a0 {x1} 1221 subgraph a0 {subgraph a1 {x2}} 1222 1223 subgraph a0 -> x3 1224 subgraph a0 {subgraph a1 -> x3} 1225 x3 1226 subgraph a0 {subgraph a0 {node [color=xxx]; x2} x7} 1227 x2 [color=yyy] 1228 subgraph cluster_ax {foo; subgraph a0} 1229 subgraph a0 {foo2} 1230 subgraph cluster_ax {foo3} 1231 // subgraph a0 -> subgraph a0 1232 1233 bgcolor=yellow 1234 subgraph cluster_a2 {y1} 1235 // y1:n -> y1:(s,3)@se 1236 y1@se [color=green] 1237 y1@n [color=red] 1238 } 1239 #endif 1240