• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2004 The Trustees of Indiana University.
2 
3 // Use, modification and distribution is subject to the Boost Software
4 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
5 // http://www.boost.org/LICENSE_1_0.txt)
6 
7 //  Authors: Douglas Gregor
8 //           Andrew Lumsdaine
9 
10 // This program performs betweenness centrality (BC) clustering on the
11 // actor collaboration graph available at
12 // http://www.nd.edu/~networks/resources/actor/actor.dat.gz and outputs the
13 // result of clustering in Pajek format.
14 //
15 // This program mimics the BC clustering algorithm program implemented
16 // by Shashikant Penumarthy for JUNG, so that we may compare results
17 // and timings.
18 #include <boost/graph/bc_clustering.hpp>
19 #include <boost/graph/adjacency_list.hpp>
20 #include <boost/graph/graph_traits.hpp>
21 #include <fstream>
22 #include <iostream>
23 #include <string>
24 #include <boost/tokenizer.hpp>
25 #include <boost/lexical_cast.hpp>
26 #include <map>
27 
28 using namespace boost;
29 
30 struct Actor
31 {
ActorActor32     Actor(int id = -1) : id(id) {}
33 
34     int id;
35 };
36 
37 typedef adjacency_list< vecS, vecS, undirectedS, Actor,
38     property< edge_centrality_t, double > >
39     ActorGraph;
40 typedef graph_traits< ActorGraph >::vertex_descriptor Vertex;
41 typedef graph_traits< ActorGraph >::edge_descriptor Edge;
42 
load_actor_graph(std::istream & in,ActorGraph & g)43 void load_actor_graph(std::istream& in, ActorGraph& g)
44 {
45     std::map< int, Vertex > actors;
46 
47     std::string line;
48     while (getline(in, line))
49     {
50         std::vector< Vertex > actors_in_movie;
51 
52         // Map from the actor numbers on this line to the actor vertices
53         typedef tokenizer< char_separator< char > > Tok;
54         Tok tok(line, char_separator< char >(" "));
55         for (Tok::iterator id = tok.begin(); id != tok.end(); ++id)
56         {
57             int actor_id = lexical_cast< int >(*id);
58             std::map< int, Vertex >::iterator v = actors.find(actor_id);
59             if (v == actors.end())
60             {
61                 Vertex new_vertex = add_vertex(Actor(actor_id), g);
62                 actors[actor_id] = new_vertex;
63                 actors_in_movie.push_back(new_vertex);
64             }
65             else
66             {
67                 actors_in_movie.push_back(v->second);
68             }
69         }
70 
71         for (std::vector< Vertex >::iterator i = actors_in_movie.begin();
72              i != actors_in_movie.end(); ++i)
73         {
74             for (std::vector< Vertex >::iterator j = i + 1;
75                  j != actors_in_movie.end(); ++j)
76             {
77                 if (!edge(*i, *j, g).second)
78                     add_edge(*i, *j, g);
79             }
80         }
81     }
82 }
83 
84 template < typename Graph, typename VertexIndexMap, typename VertexNameMap >
write_pajek_graph(std::ostream & out,const Graph & g,VertexIndexMap vertex_index,VertexNameMap vertex_name)85 std::ostream& write_pajek_graph(std::ostream& out, const Graph& g,
86     VertexIndexMap vertex_index, VertexNameMap vertex_name)
87 {
88     out << "*Vertices " << num_vertices(g) << '\n';
89     typedef typename graph_traits< Graph >::vertex_iterator vertex_iterator;
90     for (vertex_iterator v = vertices(g).first; v != vertices(g).second; ++v)
91     {
92         out << get(vertex_index, *v) + 1 << " \"" << get(vertex_name, *v)
93             << "\"\n";
94     }
95 
96     out << "*Edges\n";
97     typedef typename graph_traits< Graph >::edge_iterator edge_iterator;
98     for (edge_iterator e = edges(g).first; e != edges(g).second; ++e)
99     {
100         out << get(vertex_index, source(*e, g)) + 1 << ' '
101             << get(vertex_index, target(*e, g)) + 1 << " 1.0\n"; // HACK!
102     }
103     return out;
104 }
105 
106 class actor_clustering_threshold : public bc_clustering_threshold< double >
107 {
108     typedef bc_clustering_threshold< double > inherited;
109 
110 public:
actor_clustering_threshold(double threshold,const ActorGraph & g,bool normalize)111     actor_clustering_threshold(
112         double threshold, const ActorGraph& g, bool normalize)
113     : inherited(threshold, g, normalize), iter(1)
114     {
115     }
116 
operator ()(double max_centrality,Edge e,const ActorGraph & g)117     bool operator()(double max_centrality, Edge e, const ActorGraph& g)
118     {
119         std::cout << "Iter: " << iter
120                   << " Max Centrality: " << (max_centrality / dividend)
121                   << std::endl;
122         ++iter;
123         return inherited::operator()(max_centrality, e, g);
124     }
125 
126 private:
127     unsigned int iter;
128 };
129 
main(int argc,char * argv[])130 int main(int argc, char* argv[])
131 {
132     std::string in_file;
133     std::string out_file;
134     double threshold = -1.0;
135     bool normalize = false;
136 
137     // Parse command-line options
138     {
139         int on_arg = 1;
140         while (on_arg < argc)
141         {
142             std::string arg(argv[on_arg]);
143             if (arg == "-in")
144             {
145                 ++on_arg;
146                 assert(on_arg < argc);
147                 in_file = argv[on_arg];
148             }
149             else if (arg == "-out")
150             {
151                 ++on_arg;
152                 assert(on_arg < argc);
153                 out_file = argv[on_arg];
154             }
155             else if (arg == "-threshold")
156             {
157                 ++on_arg;
158                 assert(on_arg < argc);
159                 threshold = lexical_cast< double >(argv[on_arg]);
160             }
161             else if (arg == "-normalize")
162             {
163                 normalize = true;
164             }
165             else
166             {
167                 std::cerr << "Unrecognized parameter \"" << arg << "\".\n";
168                 return -1;
169             }
170             ++on_arg;
171         }
172 
173         if (in_file.empty() || out_file.empty() || threshold < 0)
174         {
175             std::cerr << "error: syntax is actor_clustering [options]\n\n"
176                       << "options are:\n"
177                       << "\t-in <infile>\tInput file\n"
178                       << "\t-out <outfile>\tOutput file\n"
179                       << "\t-threshold <value>\tA threshold value\n"
180                       << "\t-normalize\tNormalize edge centrality scores\n";
181             return -1;
182         }
183     }
184 
185     ActorGraph g;
186 
187     // Load the actor graph
188     {
189         std::cout << "Building graph." << std::endl;
190         std::ifstream in(in_file.c_str());
191         if (!in)
192         {
193             std::cerr << "Unable to open file \"" << in_file
194                       << "\" for input.\n";
195             return -2;
196         }
197         load_actor_graph(in, g);
198     }
199 
200     // Run the algorithm
201     std::cout << "Clusting..." << std::endl;
202     betweenness_centrality_clustering(g,
203         actor_clustering_threshold(threshold, g, normalize),
204         get(edge_centrality, g));
205 
206     // Output the graph
207     {
208         std::cout << "Writing graph to file: " << out_file << std::endl;
209         std::ofstream out(out_file.c_str());
210         if (!out)
211         {
212             std::cerr << "Unable to open file \"" << out_file
213                       << "\" for output.\n";
214             return -3;
215         }
216         write_pajek_graph(out, g, get(vertex_index, g), get(&Actor::id, g));
217     }
218     return 0;
219 }
220