1 // Copyright 2004 The Trustees of Indiana University.
2
3 // Use, modification and distribution is subject to the Boost Software
4 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
5 // http://www.boost.org/LICENSE_1_0.txt)
6
7 // Authors: Douglas Gregor
8 // Andrew Lumsdaine
9
10 // This program performs betweenness centrality (BC) clustering on the
11 // actor collaboration graph available at
12 // http://www.nd.edu/~networks/resources/actor/actor.dat.gz and outputs the
13 // result of clustering in Pajek format.
14 //
15 // This program mimics the BC clustering algorithm program implemented
16 // by Shashikant Penumarthy for JUNG, so that we may compare results
17 // and timings.
18 #include <boost/graph/bc_clustering.hpp>
19 #include <boost/graph/adjacency_list.hpp>
20 #include <boost/graph/graph_traits.hpp>
21 #include <fstream>
22 #include <iostream>
23 #include <string>
24 #include <boost/tokenizer.hpp>
25 #include <boost/lexical_cast.hpp>
26 #include <map>
27
28 using namespace boost;
29
30 struct Actor
31 {
ActorActor32 Actor(int id = -1) : id(id) {}
33
34 int id;
35 };
36
37 typedef adjacency_list< vecS, vecS, undirectedS, Actor,
38 property< edge_centrality_t, double > >
39 ActorGraph;
40 typedef graph_traits< ActorGraph >::vertex_descriptor Vertex;
41 typedef graph_traits< ActorGraph >::edge_descriptor Edge;
42
load_actor_graph(std::istream & in,ActorGraph & g)43 void load_actor_graph(std::istream& in, ActorGraph& g)
44 {
45 std::map< int, Vertex > actors;
46
47 std::string line;
48 while (getline(in, line))
49 {
50 std::vector< Vertex > actors_in_movie;
51
52 // Map from the actor numbers on this line to the actor vertices
53 typedef tokenizer< char_separator< char > > Tok;
54 Tok tok(line, char_separator< char >(" "));
55 for (Tok::iterator id = tok.begin(); id != tok.end(); ++id)
56 {
57 int actor_id = lexical_cast< int >(*id);
58 std::map< int, Vertex >::iterator v = actors.find(actor_id);
59 if (v == actors.end())
60 {
61 Vertex new_vertex = add_vertex(Actor(actor_id), g);
62 actors[actor_id] = new_vertex;
63 actors_in_movie.push_back(new_vertex);
64 }
65 else
66 {
67 actors_in_movie.push_back(v->second);
68 }
69 }
70
71 for (std::vector< Vertex >::iterator i = actors_in_movie.begin();
72 i != actors_in_movie.end(); ++i)
73 {
74 for (std::vector< Vertex >::iterator j = i + 1;
75 j != actors_in_movie.end(); ++j)
76 {
77 if (!edge(*i, *j, g).second)
78 add_edge(*i, *j, g);
79 }
80 }
81 }
82 }
83
84 template < typename Graph, typename VertexIndexMap, typename VertexNameMap >
write_pajek_graph(std::ostream & out,const Graph & g,VertexIndexMap vertex_index,VertexNameMap vertex_name)85 std::ostream& write_pajek_graph(std::ostream& out, const Graph& g,
86 VertexIndexMap vertex_index, VertexNameMap vertex_name)
87 {
88 out << "*Vertices " << num_vertices(g) << '\n';
89 typedef typename graph_traits< Graph >::vertex_iterator vertex_iterator;
90 for (vertex_iterator v = vertices(g).first; v != vertices(g).second; ++v)
91 {
92 out << get(vertex_index, *v) + 1 << " \"" << get(vertex_name, *v)
93 << "\"\n";
94 }
95
96 out << "*Edges\n";
97 typedef typename graph_traits< Graph >::edge_iterator edge_iterator;
98 for (edge_iterator e = edges(g).first; e != edges(g).second; ++e)
99 {
100 out << get(vertex_index, source(*e, g)) + 1 << ' '
101 << get(vertex_index, target(*e, g)) + 1 << " 1.0\n"; // HACK!
102 }
103 return out;
104 }
105
106 class actor_clustering_threshold : public bc_clustering_threshold< double >
107 {
108 typedef bc_clustering_threshold< double > inherited;
109
110 public:
actor_clustering_threshold(double threshold,const ActorGraph & g,bool normalize)111 actor_clustering_threshold(
112 double threshold, const ActorGraph& g, bool normalize)
113 : inherited(threshold, g, normalize), iter(1)
114 {
115 }
116
operator ()(double max_centrality,Edge e,const ActorGraph & g)117 bool operator()(double max_centrality, Edge e, const ActorGraph& g)
118 {
119 std::cout << "Iter: " << iter
120 << " Max Centrality: " << (max_centrality / dividend)
121 << std::endl;
122 ++iter;
123 return inherited::operator()(max_centrality, e, g);
124 }
125
126 private:
127 unsigned int iter;
128 };
129
main(int argc,char * argv[])130 int main(int argc, char* argv[])
131 {
132 std::string in_file;
133 std::string out_file;
134 double threshold = -1.0;
135 bool normalize = false;
136
137 // Parse command-line options
138 {
139 int on_arg = 1;
140 while (on_arg < argc)
141 {
142 std::string arg(argv[on_arg]);
143 if (arg == "-in")
144 {
145 ++on_arg;
146 assert(on_arg < argc);
147 in_file = argv[on_arg];
148 }
149 else if (arg == "-out")
150 {
151 ++on_arg;
152 assert(on_arg < argc);
153 out_file = argv[on_arg];
154 }
155 else if (arg == "-threshold")
156 {
157 ++on_arg;
158 assert(on_arg < argc);
159 threshold = lexical_cast< double >(argv[on_arg]);
160 }
161 else if (arg == "-normalize")
162 {
163 normalize = true;
164 }
165 else
166 {
167 std::cerr << "Unrecognized parameter \"" << arg << "\".\n";
168 return -1;
169 }
170 ++on_arg;
171 }
172
173 if (in_file.empty() || out_file.empty() || threshold < 0)
174 {
175 std::cerr << "error: syntax is actor_clustering [options]\n\n"
176 << "options are:\n"
177 << "\t-in <infile>\tInput file\n"
178 << "\t-out <outfile>\tOutput file\n"
179 << "\t-threshold <value>\tA threshold value\n"
180 << "\t-normalize\tNormalize edge centrality scores\n";
181 return -1;
182 }
183 }
184
185 ActorGraph g;
186
187 // Load the actor graph
188 {
189 std::cout << "Building graph." << std::endl;
190 std::ifstream in(in_file.c_str());
191 if (!in)
192 {
193 std::cerr << "Unable to open file \"" << in_file
194 << "\" for input.\n";
195 return -2;
196 }
197 load_actor_graph(in, g);
198 }
199
200 // Run the algorithm
201 std::cout << "Clusting..." << std::endl;
202 betweenness_centrality_clustering(g,
203 actor_clustering_threshold(threshold, g, normalize),
204 get(edge_centrality, g));
205
206 // Output the graph
207 {
208 std::cout << "Writing graph to file: " << out_file << std::endl;
209 std::ofstream out(out_file.c_str());
210 if (!out)
211 {
212 std::cerr << "Unable to open file \"" << out_file
213 << "\" for output.\n";
214 return -3;
215 }
216 write_pajek_graph(out, g, get(vertex_index, g), get(&Actor::id, g));
217 }
218 return 0;
219 }
220