1 #include <opencv2/opencv.hpp>
2
3 #include <string>
4 #include <iostream>
5 #include <fstream>
6 #include <vector>
7
8 #include <time.h>
9
10 using namespace cv;
11 using namespace cv::ml;
12 using namespace std;
13
14 void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector );
15 void convert_to_ml(const std::vector< cv::Mat > & train_samples, cv::Mat& trainData );
16 void load_images( const string & prefix, const string & filename, vector< Mat > & img_lst );
17 void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size );
18 Mat get_hogdescriptor_visu(const Mat& color_origImg, vector<float>& descriptorValues, const Size & size );
19 void compute_hog( const vector< Mat > & img_lst, vector< Mat > & gradient_lst, const Size & size );
20 void train_svm( const vector< Mat > & gradient_lst, const vector< int > & labels );
21 void draw_locations( Mat & img, const vector< Rect > & locations, const Scalar & color );
22 void test_it( const Size & size );
23
get_svm_detector(const Ptr<SVM> & svm,vector<float> & hog_detector)24 void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector )
25 {
26 // get the support vectors
27 Mat sv = svm->getSupportVectors();
28 const int sv_total = sv.rows;
29 // get the decision function
30 Mat alpha, svidx;
31 double rho = svm->getDecisionFunction(0, alpha, svidx);
32
33 CV_Assert( alpha.total() == 1 && svidx.total() == 1 && sv_total == 1 );
34 CV_Assert( (alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
35 (alpha.type() == CV_32F && alpha.at<float>(0) == 1.f) );
36 CV_Assert( sv.type() == CV_32F );
37 hog_detector.clear();
38
39 hog_detector.resize(sv.cols + 1);
40 memcpy(&hog_detector[0], sv.ptr(), sv.cols*sizeof(hog_detector[0]));
41 hog_detector[sv.cols] = (float)-rho;
42 }
43
44
45 /*
46 * Convert training/testing set to be used by OpenCV Machine Learning algorithms.
47 * TrainData is a matrix of size (#samples x max(#cols,#rows) per samples), in 32FC1.
48 * Transposition of samples are made if needed.
49 */
convert_to_ml(const std::vector<cv::Mat> & train_samples,cv::Mat & trainData)50 void convert_to_ml(const std::vector< cv::Mat > & train_samples, cv::Mat& trainData )
51 {
52 //--Convert data
53 const int rows = (int)train_samples.size();
54 const int cols = (int)std::max( train_samples[0].cols, train_samples[0].rows );
55 cv::Mat tmp(1, cols, CV_32FC1); //< used for transposition if needed
56 trainData = cv::Mat(rows, cols, CV_32FC1 );
57 vector< Mat >::const_iterator itr = train_samples.begin();
58 vector< Mat >::const_iterator end = train_samples.end();
59 for( int i = 0 ; itr != end ; ++itr, ++i )
60 {
61 CV_Assert( itr->cols == 1 ||
62 itr->rows == 1 );
63 if( itr->cols == 1 )
64 {
65 transpose( *(itr), tmp );
66 tmp.copyTo( trainData.row( i ) );
67 }
68 else if( itr->rows == 1 )
69 {
70 itr->copyTo( trainData.row( i ) );
71 }
72 }
73 }
74
load_images(const string & prefix,const string & filename,vector<Mat> & img_lst)75 void load_images( const string & prefix, const string & filename, vector< Mat > & img_lst )
76 {
77 string line;
78 ifstream file;
79
80 file.open( (prefix+filename).c_str() );
81 if( !file.is_open() )
82 {
83 cerr << "Unable to open the list of images from " << filename << " filename." << endl;
84 exit( -1 );
85 }
86
87 bool end_of_parsing = false;
88 while( !end_of_parsing )
89 {
90 getline( file, line );
91 if( line == "" ) // no more file to read
92 {
93 end_of_parsing = true;
94 break;
95 }
96 Mat img = imread( (prefix+line).c_str() ); // load the image
97 if( img.empty() ) // invalid image, just skip it.
98 continue;
99 #ifdef _DEBUG
100 imshow( "image", img );
101 waitKey( 10 );
102 #endif
103 img_lst.push_back( img.clone() );
104 }
105 }
106
sample_neg(const vector<Mat> & full_neg_lst,vector<Mat> & neg_lst,const Size & size)107 void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size )
108 {
109 Rect box;
110 box.width = size.width;
111 box.height = size.height;
112
113 const int size_x = box.width;
114 const int size_y = box.height;
115
116 srand( (unsigned int)time( NULL ) );
117
118 vector< Mat >::const_iterator img = full_neg_lst.begin();
119 vector< Mat >::const_iterator end = full_neg_lst.end();
120 for( ; img != end ; ++img )
121 {
122 box.x = rand() % (img->cols - size_x);
123 box.y = rand() % (img->rows - size_y);
124 Mat roi = (*img)(box);
125 neg_lst.push_back( roi.clone() );
126 #ifdef _DEBUG
127 imshow( "img", roi.clone() );
128 waitKey( 10 );
129 #endif
130 }
131 }
132
133 // From http://www.juergenwiki.de/work/wiki/doku.php?id=public:hog_descriptor_computation_and_visualization
get_hogdescriptor_visu(const Mat & color_origImg,vector<float> & descriptorValues,const Size & size)134 Mat get_hogdescriptor_visu(const Mat& color_origImg, vector<float>& descriptorValues, const Size & size )
135 {
136 const int DIMX = size.width;
137 const int DIMY = size.height;
138 float zoomFac = 3;
139 Mat visu;
140 resize(color_origImg, visu, Size( (int)(color_origImg.cols*zoomFac), (int)(color_origImg.rows*zoomFac) ) );
141
142 int cellSize = 8;
143 int gradientBinSize = 9;
144 float radRangeForOneBin = (float)(CV_PI/(float)gradientBinSize); // dividing 180 into 9 bins, how large (in rad) is one bin?
145
146 // prepare data structure: 9 orientation / gradient strenghts for each cell
147 int cells_in_x_dir = DIMX / cellSize;
148 int cells_in_y_dir = DIMY / cellSize;
149 float*** gradientStrengths = new float**[cells_in_y_dir];
150 int** cellUpdateCounter = new int*[cells_in_y_dir];
151 for (int y=0; y<cells_in_y_dir; y++)
152 {
153 gradientStrengths[y] = new float*[cells_in_x_dir];
154 cellUpdateCounter[y] = new int[cells_in_x_dir];
155 for (int x=0; x<cells_in_x_dir; x++)
156 {
157 gradientStrengths[y][x] = new float[gradientBinSize];
158 cellUpdateCounter[y][x] = 0;
159
160 for (int bin=0; bin<gradientBinSize; bin++)
161 gradientStrengths[y][x][bin] = 0.0;
162 }
163 }
164
165 // nr of blocks = nr of cells - 1
166 // since there is a new block on each cell (overlapping blocks!) but the last one
167 int blocks_in_x_dir = cells_in_x_dir - 1;
168 int blocks_in_y_dir = cells_in_y_dir - 1;
169
170 // compute gradient strengths per cell
171 int descriptorDataIdx = 0;
172 int cellx = 0;
173 int celly = 0;
174
175 for (int blockx=0; blockx<blocks_in_x_dir; blockx++)
176 {
177 for (int blocky=0; blocky<blocks_in_y_dir; blocky++)
178 {
179 // 4 cells per block ...
180 for (int cellNr=0; cellNr<4; cellNr++)
181 {
182 // compute corresponding cell nr
183 cellx = blockx;
184 celly = blocky;
185 if (cellNr==1) celly++;
186 if (cellNr==2) cellx++;
187 if (cellNr==3)
188 {
189 cellx++;
190 celly++;
191 }
192
193 for (int bin=0; bin<gradientBinSize; bin++)
194 {
195 float gradientStrength = descriptorValues[ descriptorDataIdx ];
196 descriptorDataIdx++;
197
198 gradientStrengths[celly][cellx][bin] += gradientStrength;
199
200 } // for (all bins)
201
202
203 // note: overlapping blocks lead to multiple updates of this sum!
204 // we therefore keep track how often a cell was updated,
205 // to compute average gradient strengths
206 cellUpdateCounter[celly][cellx]++;
207
208 } // for (all cells)
209
210
211 } // for (all block x pos)
212 } // for (all block y pos)
213
214
215 // compute average gradient strengths
216 for (celly=0; celly<cells_in_y_dir; celly++)
217 {
218 for (cellx=0; cellx<cells_in_x_dir; cellx++)
219 {
220
221 float NrUpdatesForThisCell = (float)cellUpdateCounter[celly][cellx];
222
223 // compute average gradient strenghts for each gradient bin direction
224 for (int bin=0; bin<gradientBinSize; bin++)
225 {
226 gradientStrengths[celly][cellx][bin] /= NrUpdatesForThisCell;
227 }
228 }
229 }
230
231 // draw cells
232 for (celly=0; celly<cells_in_y_dir; celly++)
233 {
234 for (cellx=0; cellx<cells_in_x_dir; cellx++)
235 {
236 int drawX = cellx * cellSize;
237 int drawY = celly * cellSize;
238
239 int mx = drawX + cellSize/2;
240 int my = drawY + cellSize/2;
241
242 rectangle(visu, Point((int)(drawX*zoomFac), (int)(drawY*zoomFac)), Point((int)((drawX+cellSize)*zoomFac), (int)((drawY+cellSize)*zoomFac)), Scalar(100,100,100), 1);
243
244 // draw in each cell all 9 gradient strengths
245 for (int bin=0; bin<gradientBinSize; bin++)
246 {
247 float currentGradStrength = gradientStrengths[celly][cellx][bin];
248
249 // no line to draw?
250 if (currentGradStrength==0)
251 continue;
252
253 float currRad = bin * radRangeForOneBin + radRangeForOneBin/2;
254
255 float dirVecX = cos( currRad );
256 float dirVecY = sin( currRad );
257 float maxVecLen = (float)(cellSize/2.f);
258 float scale = 2.5; // just a visualization scale, to see the lines better
259
260 // compute line coordinates
261 float x1 = mx - dirVecX * currentGradStrength * maxVecLen * scale;
262 float y1 = my - dirVecY * currentGradStrength * maxVecLen * scale;
263 float x2 = mx + dirVecX * currentGradStrength * maxVecLen * scale;
264 float y2 = my + dirVecY * currentGradStrength * maxVecLen * scale;
265
266 // draw gradient visualization
267 line(visu, Point((int)(x1*zoomFac),(int)(y1*zoomFac)), Point((int)(x2*zoomFac),(int)(y2*zoomFac)), Scalar(0,255,0), 1);
268
269 } // for (all bins)
270
271 } // for (cellx)
272 } // for (celly)
273
274
275 // don't forget to free memory allocated by helper data structures!
276 for (int y=0; y<cells_in_y_dir; y++)
277 {
278 for (int x=0; x<cells_in_x_dir; x++)
279 {
280 delete[] gradientStrengths[y][x];
281 }
282 delete[] gradientStrengths[y];
283 delete[] cellUpdateCounter[y];
284 }
285 delete[] gradientStrengths;
286 delete[] cellUpdateCounter;
287
288 return visu;
289
290 } // get_hogdescriptor_visu
291
compute_hog(const vector<Mat> & img_lst,vector<Mat> & gradient_lst,const Size & size)292 void compute_hog( const vector< Mat > & img_lst, vector< Mat > & gradient_lst, const Size & size )
293 {
294 HOGDescriptor hog;
295 hog.winSize = size;
296 Mat gray;
297 vector< Point > location;
298 vector< float > descriptors;
299
300 vector< Mat >::const_iterator img = img_lst.begin();
301 vector< Mat >::const_iterator end = img_lst.end();
302 for( ; img != end ; ++img )
303 {
304 cvtColor( *img, gray, COLOR_BGR2GRAY );
305 hog.compute( gray, descriptors, Size( 8, 8 ), Size( 0, 0 ), location );
306 gradient_lst.push_back( Mat( descriptors ).clone() );
307 #ifdef _DEBUG
308 imshow( "gradient", get_hogdescriptor_visu( img->clone(), descriptors, size ) );
309 waitKey( 10 );
310 #endif
311 }
312 }
313
train_svm(const vector<Mat> & gradient_lst,const vector<int> & labels)314 void train_svm( const vector< Mat > & gradient_lst, const vector< int > & labels )
315 {
316
317 Mat train_data;
318 convert_to_ml( gradient_lst, train_data );
319
320 clog << "Start training...";
321 Ptr<SVM> svm = SVM::create();
322 /* Default values to train SVM */
323 svm->setCoef0(0.0);
324 svm->setDegree(3);
325 svm->setTermCriteria(TermCriteria( CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, 1e-3 ));
326 svm->setGamma(0);
327 svm->setKernel(SVM::LINEAR);
328 svm->setNu(0.5);
329 svm->setP(0.1); // for EPSILON_SVR, epsilon in loss function?
330 svm->setC(0.01); // From paper, soft classifier
331 svm->setType(SVM::EPS_SVR); // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task
332 svm->train(train_data, ROW_SAMPLE, Mat(labels));
333 clog << "...[done]" << endl;
334
335 svm->save( "my_people_detector.yml" );
336 }
337
draw_locations(Mat & img,const vector<Rect> & locations,const Scalar & color)338 void draw_locations( Mat & img, const vector< Rect > & locations, const Scalar & color )
339 {
340 if( !locations.empty() )
341 {
342 vector< Rect >::const_iterator loc = locations.begin();
343 vector< Rect >::const_iterator end = locations.end();
344 for( ; loc != end ; ++loc )
345 {
346 rectangle( img, *loc, color, 2 );
347 }
348 }
349 }
350
test_it(const Size & size)351 void test_it( const Size & size )
352 {
353 char key = 27;
354 Scalar reference( 0, 255, 0 );
355 Scalar trained( 0, 0, 255 );
356 Mat img, draw;
357 Ptr<SVM> svm;
358 HOGDescriptor hog;
359 HOGDescriptor my_hog;
360 my_hog.winSize = size;
361 VideoCapture video;
362 vector< Rect > locations;
363
364 // Load the trained SVM.
365 svm = StatModel::load<SVM>( "my_people_detector.yml" );
366 // Set the trained svm to my_hog
367 vector< float > hog_detector;
368 get_svm_detector( svm, hog_detector );
369 my_hog.setSVMDetector( hog_detector );
370 // Set the people detector.
371 hog.setSVMDetector( hog.getDefaultPeopleDetector() );
372 // Open the camera.
373 video.open(0);
374 if( !video.isOpened() )
375 {
376 cerr << "Unable to open the device 0" << endl;
377 exit( -1 );
378 }
379
380 bool end_of_process = false;
381 while( !end_of_process )
382 {
383 video >> img;
384 if( img.empty() )
385 break;
386
387 draw = img.clone();
388
389 locations.clear();
390 hog.detectMultiScale( img, locations );
391 draw_locations( draw, locations, reference );
392
393 locations.clear();
394 my_hog.detectMultiScale( img, locations );
395 draw_locations( draw, locations, trained );
396
397 imshow( "Video", draw );
398 key = (char)waitKey( 10 );
399 if( 27 == key )
400 end_of_process = true;
401 }
402 }
403
main(int argc,char ** argv)404 int main( int argc, char** argv )
405 {
406 if( argc != 5 )
407 {
408 cout << "Wrong number of parameters." << endl
409 << "Usage: " << argv[0] << " pos_dir pos.lst neg_dir neg.lst" << endl
410 << "example: " << argv[0] << " /INRIA_dataset/ Train/pos.lst /INRIA_dataset/ Train/neg.lst" << endl;
411 exit( -1 );
412 }
413 vector< Mat > pos_lst;
414 vector< Mat > full_neg_lst;
415 vector< Mat > neg_lst;
416 vector< Mat > gradient_lst;
417 vector< int > labels;
418
419 load_images( argv[1], argv[2], pos_lst );
420 labels.assign( pos_lst.size(), +1 );
421 const unsigned int old = (unsigned int)labels.size();
422 load_images( argv[3], argv[4], full_neg_lst );
423 sample_neg( full_neg_lst, neg_lst, Size( 96,160 ) );
424 labels.insert( labels.end(), neg_lst.size(), -1 );
425 CV_Assert( old < labels.size() );
426
427 compute_hog( pos_lst, gradient_lst, Size( 96, 160 ) );
428 compute_hog( neg_lst, gradient_lst, Size( 96, 160 ) );
429
430 train_svm( gradient_lst, labels );
431
432 test_it( Size( 96, 160 ) ); // change with your parameters
433
434 return 0;
435 }
436