1 #include "opencv2/core.hpp"
2 #include "opencv2/core/utility.hpp"
3
4 using cv::Size;
5 using cv::Mat;
6 using cv::Point;
7 using cv::FileStorage;
8 using cv::Rect;
9 using cv::Ptr;
10 using cv::FileNode;
11 using cv::Mat_;
12 using cv::Range;
13 using cv::FileNodeIterator;
14 using cv::ParallelLoopBody;
15
16
17 using cv::Size;
18 using cv::Mat;
19 using cv::Point;
20 using cv::FileStorage;
21 using cv::Rect;
22 using cv::Ptr;
23 using cv::FileNode;
24 using cv::Mat_;
25 using cv::Range;
26 using cv::FileNodeIterator;
27 using cv::ParallelLoopBody;
28
29
30 #include "boost.h"
31 #include "cascadeclassifier.h"
32 #include <queue>
33 #include "cxmisc.h"
34
35 #include "cvconfig.h"
36 #ifdef HAVE_TBB
37 # include "tbb/tbb_stddef.h"
38 # if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
39 # include "tbb/tbb.h"
40 # include "tbb/task.h"
41 # undef min
42 # undef max
43 # else
44 # undef HAVE_TBB
45 # endif
46 #endif
47
#ifdef HAVE_TBB
// TBB available: alias the real blocked range and forward parallel_for
// straight to tbb::parallel_for, so training code uses one spelling
// regardless of the backend.
typedef tbb::blocked_range<int> BlockedRange;

template<typename Body> static inline
void parallel_for( const BlockedRange& range, const Body& body )
{
    tbb::parallel_for(range, body);
}
#else
// Serial fallback: a minimal stand-in that mimics tbb::blocked_range's
// begin()/end()/grainsize() interface.
class BlockedRange
{
public:
    BlockedRange() : _begin(0), _end(0), _grainsize(0) {}
    BlockedRange(int b, int e, int g=1) : _begin(b), _end(e), _grainsize(g) {}
    int begin() const { return _begin; }
    int end() const { return _end; }
    int grainsize() const { return _grainsize; }

protected:
    int _begin, _end, _grainsize;
};

template<typename Body> static inline
void parallel_for( const BlockedRange& range, const Body& body )
{
    // Without TBB the whole range is processed in the calling thread.
    body(range);
}
#endif
76
77 using namespace std;
78
79 static inline double
logRatio(double val)80 logRatio( double val )
81 {
82 const double eps = 1e-5;
83
84 val = max( val, eps );
85 val = min( val, 1. - eps );
86 return log( val/(1. - val) );
87 }
88
// Index comparator for argsort: orders indices by the values they refer to
// inside `arr`, leaving the value array itself untouched. Used with
// std::sort to build value-sorted index columns.
template<typename T, typename Idx>
class LessThanIdx
{
public:
    LessThanIdx( const T* _arr ) : arr(_arr) {}
    bool operator()(Idx a, Idx b) const
    {
        const T& lhs = arr[a];
        const T& rhs = arr[b];
        return lhs < rhs;
    }
    const T* arr;
};
97
// Round `size` up to the nearest multiple of `align`.
// `align` must be a power of two (checked in debug builds only), which makes
// the rounding a single add-and-mask: -align == ~(align - 1) in two's
// complement, so the mask clears the low bits.
static inline int cvAlign( int size, int align )
{
    CV_DbgAssert( (align & (align-1)) == 0 && size < INT_MAX );
    return (size + align - 1) & -align;
}
103
// Tolerance for floating-point comparisons of split thresholds.
#define CV_THRESHOLD_EPS (0.00001F)

// Sizing hints (bytes) for the memory-storage blocks that hold tree nodes
// and splits: at least 64 KiB per block, padded in 1 KiB steps.
static const int MinBlockSize = 1 << 16;
static const int BlockSizeDelta = 1 << 10;
108
109 // TODO remove this code duplication with ml/precomp.hpp
110
icvCmpIntegers(const void * a,const void * b)111 static int CV_CDECL icvCmpIntegers( const void* a, const void* b )
112 {
113 return *(const int*)a - *(const int*)b;
114 }
115
// Validate and normalise a sample/variable selection into a fresh
// 1 x idx_selected CV_32SC1 matrix of sorted indices.
// idx_arr may be either a 0/1 mask (CV_8UC1/CV_8SC1, one entry per input
// variable) or an explicit list of indices (CV_32SC1, duplicates rejected
// only when check_for_duplicates is set). Returns 0 on error (the OpenCV
// error machinery releases the partially built matrix).
static CvMat* cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, bool check_for_duplicates=false )
{
    CvMat* idx = 0;

    CV_FUNCNAME( "cvPreprocessIndexArray" );

    __CV_BEGIN__;

    int i, idx_total, idx_selected = 0, step, type, prev = INT_MIN, is_sorted = 1;
    uchar* srcb = 0;
    int* srci = 0;
    int* dsti;

    if( !CV_IS_MAT(idx_arr) )
        CV_ERROR( CV_StsBadArg, "Invalid index array" );

    if( idx_arr->rows != 1 && idx_arr->cols != 1 )
        CV_ERROR( CV_StsBadSize, "the index array must be 1-dimensional" );

    idx_total = idx_arr->rows + idx_arr->cols - 1;
    srcb = idx_arr->data.ptr;
    srci = idx_arr->data.i;

    type = CV_MAT_TYPE(idx_arr->type);
    // step is in elements, not bytes; 1 for a continuous vector.
    step = CV_IS_MAT_CONT(idx_arr->type) ? 1 : idx_arr->step/CV_ELEM_SIZE(type);

    switch( type )
    {
    case CV_8UC1:
    case CV_8SC1:
        // idx_arr is array of 1's and 0's -
        // i.e. it is a mask of the selected components
        if( idx_total != data_arr_size )
            CV_ERROR( CV_StsUnmatchedSizes,
            "Component mask should contain as many elements as the total number of input variables" );

        for( i = 0; i < idx_total; i++ )
            idx_selected += srcb[i*step] != 0;

        if( idx_selected == 0 )
            CV_ERROR( CV_StsOutOfRange, "No components/input_variables is selected!" );

        break;
    case CV_32SC1:
        // idx_arr is array of integer indices of selected components
        if( idx_total > data_arr_size )
            CV_ERROR( CV_StsOutOfRange,
            "index array may not contain more elements than the total number of input variables" );
        idx_selected = idx_total;
        // check if sorted already
        // BUGFIX: this used to test "val >= prev", which fired on the very
        // first element (prev starts at INT_MIN) and therefore marked every
        // input — including already-sorted ones — as unsorted, forcing a
        // redundant qsort below. An ascending sequence is only broken when
        // the current value drops below its predecessor.
        for( i = 0; i < idx_total; i++ )
        {
            int val = srci[i*step];
            if( val < prev )
            {
                is_sorted = 0;
                break;
            }
            prev = val;
        }
        break;
    default:
        CV_ERROR( CV_StsUnsupportedFormat, "Unsupported index array data type "
                  "(it should be 8uC1, 8sC1 or 32sC1)" );
    }

    CV_CALL( idx = cvCreateMat( 1, idx_selected, CV_32SC1 ));
    dsti = idx->data.i;

    if( type < CV_32SC1 )
    {
        // Mask form: emit the positions of the non-zero entries (already sorted).
        for( i = 0; i < idx_total; i++ )
            if( srcb[i*step] )
                *dsti++ = i;
    }
    else
    {
        // Index-list form: copy, sort if needed, then range-check the extremes
        // (valid because dsti is sorted at this point).
        for( i = 0; i < idx_total; i++ )
            dsti[i] = srci[i*step];

        if( !is_sorted )
            qsort( dsti, idx_total, sizeof(dsti[0]), icvCmpIntegers );

        if( dsti[0] < 0 || dsti[idx_total-1] >= data_arr_size )
            CV_ERROR( CV_StsOutOfRange, "the index array elements are out of range" );

        if( check_for_duplicates )
        {
            // Sorted data: a duplicate must be adjacent.
            for( i = 1; i < idx_total; i++ )
                if( dsti[i] <= dsti[i-1] )
                    CV_ERROR( CV_StsBadArg, "There are duplicated index array elements" );
        }
    }

    __CV_END__;

    if( cvGetErrStatus() < 0 )
        cvReleaseMat( &idx );

    return idx;
}
217
218 //----------------------------- CascadeBoostParams -------------------------------------------------
219
CvCascadeBoostParams()220 CvCascadeBoostParams::CvCascadeBoostParams() : minHitRate( 0.995F), maxFalseAlarm( 0.5F )
221 {
222 boost_type = CvBoost::GENTLE;
223 use_surrogates = use_1se_rule = truncate_pruned_tree = false;
224 }
225
// Full-parameter constructor.
// NOTE(review): boost_type is unconditionally reset to GENTLE in the body,
// so the _boostType argument only reaches the CvBoostParams base
// initialisation and is then overridden here — confirm callers rely on the
// type being set later (e.g. via scanAttr/read) rather than through this
// constructor.
CvCascadeBoostParams::CvCascadeBoostParams( int _boostType,
        float _minHitRate, float _maxFalseAlarm,
        double _weightTrimRate, int _maxDepth, int _maxWeakCount ) :
    CvBoostParams( _boostType, _maxWeakCount, _weightTrimRate, _maxDepth, false, 0 )
{
    boost_type = CvBoost::GENTLE;
    minHitRate = _minHitRate;
    maxFalseAlarm = _maxFalseAlarm;
    use_surrogates = use_1se_rule = truncate_pruned_tree = false;
}
236
write(FileStorage & fs) const237 void CvCascadeBoostParams::write( FileStorage &fs ) const
238 {
239 string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
240 boost_type == CvBoost::REAL ? CC_REAL_BOOST :
241 boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
242 boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string();
243 CV_Assert( !boostTypeStr.empty() );
244 fs << CC_BOOST_TYPE << boostTypeStr;
245 fs << CC_MINHITRATE << minHitRate;
246 fs << CC_MAXFALSEALARM << maxFalseAlarm;
247 fs << CC_TRIM_RATE << weight_trim_rate;
248 fs << CC_MAX_DEPTH << max_depth;
249 fs << CC_WEAK_COUNT << weak_count;
250 }
251
read(const FileNode & node)252 bool CvCascadeBoostParams::read( const FileNode &node )
253 {
254 string boostTypeStr;
255 FileNode rnode = node[CC_BOOST_TYPE];
256 rnode >> boostTypeStr;
257 boost_type = !boostTypeStr.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
258 !boostTypeStr.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
259 !boostTypeStr.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
260 !boostTypeStr.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
261 if (boost_type == -1)
262 CV_Error( CV_StsBadArg, "unsupported Boost type" );
263 node[CC_MINHITRATE] >> minHitRate;
264 node[CC_MAXFALSEALARM] >> maxFalseAlarm;
265 node[CC_TRIM_RATE] >> weight_trim_rate ;
266 node[CC_MAX_DEPTH] >> max_depth ;
267 node[CC_WEAK_COUNT] >> weak_count ;
268 if ( minHitRate <= 0 || minHitRate > 1 ||
269 maxFalseAlarm <= 0 || maxFalseAlarm > 1 ||
270 weight_trim_rate <= 0 || weight_trim_rate > 1 ||
271 max_depth <= 0 || weak_count <= 0 )
272 CV_Error( CV_StsBadArg, "bad parameters range");
273 return true;
274 }
275
printDefaults() const276 void CvCascadeBoostParams::printDefaults() const
277 {
278 cout << "--boostParams--" << endl;
279 cout << " [-bt <{" << CC_DISCRETE_BOOST << ", "
280 << CC_REAL_BOOST << ", "
281 << CC_LOGIT_BOOST ", "
282 << CC_GENTLE_BOOST << "(default)}>]" << endl;
283 cout << " [-minHitRate <min_hit_rate> = " << minHitRate << ">]" << endl;
284 cout << " [-maxFalseAlarmRate <max_false_alarm_rate = " << maxFalseAlarm << ">]" << endl;
285 cout << " [-weightTrimRate <weight_trim_rate = " << weight_trim_rate << ">]" << endl;
286 cout << " [-maxDepth <max_depth_of_weak_tree = " << max_depth << ">]" << endl;
287 cout << " [-maxWeakCount <max_weak_tree_count = " << weak_count << ">]" << endl;
288 }
289
printAttrs() const290 void CvCascadeBoostParams::printAttrs() const
291 {
292 string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
293 boost_type == CvBoost::REAL ? CC_REAL_BOOST :
294 boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
295 boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string();
296 CV_Assert( !boostTypeStr.empty() );
297 cout << "boostType: " << boostTypeStr << endl;
298 cout << "minHitRate: " << minHitRate << endl;
299 cout << "maxFalseAlarmRate: " << maxFalseAlarm << endl;
300 cout << "weightTrimRate: " << weight_trim_rate << endl;
301 cout << "maxDepth: " << max_depth << endl;
302 cout << "maxWeakCount: " << weak_count << endl;
303 }
304
scanAttr(const string prmName,const string val)305 bool CvCascadeBoostParams::scanAttr( const string prmName, const string val)
306 {
307 bool res = true;
308
309 if( !prmName.compare( "-bt" ) )
310 {
311 boost_type = !val.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
312 !val.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
313 !val.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
314 !val.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
315 if (boost_type == -1)
316 res = false;
317 }
318 else if( !prmName.compare( "-minHitRate" ) )
319 {
320 minHitRate = (float) atof( val.c_str() );
321 }
322 else if( !prmName.compare( "-maxFalseAlarmRate" ) )
323 {
324 maxFalseAlarm = (float) atof( val.c_str() );
325 }
326 else if( !prmName.compare( "-weightTrimRate" ) )
327 {
328 weight_trim_rate = (float) atof( val.c_str() );
329 }
330 else if( !prmName.compare( "-maxDepth" ) )
331 {
332 max_depth = atoi( val.c_str() );
333 }
334 else if( !prmName.compare( "-maxWeakCount" ) )
335 {
336 weak_count = atoi( val.c_str() );
337 }
338 else
339 res = false;
340
341 return res;
342 }
343
// Build a root node for a (possibly strict) subsample of the training set.
// _subsample_idx may be 0 (take every sample), a 0/1 mask, or an explicit
// list of sample indices (duplicates allowed). If the selection turns out to
// be exactly the identity, the existing data_root is cheaply copied;
// otherwise the per-variable sorted-index columns, cv_labels and sample
// indices are re-packed for the selected subset.
CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_idx )
{
    CvDTreeNode* root = 0;
    CvMat* isubsample_idx = 0;
    CvMat* subsample_co = 0;

    bool isMakeRootCopy = true;

    if( !data_root )
        CV_Error( CV_StsError, "No training data has been set" );

    if( _subsample_idx )
    {
        // Normalise the mask/index array into a sorted CV_32SC1 index list.
        CV_Assert( (isubsample_idx = cvPreprocessIndexArray( _subsample_idx, sample_count )) != 0 );

        // The copy shortcut applies only when every sample is selected
        // exactly once and in order (the identity permutation).
        if( isubsample_idx->cols + isubsample_idx->rows - 1 == sample_count )
        {
            const int* sidx = isubsample_idx->data.i;
            for( int i = 0; i < sample_count; i++ )
            {
                if( sidx[i] != i )
                {
                    isMakeRootCopy = false;
                    break;
                }
            }
        }
        else
            isMakeRootCopy = false;
    }

    if( isMakeRootCopy )
    {
        // make a copy of the root node
        CvDTreeNode temp;
        int i;
        root = new_node( 0, 1, 0, 0 );
        temp = *root;
        *root = *data_root;
        // Keep the freshly allocated node's own num_valid/cv arrays (saved in
        // temp) and copy only their contents from data_root.
        root->num_valid = temp.num_valid;
        if( root->num_valid )
        {
            for( i = 0; i < var_count; i++ )
                root->num_valid[i] = data_root->num_valid[i];
        }
        root->cv_Tn = temp.cv_Tn;
        root->cv_node_risk = temp.cv_node_risk;
        root->cv_node_error = temp.cv_node_error;
    }
    else
    {
        int* sidx = isubsample_idx->data.i;
        // co - array of count/offset pairs (to handle duplicated values in _subsample_idx)
        int* co, cur_ofs = 0;
        int workVarCount = get_work_var_count();
        int count = isubsample_idx->rows + isubsample_idx->cols - 1;

        root = new_node( 0, count, 1, 0 );

        // co[2*i]   = multiplicity of sample i in the selection,
        // co[2*i+1] = its first slot in the packed output (-1 when unused).
        CV_Assert( (subsample_co = cvCreateMat( 1, sample_count*2, CV_32SC1 )) != 0);
        cvZero( subsample_co );
        co = subsample_co->data.i;
        for( int i = 0; i < count; i++ )
            co[sidx[i]*2]++;
        for( int i = 0; i < sample_count; i++ )
        {
            if( co[i*2] )
            {
                co[i*2+1] = cur_ofs;
                cur_ofs += co[i*2];
            }
            else
                co[i*2+1] = -1;
        }

        // Scratch: one int index array, one float value array, one sample-index array.
        cv::AutoBuffer<uchar> inn_buf(sample_count*(2*sizeof(int) + sizeof(float)));
        // subsample ordered variables
        for( int vi = 0; vi < numPrecalcIdx; vi++ )
        {
            int ci = get_var_type(vi);
            CV_Assert( ci < 0 );  // negative code == ordered variable

            // Carve the three scratch regions out of inn_buf.
            int *src_idx_buf = (int*)(uchar*)inn_buf;
            float *src_val_buf = (float*)(src_idx_buf + sample_count);
            int* sample_indices_buf = (int*)(src_val_buf + sample_count);
            const int* src_idx = 0;
            const float* src_val = 0;
            get_ord_var_data( data_root, vi, src_val_buf, src_idx_buf, &src_val, &src_idx, sample_indices_buf );

            int j = 0, idx, count_i;
            int num_valid = data_root->get_num_valid(vi);
            CV_Assert( num_valid == sample_count );

            // Walk the full value-sorted order and keep (with multiplicity)
            // only the selected samples, so the subset stays sorted by value.
            if (is_buf_16u)
            {
                // NOTE(review): this branch offsets by data_root->offset while
                // the 32-bit branch below uses root->offset — confirm the
                // asymmetry is intentional.
                unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
                    (size_t)vi*sample_count + data_root->offset);
                for( int i = 0; i < num_valid; i++ )
                {
                    idx = src_idx[i];
                    count_i = co[idx*2];
                    if( count_i )
                        for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
                            udst_idx[j] = (unsigned short)cur_ofs;
                }
            }
            else
            {
                int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() +
                    (size_t)vi*sample_count + root->offset;
                for( int i = 0; i < num_valid; i++ )
                {
                    idx = src_idx[i];
                    count_i = co[idx*2];
                    if( count_i )
                        for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
                            idst_idx[j] = cur_ofs;
                }
            }
        }

        // subsample cv_lables
        const int* src_lbls = get_cv_labels(data_root, (int*)(uchar*)inn_buf);
        if (is_buf_16u)
        {
            unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
                (size_t)(workVarCount-1)*sample_count + root->offset);
            for( int i = 0; i < count; i++ )
                udst[i] = (unsigned short)src_lbls[sidx[i]];
        }
        else
        {
            int* idst = buf->data.i + root->buf_idx*get_length_subbuf() +
                (size_t)(workVarCount-1)*sample_count + root->offset;
            for( int i = 0; i < count; i++ )
                idst[i] = src_lbls[sidx[i]];
        }

        // subsample sample_indices
        const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf);
        if (is_buf_16u)
        {
            unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
                (size_t)workVarCount*sample_count + root->offset);
            for( int i = 0; i < count; i++ )
                sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]];
        }
        else
        {
            int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() +
                (size_t)workVarCount*sample_count + root->offset;
            for( int i = 0; i < count; i++ )
                sample_idx_dst[i] = sample_idx_src[sidx[i]];
        }

        // Every variable sees all `count` selected samples.
        for( int vi = 0; vi < var_count; vi++ )
            root->set_num_valid(vi, count);
    }

    cvReleaseMat( &isubsample_idx );
    cvReleaseMat( &subsample_co );

    return root;
}
508
509 //---------------------------- CascadeBoostTrainData -----------------------------
510
CvCascadeBoostTrainData(const CvFeatureEvaluator * _featureEvaluator,const CvDTreeParams & _params)511 CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
512 const CvDTreeParams& _params )
513 {
514 is_classifier = true;
515 var_all = var_count = (int)_featureEvaluator->getNumFeatures();
516
517 featureEvaluator = _featureEvaluator;
518 shared = true;
519 set_params( _params );
520 max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
521 var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );
522 if ( featureEvaluator->getMaxCatCount() > 0 )
523 {
524 numPrecalcIdx = 0;
525 cat_var_count = var_count;
526 ord_var_count = 0;
527 for( int vi = 0; vi < var_count; vi++ )
528 {
529 var_type->data.i[vi] = vi;
530 }
531 }
532 else
533 {
534 cat_var_count = 0;
535 ord_var_count = var_count;
536 for( int vi = 1; vi <= var_count; vi++ )
537 {
538 var_type->data.i[vi-1] = -vi;
539 }
540 }
541 var_type->data.i[var_count] = cat_var_count;
542 var_type->data.i[var_count+1] = cat_var_count+1;
543
544 int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
545 int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
546 treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
547 tree_storage = cvCreateMemStorage( treeBlockSize );
548 node_heap = cvCreateSet( 0, sizeof(node_heap[0]), sizeof(CvDTreeNode), tree_storage );
549 split_heap = cvCreateSet( 0, sizeof(split_heap[0]), maxSplitSize, tree_storage );
550 }
551
// Convenience constructor: immediately binds the evaluator and the sample
// set via setData(), allocating all internal buffers.
CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
        int _numSamples,
        int _precalcValBufSize, int _precalcIdxBufSize,
        const CvDTreeParams& _params )
{
    setData( _featureEvaluator, _numSamples, _precalcValBufSize, _precalcIdxBufSize, _params );
}
559
// Bind a feature evaluator and sample set to this training-data object and
// allocate every internal buffer used during boosting.
// _precalcValBufSize / _precalcIdxBufSize are budgets in megabytes that
// decide how many features get their values / sorted sample orders
// precomputed (see precalculate()).
void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluator,
                                      int _numSamples,
                                      int _precalcValBufSize, int _precalcIdxBufSize,
                                      const CvDTreeParams& _params )
{
    int* idst = 0;
    unsigned short* udst = 0;

    uint64 effective_buf_size = 0;
    int effective_buf_height = 0, effective_buf_width = 0;


    clear();
    shared = true;
    have_labels = true;
    have_priors = false;
    is_classifier = true;

    rng = &cv::theRNG();

    set_params( _params );

    CV_Assert( _featureEvaluator );
    featureEvaluator = _featureEvaluator;

    max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
    _resp = featureEvaluator->getCls();
    responses = &_resp;
    // TODO: check responses: elements must be 0 or 1

    if( _precalcValBufSize < 0 || _precalcIdxBufSize < 0)
        CV_Error( CV_StsOutOfRange, "_numPrecalcVal and _numPrecalcIdx must be positive or 0" );

    var_count = var_all = featureEvaluator->getNumFeatures() * featureEvaluator->getFeatureSize();
    sample_count = _numSamples;

    // Sample indices fit into 16 bits when there are fewer than 65536
    // samples; that halves the footprint of the big index buffer below.
    is_buf_16u = false;
    if (sample_count < 65536)
        is_buf_16u = true;

    // Convert the megabyte budgets into feature counts, capped at var_count.
    numPrecalcVal = min( cvRound((double)_precalcValBufSize*1048576. / (sizeof(float)*sample_count)), var_count );
    numPrecalcIdx = min( cvRound((double)_precalcIdxBufSize*1048576. /
                                 ((is_buf_16u ? sizeof(unsigned short) : sizeof (int))*sample_count)), var_count );

    assert( numPrecalcIdx >= 0 && numPrecalcVal >= 0 );

    valCache.create( numPrecalcVal, sample_count, CV_32FC1 );
    var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );

    if ( featureEvaluator->getMaxCatCount() > 0 )
    {
        // Categorical features: identity var_type codes and no sorted-index cache.
        numPrecalcIdx = 0;
        cat_var_count = var_count;
        ord_var_count = 0;
        for( int vi = 0; vi < var_count; vi++ )
        {
            var_type->data.i[vi] = vi;
        }
    }
    else
    {
        // Ordered features are encoded with negative codes -1..-var_count.
        cat_var_count = 0;
        ord_var_count = var_count;
        for( int vi = 1; vi <= var_count; vi++ )
        {
            var_type->data.i[vi-1] = -vi;
        }
    }
    // Two trailing slots address the internal work columns (cv_labels and
    // sample indices — see get_cv_labels / get_sample_indices).
    var_type->data.i[var_count] = cat_var_count;
    var_type->data.i[var_count+1] = cat_var_count+1;
    work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1/*cv_lables*/;
    buf_count = 2;

    buf_size = -1; // the member buf_size is obsolete

    effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
    effective_buf_width = sample_count;
    effective_buf_height = work_var_count+1;

    // Fold buf_count into one of the two dimensions (prefer widening the
    // larger one) so each int dimension stays as small as possible.
    if (effective_buf_width >= effective_buf_height)
        effective_buf_height *= buf_count;
    else
        effective_buf_width *= buf_count;

    // Overflow guard: the int dimensions must reproduce the 64-bit element count.
    if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size)
    {
        CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit");
    }

    if ( is_buf_16u )
        buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 );
    else
        buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 );

    cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 );

    // precalculate valCache and set indices in buf
    precalculate();

    // now calculate the maximum size of split,
    // create memory storage that will keep nodes and splits of the decision tree
    // allocate root node and the buffer for the whole training data
    int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
        (MAX(0,sample_count - 33)/32)*sizeof(int),sizeof(void*));
    int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
    treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
    tree_storage = cvCreateMemStorage( treeBlockSize );
    node_heap = cvCreateSet( 0, sizeof(*node_heap), sizeof(CvDTreeNode), tree_storage );

    int nvSize = var_count*sizeof(int);
    nvSize = cvAlign(MAX( nvSize, (int)sizeof(CvSetElem) ), sizeof(void*));
    int tempBlockSize = nvSize;
    tempBlockSize = MAX( tempBlockSize + BlockSizeDelta, MinBlockSize );
    temp_storage = cvCreateMemStorage( tempBlockSize );
    nv_heap = cvCreateSet( 0, sizeof(*nv_heap), nvSize, temp_storage );

    data_root = new_node( 0, sample_count, 0, 0 );

    // set sample labels
    if (is_buf_16u)
        udst = (unsigned short*)(buf->data.s + (size_t)work_var_count*sample_count);
    else
        idst = buf->data.i + (size_t)work_var_count*sample_count;

    // Identity mapping: sample i initially occupies position i.
    for (int si = 0; si < sample_count; si++)
    {
        if (udst)
            udst[si] = (unsigned short)si;
        else
            idst[si] = si;
    }
    for( int vi = 0; vi < var_count; vi++ )
        data_root->set_num_valid(vi, sample_count);
    for( int vi = 0; vi < cat_var_count; vi++ )
        cat_count->data.i[vi] = max_c_count;

    // Trailing slot: the class-label column has two categories (cf. the
    // 0/1-responses TODO above).
    cat_count->data.i[cat_var_count] = 2;

    maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
        (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
    split_heap = cvCreateSet( 0, sizeof(*split_heap), maxSplitSize, tree_storage );

    // Uniform class priors and the per-sample work matrices.
    priors = cvCreateMat( 1, get_num_classes(), CV_64F );
    cvSet(priors, cvScalar(1));
    priors_mult = cvCloneMat( priors );
    counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 );
    direction = cvCreateMat( 1, sample_count, CV_8UC1 );
    split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );//TODO: make a pointer
}
709
// Release training buffers: base-class cleanup plus this class's
// precomputed feature-value cache.
void CvCascadeBoostTrainData::free_train_data()
{
    CvDTreeTrainData::free_train_data();
    valCache.release();
}
715
get_class_labels(CvDTreeNode * n,int * labelsBuf)716 const int* CvCascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsBuf)
717 {
718 int nodeSampleCount = n->sample_count;
719 int rStep = CV_IS_MAT_CONT( responses->type ) ? 1 : responses->step / CV_ELEM_SIZE( responses->type );
720
721 int* sampleIndicesBuf = labelsBuf; //
722 const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
723 for( int si = 0; si < nodeSampleCount; si++ )
724 {
725 int sidx = sampleIndices[si];
726 labelsBuf[si] = (int)responses->data.fl[sidx*rStep];
727 }
728 return labelsBuf;
729 }
730
// Sample indices live in the work-variable column at index
// get_work_var_count() of the internal categorical buffer.
const int* CvCascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf )
{
    return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count(), indicesBuf );
}
735
// Cross-validation labels live in the work-variable column at index
// get_work_var_count() - 1 of the internal categorical buffer.
const int* CvCascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf )
{
    return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count() - 1, labels_buf );
}
740
// For ordered variable vi at node n, produce the node's samples sorted by
// that variable's value: *sortedIndices receives the node-local sorted
// order, *ordValues the matching feature values.
// Data paths, cheapest first:
//   vi < numPrecalcIdx : the sorted order is read from `buf`; values come
//                        from valCache when vi < numPrecalcVal, otherwise
//                        they are re-evaluated;
//   otherwise          : values are gathered (cache or evaluator) and the
//                        order is computed here with std::sort.
void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf,
        const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf )
{
    int nodeSampleCount = n->sample_count;
    const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);

    if ( vi < numPrecalcIdx )
    {
        // Sorted order was written into `buf` by precalculate().
        if( !is_buf_16u )
            *sortedIndices = buf->data.i + n->buf_idx*get_length_subbuf() + (size_t)vi*sample_count + n->offset;
        else
        {
            // 16-bit storage: widen the indices into the caller's int buffer.
            const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
                (size_t)vi*sample_count + n->offset );
            for( int i = 0; i < nodeSampleCount; i++ )
                sortedIndicesBuf[i] = shortIndices[i];

            *sortedIndices = sortedIndicesBuf;
        }

        if( vi < numPrecalcVal )
        {
            // Values are cached; translate node-local -> global sample index first.
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                int idx = (*sortedIndices)[i];
                idx = sampleIndices[idx];
                ordValuesBuf[i] = valCache.at<float>( vi, idx);
            }
        }
        else
        {
            // Values not cached: evaluate the feature per sample.
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                int idx = (*sortedIndices)[i];
                idx = sampleIndices[idx];
                ordValuesBuf[i] = (*featureEvaluator)( vi, idx);
            }
        }
    }
    else // vi >= numPrecalcIdx
    {
        // Nothing precomputed for this variable: gather values and sort now.
        cv::AutoBuffer<float> abuf(nodeSampleCount);
        float* sampleValues = &abuf[0];

        if ( vi < numPrecalcVal )
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                sortedIndicesBuf[i] = i;
                sampleValues[i] = valCache.at<float>( vi, sampleIndices[i] );
            }
        }
        else
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                sortedIndicesBuf[i] = i;
                sampleValues[i] = (*featureEvaluator)( vi, sampleIndices[i]);
            }
        }
        // Argsort the node-local indices by value, then emit the values in
        // that order.
        std::sort(sortedIndicesBuf, sortedIndicesBuf + nodeSampleCount, LessThanIdx<float, int>(&sampleValues[0]) );
        for( int i = 0; i < nodeSampleCount; i++ )
            ordValuesBuf[i] = (&sampleValues[0])[sortedIndicesBuf[i]];
        *sortedIndices = sortedIndicesBuf;
    }

    *ordValues = ordValuesBuf;
}
809
get_cat_var_data(CvDTreeNode * n,int vi,int * catValuesBuf)810 const int* CvCascadeBoostTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf )
811 {
812 int nodeSampleCount = n->sample_count;
813 int* sampleIndicesBuf = catValuesBuf; //
814 const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
815
816 if ( vi < numPrecalcVal )
817 {
818 for( int i = 0; i < nodeSampleCount; i++ )
819 catValuesBuf[i] = (int) valCache.at<float>( vi, sampleIndices[i]);
820 }
821 else
822 {
823 if( vi >= numPrecalcVal && vi < var_count )
824 {
825 for( int i = 0; i < nodeSampleCount; i++ )
826 catValuesBuf[i] = (int)(*featureEvaluator)( vi, sampleIndices[i] );
827 }
828 else
829 {
830 get_cv_labels( n, catValuesBuf );
831 }
832 }
833
834 return catValuesBuf;
835 }
836
getVarValue(int vi,int si)837 float CvCascadeBoostTrainData::getVarValue( int vi, int si )
838 {
839 if ( vi < numPrecalcVal && !valCache.empty() )
840 return valCache.at<float>( vi, si );
841 return (*featureEvaluator)( vi, si );
842 }
843
844
845 struct FeatureIdxOnlyPrecalc : ParallelLoopBody
846 {
FeatureIdxOnlyPrecalcFeatureIdxOnlyPrecalc847 FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u )
848 {
849 featureEvaluator = _featureEvaluator;
850 sample_count = _sample_count;
851 udst = (unsigned short*)_buf->data.s;
852 idst = _buf->data.i;
853 is_buf_16u = _is_buf_16u;
854 }
operator ()FeatureIdxOnlyPrecalc855 void operator()( const Range& range ) const
856 {
857 cv::AutoBuffer<float> valCache(sample_count);
858 float* valCachePtr = (float*)valCache;
859 for ( int fi = range.start; fi < range.end; fi++)
860 {
861 for( int si = 0; si < sample_count; si++ )
862 {
863 valCachePtr[si] = (*featureEvaluator)( fi, si );
864 if ( is_buf_16u )
865 *(udst + (size_t)fi*sample_count + si) = (unsigned short)si;
866 else
867 *(idst + (size_t)fi*sample_count + si) = si;
868 }
869 if ( is_buf_16u )
870 std::sort(udst + (size_t)fi*sample_count, udst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, unsigned short>(valCachePtr) );
871 else
872 std::sort(idst + (size_t)fi*sample_count, idst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, int>(valCachePtr) );
873 }
874 }
875 const CvFeatureEvaluator* featureEvaluator;
876 int sample_count;
877 int* idst;
878 unsigned short* udst;
879 bool is_buf_16u;
880 };
881
882 struct FeatureValAndIdxPrecalc : ParallelLoopBody
883 {
FeatureValAndIdxPrecalcFeatureValAndIdxPrecalc884 FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u )
885 {
886 featureEvaluator = _featureEvaluator;
887 valCache = _valCache;
888 sample_count = _sample_count;
889 udst = (unsigned short*)_buf->data.s;
890 idst = _buf->data.i;
891 is_buf_16u = _is_buf_16u;
892 }
operator ()FeatureValAndIdxPrecalc893 void operator()( const Range& range ) const
894 {
895 for ( int fi = range.start; fi < range.end; fi++)
896 {
897 for( int si = 0; si < sample_count; si++ )
898 {
899 valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
900 if ( is_buf_16u )
901 *(udst + (size_t)fi*sample_count + si) = (unsigned short)si;
902 else
903 *(idst + (size_t)fi*sample_count + si) = si;
904 }
905 if ( is_buf_16u )
906 std::sort(udst + (size_t)fi*sample_count, udst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, unsigned short>(valCache->ptr<float>(fi)) );
907 else
908 std::sort(idst + (size_t)fi*sample_count, idst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, int>(valCache->ptr<float>(fi)) );
909 }
910 }
911 const CvFeatureEvaluator* featureEvaluator;
912 Mat* valCache;
913 int sample_count;
914 int* idst;
915 unsigned short* udst;
916 bool is_buf_16u;
917 };
918
919 struct FeatureValOnlyPrecalc : ParallelLoopBody
920 {
FeatureValOnlyPrecalcFeatureValOnlyPrecalc921 FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count )
922 {
923 featureEvaluator = _featureEvaluator;
924 valCache = _valCache;
925 sample_count = _sample_count;
926 }
operator ()FeatureValOnlyPrecalc927 void operator()( const Range& range ) const
928 {
929 for ( int fi = range.start; fi < range.end; fi++)
930 for( int si = 0; si < sample_count; si++ )
931 valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
932 }
933 const CvFeatureEvaluator* featureEvaluator;
934 Mat* valCache;
935 int sample_count;
936 };
937
// Precompute per-feature data before training, in three disjoint (possibly
// empty) feature ranges:
//   [numPrecalcVal, numPrecalcIdx) : sorted sample order only;
//   [0, min(numPrecalcVal, numPrecalcIdx)) : cached values AND sorted order;
//   [min, numPrecalcVal) : cached values only.
// Also prints the wall-clock time spent.
void CvCascadeBoostTrainData::precalculate()
{
    int minNum = MIN( numPrecalcVal, numPrecalcIdx);

    double proctime = -TIME( 0 );
    parallel_for_( Range(numPrecalcVal, numPrecalcIdx),
                   FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
    parallel_for_( Range(0, minNum),
                   FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
    parallel_for_( Range(minNum, numPrecalcVal),
                   FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
    cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl;
}
951
952 //-------------------------------- CascadeBoostTree ----------------------------------------
953
predict(int sampleIdx) const954 CvDTreeNode* CvCascadeBoostTree::predict( int sampleIdx ) const
955 {
956 CvDTreeNode* node = root;
957 if( !node )
958 CV_Error( CV_StsError, "The tree has not been trained yet" );
959
960 if ( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount() == 0 ) // ordered
961 {
962 while( node->left )
963 {
964 CvDTreeSplit* split = node->split;
965 float val = ((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
966 node = val <= split->ord.c ? node->left : node->right;
967 }
968 }
969 else // categorical
970 {
971 while( node->left )
972 {
973 CvDTreeSplit* split = node->split;
974 int c = (int)((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
975 node = CV_DTREE_CAT_DIR(c, split->subset) < 0 ? node->left : node->right;
976 }
977 }
978 return node;
979 }
980
void CvCascadeBoostTree::write( FileStorage &fs, const Mat& featureMap )
{
    // Serializes the tree breadth-first. Each internal node is written as
    // (leftChildRef, rightChildRef, featureIdx, threshold-or-subset), where a
    // child reference is a positive BFS index for an internal node and a
    // non-positive index (0, -1, -2, ...) for a leaf. Leaf values are emitted
    // separately under CC_LEAF_VALUES; leaf k is stored at leafVals[-k].
    int maxCatCount = ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount();
    int subsetN = (maxCatCount + 31)/32; // 32-bit words per categorical subset mask
    queue<CvDTreeNode*> internalNodesQueue;
    // Upper bound on the leaf count: a tree of depth d has at most 2^d leaves.
    int size = (int)pow( 2.f, (float)ensemble->get_params().max_depth);
    std::vector<float> leafVals(size);
    int leafValIdx = 0;      // decremented per leaf; emitted value doubles as leaf reference
    int internalNodeIdx = 1; // next BFS index assigned to a queued internal node
    CvDTreeNode* tempNode;

    CV_DbgAssert( root );
    internalNodesQueue.push( root );

    fs << "{";
    fs << CC_INTERNAL_NODES << "[:";
    while (!internalNodesQueue.empty())
    {
        tempNode = internalNodesQueue.front();
        CV_Assert( tempNode->left );
        if ( !tempNode->left->left && !tempNode->left->right) // left node is leaf
        {
            leafVals[-leafValIdx] = (float)tempNode->left->value;
            fs << leafValIdx-- ;
        }
        else
        {
            internalNodesQueue.push( tempNode->left );
            fs << internalNodeIdx++;
        }
        CV_Assert( tempNode->right );
        if ( !tempNode->right->left && !tempNode->right->right) // right node is leaf
        {
            leafVals[-leafValIdx] = (float)tempNode->right->value;
            fs << leafValIdx--;
        }
        else
        {
            internalNodesQueue.push( tempNode->right );
            fs << internalNodeIdx++;
        }
        // Remap the feature index through featureMap when only a subset of
        // features is being saved (featureMap maps old index -> new index).
        int fidx = tempNode->split->var_idx;
        fidx = featureMap.empty() ? fidx : featureMap.at<int>(0, fidx);
        fs << fidx;
        if ( !maxCatCount )
            fs << tempNode->split->ord.c; // ordered split: threshold
        else
            for( int i = 0; i < subsetN; i++ ) // categorical split: subset bit mask
                fs << tempNode->split->subset[i];
        internalNodesQueue.pop();
    }
    fs << "]"; // CC_INTERNAL_NODES

    // -leafValIdx is the total number of leaves written above.
    fs << CC_LEAF_VALUES << "[:";
    for (int ni = 0; ni < -leafValIdx; ni++)
        fs << leafVals[ni];
    fs << "]"; // CC_LEAF_VALUES
    fs << "}";
}
1040
void CvCascadeBoostTree::read( const FileNode &node, CvBoost* _ensemble,
                                CvDTreeTrainData* _data )
{
    // Reconstructs a tree written by CvCascadeBoostTree::write(). The nodes
    // were serialized breadth-first, so reading BOTH node streams BACKWARD
    // (iterators start at end() and are decremented) lets children be rebuilt
    // before their parents; the queue holds already-built subtrees in the
    // order their parents will claim them.
    int maxCatCount = ((CvCascadeBoostTrainData*)_data)->featureEvaluator->getMaxCatCount();
    int subsetN = (maxCatCount + 31)/32;
    // Entries per internal node: left ref, right ref, feature idx, plus either
    // one threshold (ordered) or subsetN mask words (categorical).
    int step = 3 + ( maxCatCount>0 ? subsetN : 1 );

    queue<CvDTreeNode*> internalNodesQueue;
    FileNodeIterator internalNodesIt, leafValsuesIt;
    CvDTreeNode* prntNode, *cldNode;

    clear();
    data = _data;
    ensemble = _ensemble;
    pruned_tree_idx = 0;

    // read tree nodes
    FileNode rnode = node[CC_INTERNAL_NODES];
    internalNodesIt = rnode.end();
    leafValsuesIt = node[CC_LEAF_VALUES].end();
    internalNodesIt--; leafValsuesIt--; // point at the last stored element
    for( size_t i = 0; i < rnode.size()/step; i++ )
    {
        prntNode = data->new_node( 0, 0, 0, 0 );
        // Fields come out in reverse of the write order:
        // subset/threshold first, then feature index, then child references.
        if ( maxCatCount > 0 )
        {
            prntNode->split = data->new_split_cat( 0, 0 );
            for( int j = subsetN-1; j>=0; j--)
            {
                *internalNodesIt >> prntNode->split->subset[j]; internalNodesIt--;
            }
        }
        else
        {
            float split_value;
            *internalNodesIt >> split_value; internalNodesIt--;
            prntNode->split = data->new_split_ord( 0, split_value, 0, 0, 0);
        }
        *internalNodesIt >> prntNode->split->var_idx; internalNodesIt--;
        int ridx, lidx;
        *internalNodesIt >> ridx; internalNodesIt--;
        *internalNodesIt >> lidx;internalNodesIt--;
        // A non-positive reference denotes a leaf; its value is the next
        // (reading backward) entry of CC_LEAF_VALUES. A positive reference
        // means the child subtree was already rebuilt and waits in the queue.
        if ( ridx <= 0)
        {
            prntNode->right = cldNode = data->new_node( 0, 0, 0, 0 );
            *leafValsuesIt >> cldNode->value; leafValsuesIt--;
            cldNode->parent = prntNode;
        }
        else
        {
            prntNode->right = internalNodesQueue.front();
            prntNode->right->parent = prntNode;
            internalNodesQueue.pop();
        }

        if ( lidx <= 0)
        {
            prntNode->left = cldNode = data->new_node( 0, 0, 0, 0 );
            *leafValsuesIt >> cldNode->value; leafValsuesIt--;
            cldNode->parent = prntNode;
        }
        else
        {
            prntNode->left = internalNodesQueue.front();
            prntNode->left->parent = prntNode;
            internalNodesQueue.pop();
        }

        internalNodesQueue.push( prntNode );
    }

    // The last node assembled is the root.
    root = internalNodesQueue.front();
    internalNodesQueue.pop();
}
1115
void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
{
    // Partitions the parent node's per-variable sample buffers between the two
    // child nodes according to data->direction (0 = left, 1 = right), keeping
    // the precalculated per-feature sort orders valid in each half, then
    // relocates cv_labels and sample indices and releases the parent's data.
    int n = node->sample_count, nl, nr, scount = data->sample_count;
    char* dir = (char*)data->direction->data.ptr;
    CvDTreeNode *left = 0, *right = 0;
    int* newIdx = data->split_buf->data.i;
    int newBufIdx = data->get_child_buf_idx( node );
    int workVarCount = data->get_work_var_count();
    CvMat* buf = data->buf;
    size_t length_buf_row = data->get_length_subbuf();
    // Scratch: one int row (tempBuf) + one float row + two int rows used by
    // get_ord_var_data below.
    cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int)+sizeof(float)));
    int* tempBuf = (int*)(uchar*)inn_buf;
    bool splitInputData;

    complete_node_dir(node);

    for( int i = nl = nr = 0; i < n; i++ )
    {
        int d = dir[i];
        // initialize new indices for splitting ordered variables
        // Branch-free select: (d-1) is all-ones when d==0, -d is all-ones when d==1.
        newIdx[i] = (nl & (d-1)) | (nr & -d); // d ? ri : li
        nr += d;
        nl += d^1;
    }

    // Children share the parent's buffer row: left at node->offset, right
    // immediately after the nl left samples.
    node->left = left = data->new_node( node, nl, newBufIdx, node->offset );
    node->right = right = data->new_node( node, nr, newBufIdx, node->offset + nl );

    // Only bother splitting the cached data if at least one child can still be split.
    splitInputData = node->depth + 1 < data->params.max_depth &&
        (node->left->sample_count > data->params.min_sample_count ||
        node->right->sample_count > data->params.min_sample_count);

    // split ordered variables, keep both halves sorted.
    for( int vi = 0; vi < ((CvCascadeBoostTrainData*)data)->numPrecalcIdx; vi++ )
    {
        int ci = data->get_var_type(vi);
        if( ci >= 0 || !splitInputData )
            continue;

        int n1 = node->get_num_valid(vi);
        float *src_val_buf = (float*)(tempBuf + n);
        int *src_sorted_idx_buf = (int*)(src_val_buf + n);
        int *src_sample_idx_buf = src_sorted_idx_buf + n;
        const int* src_sorted_idx = 0;
        const float* src_val = 0;
        data->get_ord_var_data(node, vi, src_val_buf, src_sorted_idx_buf, &src_val, &src_sorted_idx, src_sample_idx_buf);

        // Copy the sorted order aside: the destination may alias the source row.
        for(int i = 0; i < n; i++)
            tempBuf[i] = src_sorted_idx[i];

        if (data->is_buf_16u)
        {
            ushort *ldst, *rdst;
            // Right half starts right after the nl left samples in the same row.
            ldst = (ushort*)(buf->data.s + left->buf_idx*length_buf_row +
                vi*scount + left->offset);
            rdst = (ushort*)(ldst + nl);

            // split sorted
            // Stable partition of the sorted order; newIdx remaps each sample
            // to its position within its child.
            for( int i = 0; i < n1; i++ )
            {
                int idx = tempBuf[i];
                int d = dir[idx];
                idx = newIdx[idx];
                if (d)
                {
                    *rdst = (ushort)idx;
                    rdst++;
                }
                else
                {
                    *ldst = (ushort)idx;
                    ldst++;
                }
            }
            CV_Assert( n1 == n );
        }
        else
        {
            int *ldst, *rdst;
            ldst = buf->data.i + left->buf_idx*length_buf_row +
                vi*scount + left->offset;
            rdst = buf->data.i + right->buf_idx*length_buf_row +
                vi*scount + right->offset;

            // split sorted
            for( int i = 0; i < n1; i++ )
            {
                int idx = tempBuf[i];
                int d = dir[idx];
                idx = newIdx[idx];
                if (d)
                {
                    *rdst = idx;
                    rdst++;
                }
                else
                {
                    *ldst = idx;
                    ldst++;
                }
            }
            CV_Assert( n1 == n );
        }
    }

    // split cv_labels using newIdx relocation table
    int *src_lbls_buf = tempBuf + n;
    const int* src_lbls = data->get_cv_labels(node, src_lbls_buf);

    // Copy aside before writing: destination row can alias the source.
    for(int i = 0; i < n; i++)
        tempBuf[i] = src_lbls[i];

    if (data->is_buf_16u)
    {
        // cv_labels live in work-variable row (workVarCount-1).
        unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
            (size_t)(workVarCount-1)*scount + left->offset);
        unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
            (size_t)(workVarCount-1)*scount + right->offset);

        for( int i = 0; i < n; i++ )
        {
            int idx = tempBuf[i];
            if (dir[i])
            {
                *rdst = (unsigned short)idx;
                rdst++;
            }
            else
            {
                *ldst = (unsigned short)idx;
                ldst++;
            }
        }

    }
    else
    {
        int *ldst = buf->data.i + left->buf_idx*length_buf_row +
            (size_t)(workVarCount-1)*scount + left->offset;
        int *rdst = buf->data.i + right->buf_idx*length_buf_row +
            (size_t)(workVarCount-1)*scount + right->offset;

        for( int i = 0; i < n; i++ )
        {
            int idx = tempBuf[i];
            if (dir[i])
            {
                *rdst = idx;
                rdst++;
            }
            else
            {
                *ldst = idx;
                ldst++;
            }
        }
    }

    // split sample indices
    int *sampleIdx_src_buf = tempBuf + n;
    const int* sampleIdx_src = data->get_sample_indices(node, sampleIdx_src_buf);

    for(int i = 0; i < n; i++)
        tempBuf[i] = sampleIdx_src[i];

    if (data->is_buf_16u)
    {
        // Sample indices live in work-variable row workVarCount.
        unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
            (size_t)workVarCount*scount + left->offset);
        unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
            (size_t)workVarCount*scount + right->offset);
        for (int i = 0; i < n; i++)
        {
            unsigned short idx = (unsigned short)tempBuf[i];
            if (dir[i])
            {
                *rdst = idx;
                rdst++;
            }
            else
            {
                *ldst = idx;
                ldst++;
            }
        }
    }
    else
    {
        int* ldst = buf->data.i + left->buf_idx*length_buf_row +
            (size_t)workVarCount*scount + left->offset;
        int* rdst = buf->data.i + right->buf_idx*length_buf_row +
            (size_t)workVarCount*scount + right->offset;
        for (int i = 0; i < n; i++)
        {
            int idx = tempBuf[i];
            if (dir[i])
            {
                *rdst = idx;
                rdst++;
            }
            else
            {
                *ldst = idx;
                ldst++;
            }
        }
    }

    // All variables are fully valid in both children.
    for( int vi = 0; vi < data->var_count; vi++ )
    {
        left->set_num_valid(vi, (int)(nl));
        right->set_num_valid(vi, (int)(nr));
    }

    // deallocate the parent node data that is not needed anymore
    data->free_node_data(node);
}
1333
auxMarkFeaturesInMap(const CvDTreeNode * node,Mat & featureMap)1334 static void auxMarkFeaturesInMap( const CvDTreeNode* node, Mat& featureMap)
1335 {
1336 if ( node && node->split )
1337 {
1338 featureMap.ptr<int>(0)[node->split->var_idx] = 1;
1339 auxMarkFeaturesInMap( node->left, featureMap );
1340 auxMarkFeaturesInMap( node->right, featureMap );
1341 }
1342 }
1343
void CvCascadeBoostTree::markFeaturesInMap( Mat& featureMap )
{
    // Sets featureMap(0, fidx) = 1 for every feature referenced by a split of
    // this tree (recursive walk from the root).
    auxMarkFeaturesInMap( root, featureMap );
}
1348
1349 //----------------------------------- CascadeBoost --------------------------------------
1350
bool CvCascadeBoost::train( const CvFeatureEvaluator* _featureEvaluator,
                            int _numSamples,
                            int _precalcValBufSize, int _precalcIdxBufSize,
                            const CvCascadeBoostParams& _params )
{
    // Trains one boosted stage: adds weak trees until the stage reaches the
    // desired false-alarm rate (isErrDesired) or the weak-count limit.
    // Returns true if at least one weak classifier was trained.
    bool isTrained = false;
    CV_Assert( !data );
    clear();
    data = new CvCascadeBoostTrainData( _featureEvaluator, _numSamples,
                                        _precalcValBufSize, _precalcIdxBufSize, _params );
    CvMemStorage *storage = cvCreateMemStorage();
    weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
    storage = 0; // ownership passed to the sequence

    set_params( _params );
    // LogitBoost/GentleBoost train regression trees on recomputed targets,
    // so the responses must be copied before being overwritten.
    if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) )
        data->do_responses_copy();

    update_weights( 0 ); // initialize weights, responses and bookkeeping mats

    cout << "+----+---------+---------+" << endl;
    cout << "| N | HR | FA |" << endl;
    cout << "+----+---------+---------+" << endl;

    do
    {
        CvCascadeBoostTree* tree = new CvCascadeBoostTree;
        if( !tree->train( data, subsample_mask, this ) )
        {
            delete tree;
            break;
        }
        cvSeqPush( weak, &tree );
        update_weights( tree );
        trim_weights();
        // All samples trimmed away: nothing left to train on.
        if( cvCountNonZero(subsample_mask) == 0 )
            break;
    }
    while( !isErrDesired() && (weak->total < params.weak_count) );

    if(weak->total > 0)
    {
        data->is_classifier = true;
        data->free_train_data();
        isTrained = true;
    }
    else
        clear();

    return isTrained;
}
1402
predict(int sampleIdx,bool returnSum) const1403 float CvCascadeBoost::predict( int sampleIdx, bool returnSum ) const
1404 {
1405 CV_Assert( weak );
1406 double sum = 0;
1407 CvSeqReader reader;
1408 cvStartReadSeq( weak, &reader );
1409 cvSetSeqReaderPos( &reader, 0 );
1410 for( int i = 0; i < weak->total; i++ )
1411 {
1412 CvBoostTree* wtree;
1413 CV_READ_SEQ_ELEM( wtree, reader );
1414 sum += ((CvCascadeBoostTree*)wtree)->predict(sampleIdx)->value;
1415 }
1416 if( !returnSum )
1417 sum = sum < threshold - CV_THRESHOLD_EPS ? 0.0 : 1.0;
1418 return (float)sum;
1419 }
1420
set_params(const CvBoostParams & _params)1421 bool CvCascadeBoost::set_params( const CvBoostParams& _params )
1422 {
1423 minHitRate = ((CvCascadeBoostParams&)_params).minHitRate;
1424 maxFalseAlarm = ((CvCascadeBoostParams&)_params).maxFalseAlarm;
1425 return ( ( minHitRate > 0 ) && ( minHitRate < 1) &&
1426 ( maxFalseAlarm > 0 ) && ( maxFalseAlarm < 1) &&
1427 CvBoost::set_params( _params ));
1428 }
1429
void CvCascadeBoost::update_weights( CvBoostTree* tree )
{
    // Two roles:
    //  - tree == 0: first call; allocates the bookkeeping matrices and
    //    initializes responses, masks and uniform weights.
    //  - tree != 0: after a weak tree is trained; recomputes sample weights
    //    according to params.boost_type (DISCRETE/REAL/LOGIT/GENTLE) and
    //    renormalizes them to sum to 1.
    int n = data->sample_count;
    double sumW = 0.;
    int step = 0;
    float* fdata = 0;
    int *sampleIdxBuf;
    const int* sampleIdx = 0;
    // Scratch sized for the pieces actually needed below.
    int inn_buf_size = ((params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? n*sizeof(int) : 0) +
                       ( !tree ? n*sizeof(int) : 0 );
    cv::AutoBuffer<uchar> inn_buf(inn_buf_size);
    uchar* cur_inn_buf_pos = (uchar*)inn_buf;
    if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
    {
        // fdata points into the copied responses, which these regression-based
        // boosters overwrite with per-round target values.
        step = CV_IS_MAT_CONT(data->responses_copy->type) ?
            1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type);
        fdata = data->responses_copy->data.fl;
        sampleIdxBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(sampleIdxBuf + n);
        sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
    }
    CvMat* buf = data->buf;
    size_t length_buf_row = data->get_length_subbuf();
    if( !tree ) // before training the first tree, initialize weights and other parameters
    {
        int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n);
        const int* classLabels = data->get_class_labels(data->data_root, classLabelsBuf);
        // in case of logitboost and gentle adaboost each weak tree is a regression tree,
        // so we need to convert class labels to floating-point values
        double w0 = 1./n;
        double p[2] = { 1, 1 };

        cvReleaseMat( &orig_response );
        cvReleaseMat( &sum_response );
        cvReleaseMat( &weak_eval );
        cvReleaseMat( &subsample_mask );
        cvReleaseMat( &weights );

        orig_response = cvCreateMat( 1, n, CV_32S );
        weak_eval = cvCreateMat( 1, n, CV_64F );
        subsample_mask = cvCreateMat( 1, n, CV_8U );
        weights = cvCreateMat( 1, n, CV_64F );
        subtree_weights = cvCreateMat( 1, n + 2, CV_64F );

        if (data->is_buf_16u)
        {
            unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row +
                data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count);
            for( int i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = classLabels[i]*2 - 1;
                // make all the samples active at start.
                // later, in trim_weights() deactivate/reactive again some, if need
                subsample_mask->data.ptr[i] = (uchar)1;
                // make all the initial weights the same.
                weights->data.db[i] = w0*p[classLabels[i]];
                // set the labels to find (from within weak tree learning proc)
                // the particular sample weight, and where to store the response.
                labels[i] = (unsigned short)i;
            }
        }
        else
        {
            int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row +
                data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count;

            for( int i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = classLabels[i]*2 - 1;
                subsample_mask->data.ptr[i] = (uchar)1;
                weights->data.db[i] = w0*p[classLabels[i]];
                labels[i] = i;
            }
        }

        if( params.boost_type == LOGIT )
        {
            sum_response = cvCreateMat( 1, n, CV_64F );

            for( int i = 0; i < n; i++ )
            {
                sum_response->data.db[i] = 0;
                // Initial regression targets for LogitBoost: +/-2.
                fdata[sampleIdx[i]*step] = orig_response->data.i[i] > 0 ? 2.f : -2.f;
            }

            // in case of logitboost each weak tree is a regression tree.
            // the target function values are recalculated for each of the trees
            data->is_classifier = false;
        }
        else if( params.boost_type == GENTLE )
        {
            // GentleBoost regresses directly on the {-1,1} responses.
            for( int i = 0; i < n; i++ )
                fdata[sampleIdx[i]*step] = (float)orig_response->data.i[i];

            data->is_classifier = false;
        }
    }
    else
    {
        // at this moment, for all the samples that participated in the training of the most
        // recent weak classifier we know the responses. For other samples we need to compute them
        if( have_subsample )
        {
            // invert the subsample mask
            cvXorS( subsample_mask, cvScalar(1.), subsample_mask );

            // run tree through all the non-processed samples
            for( int i = 0; i < n; i++ )
                if( subsample_mask->data.ptr[i] )
                {
                    weak_eval->data.db[i] = ((CvCascadeBoostTree*)tree)->predict( i )->value;
                }
        }

        // now update weights and other parameters for each type of boosting
        if( params.boost_type == DISCRETE )
        {
            // Discrete AdaBoost:
            // weak_eval[i] (=f(x_i)) is in {-1,1}
            // err = sum(w_i*(f(x_i) != y_i))/sum(w_i)
            // C = log((1-err)/err)
            // w_i *= exp(C*(f(x_i) != y_i))

            double C, err = 0.;
            // scale[0] for correctly classified, scale[1] for misclassified.
            double scale[] = { 1., 0. };

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i];
                sumW += w;
                err += w*(weak_eval->data.db[i] != orig_response->data.i[i]);
            }

            if( sumW != 0 )
                err /= sumW;
            C = err = -logRatio( err );
            scale[1] = exp(err);

            sumW = 0;
            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i]*
                    scale[weak_eval->data.db[i] != orig_response->data.i[i]];
                sumW += w;
                weights->data.db[i] = w;
            }

            // Fold the classifier weight C into the tree's leaf values.
            tree->scale( C );
        }
        else if( params.boost_type == REAL )
        {
            // Real AdaBoost:
            // weak_eval[i] = f(x_i) = 0.5*log(p(x_i)/(1-p(x_i))), p(x_i)=P(y=1|x_i)
            // w_i *= exp(-y_i*f(x_i))

            for( int i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i];

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i]*weak_eval->data.db[i];
                sumW += w;
                weights->data.db[i] = w;
            }
        }
        else if( params.boost_type == LOGIT )
        {
            // LogitBoost:
            // weak_eval[i] = f(x_i) in [-z_max,z_max]
            // sum_response = F(x_i).
            // F(x_i) += 0.5*f(x_i)
            // p(x_i) = exp(F(x_i))/(exp(F(x_i)) + exp(-F(x_i))=1/(1+exp(-2*F(x_i)))
            // reuse weak_eval: weak_eval[i] <- p(x_i)
            // w_i = p(x_i)*1(1 - p(x_i))
            // z_i = ((y_i+1)/2 - p(x_i))/(p(x_i)*(1 - p(x_i)))
            // store z_i to the data->data_root as the new target responses

            const double lbWeightThresh = FLT_EPSILON; // floor on weights to avoid degeneracy
            const double lbZMax = 10.; // clamp on the working responses z_i

            for( int i = 0; i < n; i++ )
            {
                double s = sum_response->data.db[i] + 0.5*weak_eval->data.db[i];
                sum_response->data.db[i] = s;
                weak_eval->data.db[i] = -2*s;
            }

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double p = 1./(1. + weak_eval->data.db[i]);
                double w = p*(1 - p), z;
                w = MAX( w, lbWeightThresh );
                weights->data.db[i] = w;
                sumW += w;
                if( orig_response->data.i[i] > 0 )
                {
                    z = 1./p;
                    fdata[sampleIdx[i]*step] = (float)min(z, lbZMax);
                }
                else
                {
                    z = 1./(1-p);
                    fdata[sampleIdx[i]*step] = (float)-min(z, lbZMax);
                }
            }
        }
        else
        {
            // Gentle AdaBoost:
            // weak_eval[i] = f(x_i) in [-1,1]
            // w_i *= exp(-y_i*f(x_i))
            assert( params.boost_type == GENTLE );

            for( int i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i];

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i] * weak_eval->data.db[i];
                weights->data.db[i] = w;
                sumW += w;
            }
        }
    }

    // renormalize weights
    if( sumW > FLT_EPSILON )
    {
        sumW = 1./sumW;
        for( int i = 0; i < n; ++i )
            weights->data.db[i] *= sumW;
    }
}
1670
isErrDesired()1671 bool CvCascadeBoost::isErrDesired()
1672 {
1673 int sCount = data->sample_count,
1674 numPos = 0, numNeg = 0, numFalse = 0, numPosTrue = 0;
1675 vector<float> eval(sCount);
1676
1677 for( int i = 0; i < sCount; i++ )
1678 if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 1.0F )
1679 eval[numPos++] = predict( i, true );
1680
1681 std::sort(&eval[0], &eval[0] + numPos);
1682
1683 int thresholdIdx = (int)((1.0F - minHitRate) * numPos);
1684
1685 threshold = eval[ thresholdIdx ];
1686 numPosTrue = numPos - thresholdIdx;
1687 for( int i = thresholdIdx - 1; i >= 0; i--)
1688 if ( abs( eval[i] - threshold) < FLT_EPSILON )
1689 numPosTrue++;
1690 float hitRate = ((float) numPosTrue) / ((float) numPos);
1691
1692 for( int i = 0; i < sCount; i++ )
1693 {
1694 if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 0.0F )
1695 {
1696 numNeg++;
1697 if( predict( i ) )
1698 numFalse++;
1699 }
1700 }
1701 float falseAlarm = ((float) numFalse) / ((float) numNeg);
1702
1703 cout << "|"; cout.width(4); cout << right << weak->total;
1704 cout << "|"; cout.width(9); cout << right << hitRate;
1705 cout << "|"; cout.width(9); cout << right << falseAlarm;
1706 cout << "|" << endl;
1707 cout << "+----+---------+---------+" << endl;
1708
1709 return falseAlarm <= maxFalseAlarm;
1710 }
1711
write(FileStorage & fs,const Mat & featureMap) const1712 void CvCascadeBoost::write( FileStorage &fs, const Mat& featureMap ) const
1713 {
1714 // char cmnt[30];
1715 CvCascadeBoostTree* weakTree;
1716 fs << CC_WEAK_COUNT << weak->total;
1717 fs << CC_STAGE_THRESHOLD << threshold;
1718 fs << CC_WEAK_CLASSIFIERS << "[";
1719 for( int wi = 0; wi < weak->total; wi++)
1720 {
1721 /*sprintf( cmnt, "tree %i", wi );
1722 cvWriteComment( fs, cmnt, 0 );*/
1723 weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
1724 weakTree->write( fs, featureMap );
1725 }
1726 fs << "]";
1727 }
1728
read(const FileNode & node,const CvFeatureEvaluator * _featureEvaluator,const CvCascadeBoostParams & _params)1729 bool CvCascadeBoost::read( const FileNode &node,
1730 const CvFeatureEvaluator* _featureEvaluator,
1731 const CvCascadeBoostParams& _params )
1732 {
1733 CvMemStorage* storage;
1734 clear();
1735 data = new CvCascadeBoostTrainData( _featureEvaluator, _params );
1736 set_params( _params );
1737
1738 node[CC_STAGE_THRESHOLD] >> threshold;
1739 FileNode rnode = node[CC_WEAK_CLASSIFIERS];
1740
1741 storage = cvCreateMemStorage();
1742 weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
1743 for( FileNodeIterator it = rnode.begin(); it != rnode.end(); it++ )
1744 {
1745 CvCascadeBoostTree* tree = new CvCascadeBoostTree();
1746 tree->read( *it, this, data );
1747 cvSeqPush( weak, &tree );
1748 }
1749 return true;
1750 }
1751
markUsedFeaturesInMap(Mat & featureMap)1752 void CvCascadeBoost::markUsedFeaturesInMap( Mat& featureMap )
1753 {
1754 for( int wi = 0; wi < weak->total; wi++ )
1755 {
1756 CvCascadeBoostTree* weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
1757 weakTree->markFeaturesInMap( featureMap );
1758 }
1759 }
1760