/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "_ml.h"

CvANN_MLP_TrainParams::CvANN_MLP_TrainParams()
{
    term_crit = cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01 );
    train_method = RPROP;
    bp_dw_scale = bp_moment_scale = 0.1;
    rp_dw0 = 0.1; rp_dw_plus = 1.2; rp_dw_minus = 0.5;
    rp_dw_min = FLT_EPSILON; rp_dw_max = 50.;
}


CvANN_MLP_TrainParams::CvANN_MLP_TrainParams( CvTermCriteria _term_crit,
                                              int _train_method,
                                              double _param1, double _param2 )
{
    term_crit = _term_crit;
    train_method = _train_method;
    bp_dw_scale = bp_moment_scale = 0.1;
    rp_dw0 = 1.; rp_dw_plus = 1.2; rp_dw_minus = 0.5;
    rp_dw_min = FLT_EPSILON; rp_dw_max = 50.;

    if( train_method == RPROP )
    {
        rp_dw0 = _param1;
        if( rp_dw0 < FLT_EPSILON )
            rp_dw0 = 1.;
        rp_dw_min = _param2;
        rp_dw_min = MAX( rp_dw_min, 0 );
    }
    else if( train_method == BACKPROP )
    {
        bp_dw_scale = _param1;
        if( bp_dw_scale <= 0 )
            bp_dw_scale = 0.1;
        bp_dw_scale = MAX( bp_dw_scale, 1e-3 );
        bp_dw_scale = MIN( bp_dw_scale, 1 );
        bp_moment_scale = _param2;
        if( bp_moment_scale < 0 )
            bp_moment_scale = 0.1;
        bp_moment_scale = MIN( bp_moment_scale, 1 );
    }
    else
        train_method = RPROP;
}
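
/* Usage sketch (illustrative, not part of the library): constructing RPROP
   parameters with explicit termination criteria. The numeric values are
   assumptions chosen for the example, not recommended defaults.

   CvANN_MLP_TrainParams params(
       cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 300, 0.001 ),
       CvANN_MLP_TrainParams::RPROP,
       0.1,            // _param1 -> rp_dw0, the initial per-weight update step
       FLT_EPSILON );  // _param2 -> rp_dw_min, the lower bound on that step
*/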


CvANN_MLP_TrainParams::~CvANN_MLP_TrainParams()
{
}


CvANN_MLP::CvANN_MLP()
{
    layer_sizes = wbuf = 0;
    min_val = max_val = min_val1 = max_val1 = 0.;
    weights = 0;
    rng = cvRNG(-1);
    default_model_name = "my_nn";
    clear();
}


CvANN_MLP::CvANN_MLP( const CvMat* _layer_sizes,
                      int _activ_func,
                      double _f_param1, double _f_param2 )
{
    layer_sizes = wbuf = 0;
    min_val = max_val = min_val1 = max_val1 = 0.;
    weights = 0;
    rng = cvRNG(-1);
    default_model_name = "my_nn";
    create( _layer_sizes, _activ_func, _f_param1, _f_param2 );
}


CvANN_MLP::~CvANN_MLP()
{
    clear();
}


void CvANN_MLP::clear()
{
    cvReleaseMat( &layer_sizes );
    cvReleaseMat( &wbuf );
    cvFree( &weights );
    activ_func = SIGMOID_SYM;
    f_param1 = f_param2 = 1;
    max_buf_sz = 1 << 12;
}


void CvANN_MLP::set_activ_func( int _activ_func, double _f_param1, double _f_param2 )
{
    CV_FUNCNAME( "CvANN_MLP::set_activ_func" );

    __BEGIN__;

    if( _activ_func < 0 || _activ_func > GAUSSIAN )
        CV_ERROR( CV_StsOutOfRange, "Unknown activation function" );

    activ_func = _activ_func;

    switch( activ_func )
    {
    case SIGMOID_SYM:
        max_val = 0.95; min_val = -max_val;
        max_val1 = 0.98; min_val1 = -max_val1;
        if( fabs(_f_param1) < FLT_EPSILON )
            _f_param1 = 2./3;
        if( fabs(_f_param2) < FLT_EPSILON )
            _f_param2 = 1.7159;
        break;
    case GAUSSIAN:
        max_val = 1.; min_val = 0.05;
        max_val1 = 1.; min_val1 = 0.02;
        if( fabs(_f_param1) < FLT_EPSILON )
            _f_param1 = 1.;
        if( fabs(_f_param2) < FLT_EPSILON )
            _f_param2 = 1.;
        break;
    default:
        min_val = max_val = min_val1 = max_val1 = 0.;
        _f_param1 = 1.;
        _f_param2 = 0.;
    }

    f_param1 = _f_param1;
    f_param2 = _f_param2;

    __END__;
}


void CvANN_MLP::init_weights()
{
    int i, j, k;

    for( i = 1; i < layer_sizes->cols; i++ )
    {
        int n1 = layer_sizes->data.i[i-1];
        int n2 = layer_sizes->data.i[i];
        double val = 0, G = n2 > 2 ? 0.7*pow((double)n1,1./(n2-1)) : 1.;
        double* w = weights[i];

        // initialize weights using Nguyen-Widrow algorithm
        for( j = 0; j < n2; j++ )
        {
            double s = 0;
            for( k = 0; k <= n1; k++ )
            {
                val = cvRandReal(&rng)*2 - 1.;
                w[k*n2 + j] = val;
                s += val;
            }

            if( i < layer_sizes->cols - 1 )
            {
                s = 1./(s - val);
                for( k = 0; k <= n1; k++ )
                    w[k*n2 + j] *= s;
                w[n1*n2 + j] *= G*(-1 + j*2./n2);
            }
        }
    }
}


void CvANN_MLP::create( const CvMat* _layer_sizes, int _activ_func,
                        double _f_param1, double _f_param2 )
{
    CV_FUNCNAME( "CvANN_MLP::create" );

    __BEGIN__;

    int i, l_step, l_count, buf_sz = 0;
    int *l_src, *l_dst;

    clear();

    if( !CV_IS_MAT(_layer_sizes) ||
        (_layer_sizes->cols != 1 && _layer_sizes->rows != 1) ||
        CV_MAT_TYPE(_layer_sizes->type) != CV_32SC1 )
        CV_ERROR( CV_StsBadArg,
        "The array of layer neuron counters must be an integer vector" );

    CV_CALL( set_activ_func( _activ_func, _f_param1, _f_param2 ));

    l_count = _layer_sizes->rows + _layer_sizes->cols - 1;
    l_src = _layer_sizes->data.i;
    l_step = CV_IS_MAT_CONT(_layer_sizes->type) ? 1 :
             _layer_sizes->step / sizeof(l_src[0]);
    CV_CALL( layer_sizes = cvCreateMat( 1, l_count, CV_32SC1 ));
    l_dst = layer_sizes->data.i;

    max_count = 0;
    for( i = 0; i < l_count; i++ )
    {
        int n = l_src[i*l_step];
        if( n < 1 + (0 < i && i < l_count-1))
            CV_ERROR( CV_StsOutOfRange,
            "there should be at least one input and one output "
            "and every hidden layer must have more than 1 neuron" );
        l_dst[i] = n;
        max_count = MAX( max_count, n );
        if( i > 0 )
            buf_sz += (l_dst[i-1]+1)*n;
    }

    buf_sz += (l_dst[0] + l_dst[l_count-1]*2)*2;

    CV_CALL( wbuf = cvCreateMat( 1, buf_sz, CV_64F ));
    // weights[] holds l_count+2 pointers: input scale, l_count-1 weight
    // matrices, output scale and inverse output scale (see assignments below)
    CV_CALL( weights = (double**)cvAlloc( (l_count+2)*sizeof(weights[0]) ));

    weights[0] = wbuf->data.db;
    weights[1] = weights[0] + l_dst[0]*2;
    for( i = 1; i < l_count; i++ )
        weights[i+1] = weights[i] + (l_dst[i-1] + 1)*l_dst[i];
    weights[l_count+1] = weights[l_count] + l_dst[l_count-1]*2;

    __END__;
}
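
/* Usage sketch (illustrative; uses only the classic C API types seen in this
   file): a 2-4-1 topology, i.e. two inputs, one hidden layer of four neurons,
   one output. Passing 0 for the activation parameters selects the defaults
   chosen in set_activ_func().

   int sizes[] = { 2, 4, 1 };
   CvMat layer_sizes_hdr = cvMat( 1, 3, CV_32SC1, sizes );
   CvANN_MLP mlp;
   mlp.create( &layer_sizes_hdr, CvANN_MLP::SIGMOID_SYM, 0, 0 );
*/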


float CvANN_MLP::predict( const CvMat* _inputs, CvMat* _outputs ) const
{
    CV_FUNCNAME( "CvANN_MLP::predict" );

    __BEGIN__;

    double* buf;
    int i, j, n, dn = 0, l_count, dn0, buf_sz, min_buf_sz;

    if( !layer_sizes )
        CV_ERROR( CV_StsError, "The network has not been initialized" );

    if( !CV_IS_MAT(_inputs) || !CV_IS_MAT(_outputs) ||
        !CV_ARE_TYPES_EQ(_inputs,_outputs) ||
        (CV_MAT_TYPE(_inputs->type) != CV_32FC1 &&
        CV_MAT_TYPE(_inputs->type) != CV_64FC1) ||
        _inputs->rows != _outputs->rows )
        CV_ERROR( CV_StsBadArg, "Both input and output must be floating-point matrices "
                                "of the same type and have the same number of rows" );

    if( _inputs->cols != layer_sizes->data.i[0] )
        CV_ERROR( CV_StsBadSize, "input matrix must have the same number of columns as "
                                 "the number of neurons in the input layer" );

    if( _outputs->cols != layer_sizes->data.i[layer_sizes->cols - 1] )
        CV_ERROR( CV_StsBadSize, "output matrix must have the same number of columns as "
                                 "the number of neurons in the output layer" );

    n = dn0 = _inputs->rows;
    min_buf_sz = 2*max_count;
    buf_sz = n*min_buf_sz;

    if( buf_sz > max_buf_sz )
    {
        dn0 = max_buf_sz/min_buf_sz;
        dn0 = MAX( dn0, 1 );
        buf_sz = dn0*min_buf_sz;
    }

    buf = (double*)cvStackAlloc( buf_sz*sizeof(buf[0]) );
    l_count = layer_sizes->cols;

    for( i = 0; i < n; i += dn )
    {
        CvMat hdr[2], _w, *layer_in = &hdr[0], *layer_out = &hdr[1], *temp;
        dn = MIN( dn0, n - i );

        cvGetRows( _inputs, layer_in, i, i + dn );
        cvInitMatHeader( layer_out, dn, layer_in->cols, CV_64F, buf );

        scale_input( layer_in, layer_out );
        CV_SWAP( layer_in, layer_out, temp );

        for( j = 1; j < l_count; j++ )
        {
            double* data = buf + (j&1 ? max_count*dn0 : 0);
            int cols = layer_sizes->data.i[j];

            cvInitMatHeader( layer_out, dn, cols, CV_64F, data );
            cvInitMatHeader( &_w, layer_in->cols, layer_out->cols, CV_64F, weights[j] );
            cvGEMM( layer_in, &_w, 1, 0, 0, layer_out );
            calc_activ_func( layer_out, _w.data.db + _w.rows*_w.cols );

            CV_SWAP( layer_in, layer_out, temp );
        }

        cvGetRows( _outputs, layer_out, i, i + dn );
        scale_output( layer_in, layer_out );
    }

    __END__;

    return 0.f;
}
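
/* Usage sketch (illustrative): running the trained 2-4-1 network from the
   examples above on a single sample. predict() writes the scaled network
   response into _outputs; its float return value is unused (always 0.f).

   float in[2] = { 0.f, 1.f };
   float out[1];
   CvMat inputs  = cvMat( 1, 2, CV_32FC1, in );
   CvMat outputs = cvMat( 1, 1, CV_32FC1, out );
   mlp.predict( &inputs, &outputs );
*/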


void CvANN_MLP::scale_input( const CvMat* _src, CvMat* _dst ) const
{
    int i, j, cols = _src->cols;
    double* dst = _dst->data.db;
    const double* w = weights[0];
    int step = _src->step;

    if( CV_MAT_TYPE( _src->type ) == CV_32F )
    {
        const float* src = _src->data.fl;
        step /= sizeof(src[0]);

        for( i = 0; i < _src->rows; i++, src += step, dst += cols )
            for( j = 0; j < cols; j++ )
                dst[j] = src[j]*w[j*2] + w[j*2+1];
    }
    else
    {
        const double* src = _src->data.db;
        step /= sizeof(src[0]);

        for( i = 0; i < _src->rows; i++, src += step, dst += cols )
            for( j = 0; j < cols; j++ )
                dst[j] = src[j]*w[j*2] + w[j*2+1];
    }
}


void CvANN_MLP::scale_output( const CvMat* _src, CvMat* _dst ) const
{
    int i, j, cols = _src->cols;
    const double* src = _src->data.db;
    const double* w = weights[layer_sizes->cols];
    int step = _dst->step;

    if( CV_MAT_TYPE( _dst->type ) == CV_32F )
    {
        float* dst = _dst->data.fl;
        step /= sizeof(dst[0]);

        for( i = 0; i < _src->rows; i++, src += cols, dst += step )
            for( j = 0; j < cols; j++ )
                dst[j] = (float)(src[j]*w[j*2] + w[j*2+1]);
    }
    else
    {
        double* dst = _dst->data.db;
        step /= sizeof(dst[0]);

        for( i = 0; i < _src->rows; i++, src += cols, dst += step )
            for( j = 0; j < cols; j++ )
                dst[j] = src[j]*w[j*2] + w[j*2+1];
    }
}


void CvANN_MLP::calc_activ_func( CvMat* sums, const double* bias ) const
{
    int i, j, n = sums->rows, cols = sums->cols;
    double* data = sums->data.db;
    double scale = 0, scale2 = f_param2;

    switch( activ_func )
    {
    case IDENTITY:
        scale = 1.;
        break;
    case SIGMOID_SYM:
        scale = -f_param1;
        break;
    case GAUSSIAN:
        scale = -f_param1*f_param1;
        break;
    default:
        ;
    }

    assert( CV_IS_MAT_CONT(sums->type) );

    if( activ_func != GAUSSIAN )
    {
        for( i = 0; i < n; i++, data += cols )
            for( j = 0; j < cols; j++ )
                data[j] = (data[j] + bias[j])*scale;

        if( activ_func == IDENTITY )
            return;
    }
    else
    {
        for( i = 0; i < n; i++, data += cols )
            for( j = 0; j < cols; j++ )
            {
                double t = data[j] + bias[j];
                data[j] = t*t*scale;
            }
    }

    cvExp( sums, sums );

    n *= cols;
    data -= n;

    switch( activ_func )
    {
    case SIGMOID_SYM:
        for( i = 0; i <= n - 4; i += 4 )
        {
            double x0 = 1.+data[i], x1 = 1.+data[i+1], x2 = 1.+data[i+2], x3 = 1.+data[i+3];
            double a = x0*x1, b = x2*x3, d = scale2/(a*b), t0, t1;
            a *= d; b *= d;
            t0 = (2 - x0)*b*x1; t1 = (2 - x1)*b*x0;
            data[i] = t0; data[i+1] = t1;
            t0 = (2 - x2)*a*x3; t1 = (2 - x3)*a*x2;
            data[i+2] = t0; data[i+3] = t1;
        }

        for( ; i < n; i++ )
        {
            double t = scale2*(1. - data[i])/(1. + data[i]);
            data[i] = t;
        }
        break;

    case GAUSSIAN:
        for( i = 0; i < n; i++ )
            data[i] = scale2*data[i];
        break;

    default:
        ;
    }
}
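
/* Reference note (closed forms recovered from the code above; the IDENTITY
   case returns before the exponentiation step):
       SIGMOID_SYM: f(x) = f_param2*(1 - exp(-f_param1*x))/(1 + exp(-f_param1*x)),
                    i.e. a scaled hyperbolic tangent f_param2*tanh(f_param1*x/2);
       GAUSSIAN:    f(x) = f_param2*exp(-(f_param1*x)^2);
       IDENTITY:    f(x) = x.
   The unrolled SIGMOID_SYM loop evaluates (1-e)/(1+e) for four elements at a
   time, sharing a single division across the block. */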


void CvANN_MLP::calc_activ_func_deriv( CvMat* _xf, CvMat* _df,
                                       const double* bias ) const
{
    int i, j, n = _xf->rows, cols = _xf->cols;
    double* xf = _xf->data.db;
    double* df = _df->data.db;
    double scale, scale2 = f_param2;
    assert( CV_IS_MAT_CONT( _xf->type & _df->type ) );

    if( activ_func == IDENTITY )
    {
        for( i = 0; i < n; i++, xf += cols, df += cols )
            for( j = 0; j < cols; j++ )
            {
                xf[j] += bias[j];
                df[j] = 1;
            }
        return;
    }
    else if( activ_func == GAUSSIAN )
    {
        scale = -f_param1*f_param1;
        scale2 *= scale;
        for( i = 0; i < n; i++, xf += cols, df += cols )
            for( j = 0; j < cols; j++ )
            {
                double t = xf[j] + bias[j];
                df[j] = t*2*scale2;
                xf[j] = t*t*scale;
            }
    }
    else
    {
        scale = -f_param1;
        for( i = 0; i < n; i++, xf += cols, df += cols )
            for( j = 0; j < cols; j++ )
                xf[j] = (xf[j] + bias[j])*scale;
    }

    cvExp( _xf, _xf );

    n *= cols;
    xf -= n; df -= n;

    // ((1+exp(-ax))^-1)' = a*((1+exp(-ax))^-2)*exp(-ax);
    // ((1-exp(-ax))/(1+exp(-ax)))' = (a*exp(-ax)*(1+exp(-ax)) +
    //     a*exp(-ax)*(1-exp(-ax)))/(1+exp(-ax))^2 = 2*a*exp(-ax)/(1+exp(-ax))^2
    switch( activ_func )
    {
    case SIGMOID_SYM:
        scale *= -2*f_param2;
        for( i = 0; i <= n - 4; i += 4 )
        {
            double x0 = 1.+xf[i], x1 = 1.+xf[i+1], x2 = 1.+xf[i+2], x3 = 1.+xf[i+3];
            double a = x0*x1, b = x2*x3, d = 1./(a*b), t0, t1;
            a *= d; b *= d;

            t0 = b*x1; t1 = b*x0;
            df[i] = scale*xf[i]*t0*t0;
            df[i+1] = scale*xf[i+1]*t1*t1;
            t0 *= scale2*(2 - x0); t1 *= scale2*(2 - x1);
            xf[i] = t0; xf[i+1] = t1;

            t0 = a*x3; t1 = a*x2;
            df[i+2] = scale*xf[i+2]*t0*t0;
            df[i+3] = scale*xf[i+3]*t1*t1;
            t0 *= scale2*(2 - x2); t1 *= scale2*(2 - x3);
            xf[i+2] = t0; xf[i+3] = t1;
        }

        for( ; i < n; i++ )
        {
            double t0 = 1./(1. + xf[i]);
            double t1 = scale*xf[i]*t0*t0;
            t0 *= scale2*(1. - xf[i]);
            df[i] = t1;
            xf[i] = t0;
        }
        break;

    case GAUSSIAN:
        for( i = 0; i < n; i++ )
            df[i] *= xf[i];
        break;
    default:
        ;
    }
}


void CvANN_MLP::calc_input_scale( const CvVectors* vecs, int flags )
{
    bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
    bool no_scale = (flags & NO_INPUT_SCALE) != 0;
    double* scale = weights[0];
    int count = vecs->count;

    if( reset_weights )
    {
        int i, j, vcount = layer_sizes->data.i[0];
        int type = vecs->type;
        double a = no_scale ? 1. : 0.;

        for( j = 0; j < vcount; j++ )
            scale[2*j] = a, scale[j*2+1] = 0.;

        if( no_scale )
            return;

        for( i = 0; i < count; i++ )
        {
            const float* f = vecs->data.fl[i];
            const double* d = vecs->data.db[i];
            for( j = 0; j < vcount; j++ )
            {
                double t = type == CV_32F ? (double)f[j] : d[j];
                scale[j*2] += t;
                scale[j*2+1] += t*t;
            }
        }

        for( j = 0; j < vcount; j++ )
        {
            double s = scale[j*2], s2 = scale[j*2+1];
            double m = s/count, sigma2 = s2/count - m*m;
            scale[j*2] = sigma2 < DBL_EPSILON ? 1 : 1./sqrt(sigma2);
            scale[j*2+1] = -m*scale[j*2];
        }
    }
}
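
/* Note (derived from the code above): when input scaling is enabled, each
   input component j ends up standardized, since scale[2j] = 1/sigma_j and
   scale[2j+1] = -mean_j/sigma_j, so x*scale[2j] + scale[2j+1] =
   (x - mean_j)/sigma_j, i.e. zero mean and unit variance over the
   training set. */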


void CvANN_MLP::calc_output_scale( const CvVectors* vecs, int flags )
{
    int i, j, vcount = layer_sizes->data.i[layer_sizes->cols-1];
    int type = vecs->type;
    double m = min_val, M = max_val, m1 = min_val1, M1 = max_val1;
    bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
    bool no_scale = (flags & NO_OUTPUT_SCALE) != 0;
    int l_count = layer_sizes->cols;
    double* scale = weights[l_count];
    double* inv_scale = weights[l_count+1];
    int count = vecs->count;

    CV_FUNCNAME( "CvANN_MLP::calc_output_scale" );

    __BEGIN__;

    if( reset_weights )
    {
        double a0 = no_scale ? 1 : DBL_MAX, b0 = no_scale ? 0 : -DBL_MAX;

        for( j = 0; j < vcount; j++ )
        {
            scale[2*j] = inv_scale[2*j] = a0;
            scale[j*2+1] = inv_scale[2*j+1] = b0;
        }

        if( no_scale )
            EXIT;
    }

    for( i = 0; i < count; i++ )
    {
        const float* f = vecs->data.fl[i];
        const double* d = vecs->data.db[i];

        for( j = 0; j < vcount; j++ )
        {
            double t = type == CV_32F ? (double)f[j] : d[j];

            if( reset_weights )
            {
                double mj = scale[j*2], Mj = scale[j*2+1];
                if( mj > t ) mj = t;
                if( Mj < t ) Mj = t;

                scale[j*2] = mj;
                scale[j*2+1] = Mj;
            }
            else
            {
                t = t*scale[j*2] + scale[2*j+1];
                if( t < m1 || t > M1 )
                    CV_ERROR( CV_StsOutOfRange,
                    "Some of the new output training vector components exceed the original range too much" );
            }
        }
    }

    if( reset_weights )
        for( j = 0; j < vcount; j++ )
        {
            // map mj..Mj to m..M
            double mj = scale[j*2], Mj = scale[j*2+1];
            double a, b;
            double delta = Mj - mj;
            if( delta < DBL_EPSILON )
                a = 1, b = (M + m - Mj - mj)*0.5;
            else
                a = (M - m)/delta, b = m - mj*a;
            inv_scale[j*2] = a; inv_scale[j*2+1] = b;
            a = 1./a; b = -b*a;
            scale[j*2] = a; scale[j*2+1] = b;
        }

    __END__;
}


bool CvANN_MLP::prepare_to_train( const CvMat* _inputs, const CvMat* _outputs,
                                  const CvMat* _sample_weights, const CvMat* _sample_idx,
                                  CvVectors* _ivecs, CvVectors* _ovecs, double** _sw, int _flags )
{
    bool ok = false;
    CvMat* sample_idx = 0;
    CvVectors ivecs, ovecs;
    double* sw = 0;
    int count = 0;

    CV_FUNCNAME( "CvANN_MLP::prepare_to_train" );

    ivecs.data.ptr = ovecs.data.ptr = 0;
    assert( _ivecs && _ovecs );

    __BEGIN__;

    const int* sidx = 0;
    int i, sw_type = 0, sw_count = 0;
    int sw_step = 0;
    double sw_sum = 0;

    if( !layer_sizes )
        CV_ERROR( CV_StsError,
        "The network has not been created. Use method create or the appropriate constructor" );

    if( !CV_IS_MAT(_inputs) || (CV_MAT_TYPE(_inputs->type) != CV_32FC1 &&
        CV_MAT_TYPE(_inputs->type) != CV_64FC1) || _inputs->cols != layer_sizes->data.i[0] )
        CV_ERROR( CV_StsBadArg,
        "input training data should be a floating-point matrix with "
        "the number of rows equal to the number of training samples and "
        "the number of columns equal to the size of 0-th (input) layer" );

    if( !CV_IS_MAT(_outputs) || (CV_MAT_TYPE(_outputs->type) != CV_32FC1 &&
        CV_MAT_TYPE(_outputs->type) != CV_64FC1) ||
        _outputs->cols != layer_sizes->data.i[layer_sizes->cols - 1] )
        CV_ERROR( CV_StsBadArg,
        "output training data should be a floating-point matrix with "
        "the number of rows equal to the number of training samples and "
        "the number of columns equal to the size of last (output) layer" );

    if( _inputs->rows != _outputs->rows )
        CV_ERROR( CV_StsUnmatchedSizes, "The numbers of input and output samples do not match" );

    if( _sample_idx )
    {
        CV_CALL( sample_idx = cvPreprocessIndexArray( _sample_idx, _inputs->rows ));
        sidx = sample_idx->data.i;
        count = sample_idx->cols + sample_idx->rows - 1;
    }
    else
        count = _inputs->rows;

    if( _sample_weights )
    {
        if( !CV_IS_MAT(_sample_weights) )
            CV_ERROR( CV_StsBadArg, "sample_weights (if passed) must be a valid matrix" );

        sw_type = CV_MAT_TYPE(_sample_weights->type);
        sw_count = _sample_weights->cols + _sample_weights->rows - 1;

        if( (sw_type != CV_32FC1 && sw_type != CV_64FC1) ||
            (_sample_weights->cols != 1 && _sample_weights->rows != 1) ||
            (sw_count != count && sw_count != _inputs->rows) )
            CV_ERROR( CV_StsBadArg,
            "sample_weights must be a 1d floating-point vector containing weights "
            "of all or selected training samples" );

        sw_step = CV_IS_MAT_CONT(_sample_weights->type) ? 1 :
                  _sample_weights->step/CV_ELEM_SIZE(sw_type);

        CV_CALL( sw = (double*)cvAlloc( count*sizeof(sw[0]) ));
    }

    CV_CALL( ivecs.data.ptr = (uchar**)cvAlloc( count*sizeof(ivecs.data.ptr[0]) ));
    CV_CALL( ovecs.data.ptr = (uchar**)cvAlloc( count*sizeof(ovecs.data.ptr[0]) ));

    ivecs.type = CV_MAT_TYPE(_inputs->type);
    ovecs.type = CV_MAT_TYPE(_outputs->type);
    ivecs.count = ovecs.count = count;

    for( i = 0; i < count; i++ )
    {
        int idx = sidx ? sidx[i] : i;
        ivecs.data.ptr[i] = _inputs->data.ptr + idx*_inputs->step;
        ovecs.data.ptr[i] = _outputs->data.ptr + idx*_outputs->step;
        if( sw )
        {
            int si = sw_count == count ? i : idx;
            double w = sw_type == CV_32FC1 ?
                (double)_sample_weights->data.fl[si*sw_step] :
                _sample_weights->data.db[si*sw_step];
            sw[i] = w;
            if( w < 0 )
                CV_ERROR( CV_StsOutOfRange, "some of the sample weights are negative" );
            sw_sum += w;
        }
    }

    // normalize weights
    if( sw )
    {
        sw_sum = sw_sum > DBL_EPSILON ? 1./sw_sum : 0;
        for( i = 0; i < count; i++ )
            sw[i] *= sw_sum;
    }

    calc_input_scale( &ivecs, _flags );
    CV_CALL( calc_output_scale( &ovecs, _flags ));

    ok = true;

    __END__;

    if( !ok )
    {
        cvFree( &ivecs.data.ptr );
        cvFree( &ovecs.data.ptr );
        cvFree( &sw );
    }

    cvReleaseMat( &sample_idx );
    *_ivecs = ivecs;
    *_ovecs = ovecs;
    *_sw = sw;

    return ok;
}


int CvANN_MLP::train( const CvMat* _inputs, const CvMat* _outputs,
                      const CvMat* _sample_weights, const CvMat* _sample_idx,
                      CvANN_MLP_TrainParams _params, int flags )
{
    const int MAX_ITER = 1000;
    const double DEFAULT_EPSILON = FLT_EPSILON;

    double* sw = 0;
    CvVectors x0, u;
    int iter = -1;

    x0.data.ptr = u.data.ptr = 0;

    CV_FUNCNAME( "CvANN_MLP::train" );

    __BEGIN__;

    int max_iter;
    double epsilon;

    params = _params;

    // initialize training data
    CV_CALL( prepare_to_train( _inputs, _outputs, _sample_weights,
                               _sample_idx, &x0, &u, &sw, flags ));

    // ... and initialize the weights, unless we only update the existing ones
    if( !(flags & UPDATE_WEIGHTS) )
        init_weights();

    max_iter = params.term_crit.type & CV_TERMCRIT_ITER ? params.term_crit.max_iter : MAX_ITER;
    max_iter = MIN( max_iter, MAX_ITER );
    max_iter = MAX( max_iter, 1 );

    epsilon = params.term_crit.type & CV_TERMCRIT_EPS ? params.term_crit.epsilon : DEFAULT_EPSILON;
    epsilon = MAX(epsilon, DBL_EPSILON);

    params.term_crit.type = CV_TERMCRIT_ITER + CV_TERMCRIT_EPS;
    params.term_crit.max_iter = max_iter;
    params.term_crit.epsilon = epsilon;

    if( params.train_method == CvANN_MLP_TrainParams::BACKPROP )
    {
        CV_CALL( iter = train_backprop( x0, u, sw ));
    }
    else
    {
        CV_CALL( iter = train_rprop( x0, u, sw ));
    }

    __END__;

    cvFree( &x0.data.ptr );
    cvFree( &u.data.ptr );
    cvFree( &sw );

    return iter;
}
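
/* Usage sketch (illustrative; the data and layer sizes are assumptions): an
   end-to-end run on a tiny XOR-style set. A default-constructed
   CvANN_MLP_TrainParams selects RPROP with up to 1000 iterations.

   float train_in[4*2] = { 0,0, 0,1, 1,0, 1,1 };
   float train_out[4]  = { 0, 1, 1, 0 };
   CvMat inputs  = cvMat( 4, 2, CV_32FC1, train_in );
   CvMat outputs = cvMat( 4, 1, CV_32FC1, train_out );

   int sizes[] = { 2, 4, 1 };
   CvMat layer_sizes_hdr = cvMat( 1, 3, CV_32SC1, sizes );
   CvANN_MLP mlp( &layer_sizes_hdr, CvANN_MLP::SIGMOID_SYM, 0, 0 );

   // no per-sample weights, use all samples, default params, no flags
   int iters = mlp.train( &inputs, &outputs, 0, 0, CvANN_MLP_TrainParams(), 0 );
*/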


int CvANN_MLP::train_backprop( CvVectors x0, CvVectors u, const double* sw )
{
    CvMat* dw = 0;
    CvMat* buf = 0;
    double **x = 0, **df = 0;
    CvMat* _idx = 0;
    int iter = -1, count = x0.count;

    CV_FUNCNAME( "CvANN_MLP::train_backprop" );

    __BEGIN__;

    int i, j, k, ivcount, ovcount, l_count, total = 0, max_iter;
    double *buf_ptr;
    double prev_E = DBL_MAX*0.5, E = 0, epsilon;

    max_iter = params.term_crit.max_iter*count;
    epsilon = params.term_crit.epsilon*count;

    l_count = layer_sizes->cols;
    ivcount = layer_sizes->data.i[0];
    ovcount = layer_sizes->data.i[l_count-1];

    // allocate buffers
    for( i = 0; i < l_count; i++ )
        total += layer_sizes->data.i[i] + 1;

    CV_CALL( dw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
    cvZero( dw );
    CV_CALL( buf = cvCreateMat( 1, (total + max_count)*2, CV_64F ));
    CV_CALL( _idx = cvCreateMat( 1, count, CV_32SC1 ));
    for( i = 0; i < count; i++ )
        _idx->data.i[i] = i;

    CV_CALL( x = (double**)cvAlloc( total*2*sizeof(x[0]) ));
    df = x + total;
    buf_ptr = buf->data.db;

    for( j = 0; j < l_count; j++ )
    {
        x[j] = buf_ptr;
        df[j] = x[j] + layer_sizes->data.i[j];
        buf_ptr += (df[j] - x[j])*2;
    }

    // run back-propagation loop
    /*
        y_i = w_i*x_{i-1}
        x_i = f(y_i)
        E = 1/2*||u - x_N||^2
        grad_N = (x_N - u)*f'(y_N)
        dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i
        w_i(t+1) = w_i(t) + dw_i(t)
        grad_{i-1} = w_i^t*grad_i
    */
    for( iter = 0; iter < max_iter; iter++ )
    {
        int idx = iter % count;
        double* w = weights[0];
        double sweight = sw ? count*sw[idx] : 1.;
        CvMat _w, _dw, hdr1, hdr2, ghdr1, ghdr2, _df;
        CvMat *x1 = &hdr1, *x2 = &hdr2, *grad1 = &ghdr1, *grad2 = &ghdr2, *temp;

        if( idx == 0 )
        {
            if( fabs(prev_E - E) < epsilon )
                break;
            prev_E = E;
            E = 0;

            // shuffle indices
            for( i = 0; i < count; i++ )
            {
                int tt;
                j = (unsigned)cvRandInt(&rng) % count;
                k = (unsigned)cvRandInt(&rng) % count;
                CV_SWAP( _idx->data.i[j], _idx->data.i[k], tt );
            }
        }

        idx = _idx->data.i[idx];

        if( x0.type == CV_32F )
        {
            const float* x0data = x0.data.fl[idx];
            for( j = 0; j < ivcount; j++ )
                x[0][j] = x0data[j]*w[j*2] + w[j*2 + 1];
        }
        else
        {
            const double* x0data = x0.data.db[idx];
            for( j = 0; j < ivcount; j++ )
                x[0][j] = x0data[j]*w[j*2] + w[j*2 + 1];
        }

        cvInitMatHeader( x1, 1, ivcount, CV_64F, x[0] );

        // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
        for( i = 1; i < l_count; i++ )
        {
            cvInitMatHeader( x2, 1, layer_sizes->data.i[i], CV_64F, x[i] );
            cvInitMatHeader( &_w, x1->cols, x2->cols, CV_64F, weights[i] );
            cvGEMM( x1, &_w, 1, 0, 0, x2 );
            _df = *x2;
            _df.data.db = df[i];
            calc_activ_func_deriv( x2, &_df, _w.data.db + _w.rows*_w.cols );
            CV_SWAP( x1, x2, temp );
        }

        cvInitMatHeader( grad1, 1, ovcount, CV_64F, buf_ptr );
        *grad2 = *grad1;
        grad2->data.db = buf_ptr + max_count;

        w = weights[l_count+1];

        // calculate error
        if( u.type == CV_32F )
        {
            const float* udata = u.data.fl[idx];
            for( k = 0; k < ovcount; k++ )
            {
                double t = udata[k]*w[k*2] + w[k*2+1] - x[l_count-1][k];
                grad1->data.db[k] = t*sweight;
                E += t*t;
            }
        }
        else
        {
            const double* udata = u.data.db[idx];
            for( k = 0; k < ovcount; k++ )
            {
                double t = udata[k]*w[k*2] + w[k*2+1] - x[l_count-1][k];
                grad1->data.db[k] = t*sweight;
                E += t*t;
            }
        }
        E *= sweight;

        // backward pass, update weights
        for( i = l_count-1; i > 0; i-- )
        {
            int n1 = layer_sizes->data.i[i-1], n2 = layer_sizes->data.i[i];
            cvInitMatHeader( &_df, 1, n2, CV_64F, df[i] );
            cvMul( grad1, &_df, grad1 );
            cvInitMatHeader( &_w, n1+1, n2, CV_64F, weights[i] );
            cvInitMatHeader( &_dw, n1+1, n2, CV_64F, dw->data.db + (weights[i] - weights[0]) );
            cvInitMatHeader( x1, n1+1, 1, CV_64F, x[i-1] );
            x[i-1][n1] = 1.;
            cvGEMM( x1, grad1, params.bp_dw_scale, &_dw, params.bp_moment_scale, &_dw );
            cvAdd( &_w, &_dw, &_w );
            if( i > 1 )
            {
                grad2->cols = n1;
                _w.rows = n1;
                cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
            }
            CV_SWAP( grad1, grad2, temp );
        }
    }

    iter /= count;

    __END__;

    cvReleaseMat( &dw );
    cvReleaseMat( &buf );
    cvReleaseMat( &_idx );
    cvFree( &x );

    return iter;
}
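
/* Note: train_backprop above is an online method: it updates the weights
   after every single sample (idx = iter % count), with a momentum term folded
   into the cvGEMM call via bp_moment_scale. train_rprop below is a batch
   method: it accumulates dEdw over the whole epoch and then adapts a
   per-weight step size instead of using one global learning rate. */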


int CvANN_MLP::train_rprop( CvVectors x0, CvVectors u, const double* sw )
{
    const int max_buf_sz = 1 << 16;
    CvMat* dw = 0;
    CvMat* dEdw = 0;
    CvMat* prev_dEdw_sign = 0;
    CvMat* buf = 0;
    double **x = 0, **df = 0;
    int iter = -1, count = x0.count;

    CV_FUNCNAME( "CvANN_MLP::train_rprop" );

    __BEGIN__;

    int i, ivcount, ovcount, l_count, total = 0, max_iter, buf_sz, dcount0, dcount = 0;
    double *buf_ptr;
    double prev_E = DBL_MAX*0.5, epsilon;
    double dw_plus, dw_minus, dw_min, dw_max;
    double inv_count;

    max_iter = params.term_crit.max_iter;
    epsilon = params.term_crit.epsilon;
    dw_plus = params.rp_dw_plus;
    dw_minus = params.rp_dw_minus;
    dw_min = params.rp_dw_min;
    dw_max = params.rp_dw_max;

    l_count = layer_sizes->cols;
    ivcount = layer_sizes->data.i[0];
    ovcount = layer_sizes->data.i[l_count-1];

    // allocate buffers
    for( i = 0; i < l_count; i++ )
        total += layer_sizes->data.i[i];

    CV_CALL( dw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
    cvSet( dw, cvScalarAll(params.rp_dw0) );
    CV_CALL( dEdw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
    cvZero( dEdw );
    CV_CALL( prev_dEdw_sign = cvCreateMat( wbuf->rows, wbuf->cols, CV_8SC1 ));
    cvZero( prev_dEdw_sign );

    inv_count = 1./count;
    dcount0 = max_buf_sz/(2*total);
    dcount0 = MAX( dcount0, 1 );
    dcount0 = MIN( dcount0, count );
    buf_sz = dcount0*(total + max_count)*2;

    CV_CALL( buf = cvCreateMat( 1, buf_sz, CV_64F ));

    CV_CALL( x = (double**)cvAlloc( total*2*sizeof(x[0]) ));
    df = x + total;
    buf_ptr = buf->data.db;

    for( i = 0; i < l_count; i++ )
    {
        x[i] = buf_ptr;
        df[i] = x[i] + layer_sizes->data.i[i]*dcount0;
        buf_ptr += (df[i] - x[i])*2;
    }

    // run rprop loop
    /*
        y_i(t) = w_i(t)*x_{i-1}(t)
        x_i(t) = f(y_i(t))
        E = sum_over_all_samples(1/2*||u - x_N||^2)
        grad_N = (x_N - u)*f'(y_N)

                      MIN(dw_i{jk}(t)*dw_plus, dw_max),  if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) > 0
        dw_i{jk}(t) = MAX(dw_i{jk}(t)*dw_minus, dw_min), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0
                      dw_i{jk}(t-1),                     otherwise

        if( dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0 )
            dE/dw_i{jk}(t) <- 0
        else
            w_i{jk}(t+1) = w_i{jk}(t) + dw_i{jk}(t)
        grad_{i-1}(t) = w_i^t(t)*grad_i(t)
    */
    for( iter = 0; iter < max_iter; iter++ )
    {
        int n1, n2, si, j, k;
        double* w;
        CvMat _w, _dEdw, hdr1, hdr2, ghdr1, ghdr2, _df;
        CvMat *x1, *x2, *grad1, *grad2, *temp;
        double E = 0;

        // first, iterate through all the samples and compute dEdw
        for( si = 0; si < count; si += dcount )
        {
            dcount = MIN( count - si, dcount0 );
            w = weights[0];
            grad1 = &ghdr1; grad2 = &ghdr2;
            x1 = &hdr1; x2 = &hdr2;

            // grab and preprocess input data
            if( x0.type == CV_32F )
                for( i = 0; i < dcount; i++ )
                {
                    const float* x0data = x0.data.fl[si+i];
                    double* xdata = x[0]+i*ivcount;
                    for( j = 0; j < ivcount; j++ )
                        xdata[j] = x0data[j]*w[j*2] + w[j*2+1];
                }
            else
                for( i = 0; i < dcount; i++ )
                {
                    const double* x0data = x0.data.db[si+i];
                    double* xdata = x[0]+i*ivcount;
                    for( j = 0; j < ivcount; j++ )
                        xdata[j] = x0data[j]*w[j*2] + w[j*2+1];
                }

            cvInitMatHeader( x1, dcount, ivcount, CV_64F, x[0] );

            // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
            for( i = 1; i < l_count; i++ )
            {
                cvInitMatHeader( x2, dcount, layer_sizes->data.i[i], CV_64F, x[i] );
                cvInitMatHeader( &_w, x1->cols, x2->cols, CV_64F, weights[i] );
                cvGEMM( x1, &_w, 1, 0, 0, x2 );
                _df = *x2;
                _df.data.db = df[i];
                calc_activ_func_deriv( x2, &_df, _w.data.db + _w.rows*_w.cols );
                CV_SWAP( x1, x2, temp );
            }

            cvInitMatHeader( grad1, dcount, ovcount, CV_64F, buf_ptr );
            w = weights[l_count+1];
            grad2->data.db = buf_ptr + max_count*dcount;

            // calculate error
            if( u.type == CV_32F )
                for( i = 0; i < dcount; i++ )
                {
                    const float* udata = u.data.fl[si+i];
                    const double* xdata = x[l_count-1] + i*ovcount;
                    double* gdata = grad1->data.db + i*ovcount;
                    double sweight = sw ? sw[si+i] : inv_count, E1 = 0;

                    for( j = 0; j < ovcount; j++ )
                    {
                        double t = udata[j]*w[j*2] + w[j*2+1] - xdata[j];
                        gdata[j] = t*sweight;
                        E1 += t*t;
                    }
                    E += sweight*E1;
                }
            else
                for( i = 0; i < dcount; i++ )
                {
                    const double* udata = u.data.db[si+i];
                    const double* xdata = x[l_count-1] + i*ovcount;
                    double* gdata = grad1->data.db + i*ovcount;
                    double sweight = sw ? sw[si+i] : inv_count, E1 = 0;

                    for( j = 0; j < ovcount; j++ )
                    {
                        double t = udata[j]*w[j*2] + w[j*2+1] - xdata[j];
                        gdata[j] = t*sweight;
                        E1 += t*t;
                    }
                    E += sweight*E1;
                }

            // backward pass, update dEdw
            for( i = l_count-1; i > 0; i-- )
            {
                n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
                cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] );
                cvMul( grad1, &_df, grad1 );
                cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
                cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
                cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
                // update bias part of dEdw
                for( k = 0; k < dcount; k++ )
                {
                    double* dst = _dEdw.data.db + n1*n2;
                    const double* src = grad1->data.db + k*n2;
                    for( j = 0; j < n2; j++ )
                        dst[j] += src[j];
                }
                cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
                cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db );

                if( i > 1 )
                    cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
                CV_SWAP( grad1, grad2, temp );
            }
        }

        // now update weights
        for( i = 1; i < l_count; i++ )
        {
            n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
            for( k = 0; k <= n1; k++ )
            {
                double* wk = weights[i]+k*n2;
                size_t delta = wk - weights[0];
                double* dwk = dw->data.db + delta;
                double* dEdwk = dEdw->data.db + delta;
                char* prevEk = (char*)(prev_dEdw_sign->data.ptr + delta);

                for( j = 0; j < n2; j++ )
                {
                    double Eval = dEdwk[j];
                    double dval = dwk[j];
                    double wval = wk[j];
                    int s = CV_SIGN(Eval);
                    int ss = prevEk[j]*s;
                    if( ss > 0 )
                    {
                        dval *= dw_plus;
                        dval = MIN( dval, dw_max );
                        dwk[j] = dval;
                        wk[j] = wval + dval*s;
                    }
                    else if( ss < 0 )
                    {
                        dval *= dw_minus;
                        dval = MAX( dval, dw_min );
                        prevEk[j] = 0;
                        dwk[j] = dval;
                        wk[j] = wval + dval*s;
                    }
                    else
                    {
                        prevEk[j] = (char)s;
                        wk[j] = wval + dval*s;
                    }
                    dEdwk[j] = 0.;
                }
            }
        }

        if( fabs(prev_E - E) < epsilon )
            break;
        prev_E = E;
        E = 0;
    }

    __END__;

    cvReleaseMat( &dw );
    cvReleaseMat( &dEdw );
    cvReleaseMat( &prev_dEdw_sign );
    cvReleaseMat( &buf );
    cvFree( &x );

    return iter;
}


void CvANN_MLP::write_params( CvFileStorage* fs )
{
    //CV_FUNCNAME( "CvANN_MLP::write_params" );

    __BEGIN__;

    const char* activ_func_name = activ_func == IDENTITY ? "IDENTITY" :
                                  activ_func == SIGMOID_SYM ? "SIGMOID_SYM" :
                                  activ_func == GAUSSIAN ? "GAUSSIAN" : 0;

    if( activ_func_name )
        cvWriteString( fs, "activation_function", activ_func_name );
    else
        cvWriteInt( fs, "activation_function", activ_func );

    if( activ_func != IDENTITY )
    {
        cvWriteReal( fs, "f_param1", f_param1 );
        cvWriteReal( fs, "f_param2", f_param2 );
    }

    cvWriteReal( fs, "min_val", min_val );
    cvWriteReal( fs, "max_val", max_val );
    cvWriteReal( fs, "min_val1", min_val1 );
    cvWriteReal( fs, "max_val1", max_val1 );

    cvStartWriteStruct( fs, "training_params", CV_NODE_MAP );
    if( params.train_method == CvANN_MLP_TrainParams::BACKPROP )
    {
        cvWriteString( fs, "train_method", "BACKPROP" );
        cvWriteReal( fs, "dw_scale", params.bp_dw_scale );
        cvWriteReal( fs, "moment_scale", params.bp_moment_scale );
    }
    else if( params.train_method == CvANN_MLP_TrainParams::RPROP )
    {
        cvWriteString( fs, "train_method", "RPROP" );
        cvWriteReal( fs, "dw0", params.rp_dw0 );
        cvWriteReal( fs, "dw_plus", params.rp_dw_plus );
        cvWriteReal( fs, "dw_minus", params.rp_dw_minus );
        cvWriteReal( fs, "dw_min", params.rp_dw_min );
        cvWriteReal( fs, "dw_max", params.rp_dw_max );
    }

    cvStartWriteStruct( fs, "term_criteria", CV_NODE_MAP + CV_NODE_FLOW );
    if( params.term_crit.type & CV_TERMCRIT_EPS )
        cvWriteReal( fs, "epsilon", params.term_crit.epsilon );
    if( params.term_crit.type & CV_TERMCRIT_ITER )
        cvWriteInt( fs, "iterations", params.term_crit.max_iter );
    cvEndWriteStruct( fs );

    cvEndWriteStruct( fs );

    __END__;
}


void CvANN_MLP::write( CvFileStorage* fs, const char* name )
{
    CV_FUNCNAME( "CvANN_MLP::write" );

    __BEGIN__;

    int i, l_count;

    if( !layer_sizes )
        CV_ERROR( CV_StsError, "The network has not been initialized" );

    l_count = layer_sizes->cols;

    cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_ML_ANN_MLP );

    cvWrite( fs, "layer_sizes", layer_sizes );

    write_params( fs );

    cvStartWriteStruct( fs, "input_scale", CV_NODE_SEQ + CV_NODE_FLOW );
    cvWriteRawData( fs, weights[0], layer_sizes->data.i[0]*2, "d" );
    cvEndWriteStruct( fs );

    cvStartWriteStruct( fs, "output_scale", CV_NODE_SEQ + CV_NODE_FLOW );
    cvWriteRawData( fs, weights[l_count], layer_sizes->data.i[l_count-1]*2, "d" );
    cvEndWriteStruct( fs );

    cvStartWriteStruct( fs, "inv_output_scale", CV_NODE_SEQ + CV_NODE_FLOW );
    cvWriteRawData( fs, weights[l_count+1], layer_sizes->data.i[l_count-1]*2, "d" );
    cvEndWriteStruct( fs );

    cvStartWriteStruct( fs, "weights", CV_NODE_SEQ );
    for( i = 1; i < l_count; i++ )
    {
        cvStartWriteStruct( fs, 0, CV_NODE_SEQ + CV_NODE_FLOW );
        cvWriteRawData( fs, weights[i], (layer_sizes->data.i[i-1]+1)*layer_sizes->data.i[i], "d" );
        cvEndWriteStruct( fs );
    }

    cvEndWriteStruct( fs );

    __END__;
}
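
/* Usage sketch (assuming the CvStatModel::save/load helpers declared
   elsewhere in this library, which route through write()/read() below):

   mlp.save( "mlp.xml" );    // serializes layers, params, scales and weights
   CvANN_MLP mlp2;
   mlp2.load( "mlp.xml" );   // recreates the network and restores the weights
*/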


void CvANN_MLP::read_params( CvFileStorage* fs, CvFileNode* node )
{
    //CV_FUNCNAME( "CvANN_MLP::read_params" );

    __BEGIN__;

    const char* activ_func_name = cvReadStringByName( fs, node, "activation_function", 0 );
    CvFileNode* tparams_node;

    if( activ_func_name )
        activ_func = strcmp( activ_func_name, "SIGMOID_SYM" ) == 0 ? SIGMOID_SYM :
                     strcmp( activ_func_name, "IDENTITY" ) == 0 ? IDENTITY :
                     strcmp( activ_func_name, "GAUSSIAN" ) == 0 ? GAUSSIAN : 0;
    else
        activ_func = cvReadIntByName( fs, node, "activation_function" );

    f_param1 = cvReadRealByName( fs, node, "f_param1", 0 );
    f_param2 = cvReadRealByName( fs, node, "f_param2", 0 );

    set_activ_func( activ_func, f_param1, f_param2 );

    min_val = cvReadRealByName( fs, node, "min_val", 0. );
    max_val = cvReadRealByName( fs, node, "max_val", 1. );
    min_val1 = cvReadRealByName( fs, node, "min_val1", 0. );
    max_val1 = cvReadRealByName( fs, node, "max_val1", 1. );

    tparams_node = cvGetFileNodeByName( fs, node, "training_params" );
    params = CvANN_MLP_TrainParams();

    if( tparams_node )
    {
        const char* tmethod_name = cvReadStringByName( fs, tparams_node, "train_method", "" );
        CvFileNode* tcrit_node;

        if( strcmp( tmethod_name, "BACKPROP" ) == 0 )
        {
            params.train_method = CvANN_MLP_TrainParams::BACKPROP;
            params.bp_dw_scale = cvReadRealByName( fs, tparams_node, "dw_scale", 0 );
            params.bp_moment_scale = cvReadRealByName( fs, tparams_node, "moment_scale", 0 );
        }
        else if( strcmp( tmethod_name, "RPROP" ) == 0 )
        {
            params.train_method = CvANN_MLP_TrainParams::RPROP;
            params.rp_dw0 = cvReadRealByName( fs, tparams_node, "dw0", 0 );
            params.rp_dw_plus = cvReadRealByName( fs, tparams_node, "dw_plus", 0 );
            params.rp_dw_minus = cvReadRealByName( fs, tparams_node, "dw_minus", 0 );
            params.rp_dw_min = cvReadRealByName( fs, tparams_node, "dw_min", 0 );
            params.rp_dw_max = cvReadRealByName( fs, tparams_node, "dw_max", 0 );
        }

        tcrit_node = cvGetFileNodeByName( fs, tparams_node, "term_criteria" );
        if( tcrit_node )
        {
            params.term_crit.epsilon = cvReadRealByName( fs, tcrit_node, "epsilon", -1 );
            params.term_crit.max_iter = cvReadIntByName( fs, tcrit_node, "iterations", -1 );
            params.term_crit.type = (params.term_crit.epsilon >= 0 ? CV_TERMCRIT_EPS : 0) +
                                    (params.term_crit.max_iter >= 0 ? CV_TERMCRIT_ITER : 0);
        }
    }

    __END__;
}


void CvANN_MLP::read( CvFileStorage* fs, CvFileNode* node )
{
    CvMat* _layer_sizes = 0;

    CV_FUNCNAME( "CvANN_MLP::read" );

    __BEGIN__;

    CvFileNode* w;
    CvSeqReader reader;
    int i, l_count;

    _layer_sizes = (CvMat*)cvReadByName( fs, node, "layer_sizes" );
    CV_CALL( create( _layer_sizes, SIGMOID_SYM, 0, 0 ));
    l_count = layer_sizes->cols;

    CV_CALL( read_params( fs, node ));

    w = cvGetFileNodeByName( fs, node, "input_scale" );
    if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
        w->data.seq->total != layer_sizes->data.i[0]*2 )
        CV_ERROR( CV_StsParseError, "input_scale tag is not found or is invalid" );

    CV_CALL( cvReadRawData( fs, w, weights[0], "d" ));

    w = cvGetFileNodeByName( fs, node, "output_scale" );
    if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
        w->data.seq->total != layer_sizes->data.i[l_count-1]*2 )
        CV_ERROR( CV_StsParseError, "output_scale tag is not found or is invalid" );

    CV_CALL( cvReadRawData( fs, w, weights[l_count], "d" ));

    w = cvGetFileNodeByName( fs, node, "inv_output_scale" );
    if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
        w->data.seq->total != layer_sizes->data.i[l_count-1]*2 )
        CV_ERROR( CV_StsParseError, "inv_output_scale tag is not found or is invalid" );

    CV_CALL( cvReadRawData( fs, w, weights[l_count+1], "d" ));

    w = cvGetFileNodeByName( fs, node, "weights" );
    if( !w || CV_NODE_TYPE(w->tag) != CV_NODE_SEQ ||
        w->data.seq->total != l_count - 1 )
        CV_ERROR( CV_StsParseError, "weights tag is not found or is invalid" );

    cvStartReadSeq( w->data.seq, &reader );

    for( i = 1; i < l_count; i++ )
    {
        w = (CvFileNode*)reader.ptr;
        CV_CALL( cvReadRawData( fs, w, weights[i], "d" ));
        CV_NEXT_SEQ_ELEM( reader.seq->elem_size, reader );
    }

    __END__;

    cvReleaseMat( &_layer_sizes );  // create() keeps its own copy of the sizes
}

/* End of file. */