/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2014, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_ML_HPP__
#define __OPENCV_ML_HPP__

#ifdef __cplusplus
#  include "opencv2/core.hpp"
#endif

#ifdef __cplusplus

#include <float.h>
#include <map>
#include <iostream>

/**
  @defgroup ml Machine Learning

  The Machine Learning Library (MLL) is a set of classes and functions for statistical
  classification, regression, and clustering of data.

  Most of the classification and regression algorithms are implemented as C++ classes. As the
  algorithms have different sets of features (like an ability to handle missing measurements or
  categorical input variables), there is a little common ground between the classes. This common
  ground is defined by the class cv::ml::StatModel that all the other ML classes are derived from.

  See detailed overview here: @ref ml_intro.
 */

namespace cv
{

namespace ml
{

//! @addtogroup ml
//! @{

/** @brief Variable types */
enum VariableTypes
{
    VAR_NUMERICAL    =0, //!< same as VAR_ORDERED
    VAR_ORDERED      =0, //!< ordered variables
    VAR_CATEGORICAL  =1  //!< categorical variables
};

/** @brief %Error types */
enum ErrorTypes
{
    TEST_ERROR = 0,
    TRAIN_ERROR = 1
};

/** @brief Sample types */
enum SampleTypes
{
    ROW_SAMPLE = 0, //!< each training sample is a row of the samples matrix
    COL_SAMPLE = 1  //!< each training sample occupies a column of the samples matrix
};

/** @brief The structure represents the logarithmic grid range of statmodel parameters.

It is used for optimizing statmodel accuracy by varying model parameters, the accuracy estimate
being computed by cross-validation.
 */
class CV_EXPORTS ParamGrid
{
public:
    /** @brief Default constructor */
    ParamGrid();
    /** @brief Constructor with parameters */
    ParamGrid(double _minVal, double _maxVal, double _logStep);

    double minVal; //!< Minimum value of the statmodel parameter. Default value is 0.
    double maxVal; //!< Maximum value of the statmodel parameter. Default value is 0.
    /** @brief Logarithmic step for iterating the statmodel parameter.

    The grid determines the following iteration sequence of the statmodel parameter values:
    \f[(minVal, minVal*logStep, minVal*{logStep}^2, \dots,  minVal*{logStep}^n),\f]
    where \f$n\f$ is the maximal index satisfying
    \f[\texttt{minVal} * \texttt{logStep} ^n <  \texttt{maxVal}\f]
    The grid is logarithmic, so logStep must always be greater than 1. Default value is 1.
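
    For illustration, a sketch of how such a grid could be enumerated (the loop and the grid
    values below are illustrative only, not part of the API):
    @code{.cpp}
    ParamGrid grid(1e-3, 1e3, 10); // minVal=1e-3, maxVal=1e3, logStep=10
    for( double v = grid.minVal; v < grid.maxVal; v *= grid.logStep )
        std::cout << v << std::endl; // prints 0.001, 0.01, ..., 100
    @endcode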
    */
    double logStep;
};

/** @brief Class encapsulating training data.

Please note that the class only specifies the interface of training data, but not implementation.
All the statistical model classes in the _ml_ module accept Ptr\<TrainData\> as parameter. In other
words, you can create your own class derived from TrainData and pass a smart pointer to an instance
of this class into StatModel::train.

@sa @ref ml_intro_data
 */
class CV_EXPORTS_W TrainData
{
public:
    static inline float missingValue() { return FLT_MAX; }
    virtual ~TrainData();

    CV_WRAP virtual int getLayout() const = 0;
    CV_WRAP virtual int getNTrainSamples() const = 0;
    CV_WRAP virtual int getNTestSamples() const = 0;
    CV_WRAP virtual int getNSamples() const = 0;
    CV_WRAP virtual int getNVars() const = 0;
    CV_WRAP virtual int getNAllVars() const = 0;

    CV_WRAP virtual void getSample(InputArray varIdx, int sidx, float* buf) const = 0;
    CV_WRAP virtual Mat getSamples() const = 0;
    CV_WRAP virtual Mat getMissing() const = 0;

    /** @brief Returns matrix of train samples

    @param layout The requested layout. If it's different from the initial one, the matrix is
        transposed. See ml::SampleTypes.
    @param compressSamples if true, the function returns only the training samples (specified by
        sampleIdx)
    @param compressVars if true, the function returns the shorter training samples, containing only
        the active variables.

    In the current implementation the function tries to avoid physical data copying and returns the
    matrix stored inside TrainData (unless the transposition or compression is needed).
     */
    CV_WRAP virtual Mat getTrainSamples(int layout=ROW_SAMPLE,
                                bool compressSamples=true,
                                bool compressVars=true) const = 0;

    /** @brief Returns the vector of responses

    The function returns ordered or the original categorical responses. Usually it's used in
    regression algorithms.
     */
    CV_WRAP virtual Mat getTrainResponses() const = 0;

    /** @brief Returns the vector of normalized categorical responses

    The function returns a vector of responses. Each response is an integer from `0` to `<number of
    classes>-1`. The actual label value can then be retrieved from the class label vector, see
    TrainData::getClassLabels.
     */
    CV_WRAP virtual Mat getTrainNormCatResponses() const = 0;
    CV_WRAP virtual Mat getTestResponses() const = 0;
    CV_WRAP virtual Mat getTestNormCatResponses() const = 0;
    CV_WRAP virtual Mat getResponses() const = 0;
    CV_WRAP virtual Mat getNormCatResponses() const = 0;
    CV_WRAP virtual Mat getSampleWeights() const = 0;
    CV_WRAP virtual Mat getTrainSampleWeights() const = 0;
    CV_WRAP virtual Mat getTestSampleWeights() const = 0;
    CV_WRAP virtual Mat getVarIdx() const = 0;
    CV_WRAP virtual Mat getVarType() const = 0;
    CV_WRAP virtual int getResponseType() const = 0;
    CV_WRAP virtual Mat getTrainSampleIdx() const = 0;
    CV_WRAP virtual Mat getTestSampleIdx() const = 0;
    CV_WRAP virtual void getValues(int vi, InputArray sidx, float* values) const = 0;
    virtual void getNormCatValues(int vi, InputArray sidx, int* values) const = 0;
    CV_WRAP virtual Mat getDefaultSubstValues() const = 0;

    CV_WRAP virtual int getCatCount(int vi) const = 0;

    /** @brief Returns the vector of class labels

    The function returns the vector of unique labels that occur in the responses.
     */
    CV_WRAP virtual Mat getClassLabels() const = 0;

    CV_WRAP virtual Mat getCatOfs() const = 0;
    CV_WRAP virtual Mat getCatMap() const = 0;

    /** @brief Splits the training data into the training and test parts
    @sa TrainData::setTrainTestSplitRatio
     */
    CV_WRAP virtual void setTrainTestSplit(int count, bool shuffle=true) = 0;

    /** @brief Splits the training data into the training and test parts

    The function selects a subset of the specified relative size and then uses it as the training
    set. If the function is not called, all the data is used for training. Please note that for
    each of TrainData::getTrain\* there is a corresponding TrainData::getTest\*, so that the test
    subset can be retrieved and processed as well.
    @sa TrainData::setTrainTestSplit
     */
    CV_WRAP virtual void setTrainTestSplitRatio(double ratio, bool shuffle=true) = 0;
    CV_WRAP virtual void shuffleTrainTest() = 0;

    CV_WRAP static Mat getSubVector(const Mat& vec, const Mat& idx);

    /** @brief Reads the dataset from a .csv file and returns the ready-to-use training data.

    @param filename The input file name
    @param headerLineCount The number of lines in the beginning to skip; besides the header, the
        function also skips empty lines and lines starting with `#`
    @param responseStartIdx Index of the first output variable. If -1, the function considers the
        last variable as the response
    @param responseEndIdx Index of the last output variable + 1. If -1, then there is a single
        response variable at responseStartIdx.
    @param varTypeSpec The optional text string that specifies the variables' types. It has the
        format `ord[n1-n2,n3,n4-n5,...]cat[n6,n7-n8,...]`. That is, variables from `n1 to n2`
        (inclusive range), `n3`, `n4 to n5` ... are considered ordered and `n6`, `n7 to n8` ... are
        considered as categorical. The range `[n1..n2] + [n3] + [n4..n5] + ... + [n6] + [n7..n8]`
        should cover all the variables. If varTypeSpec is not specified, then the algorithm uses the
        following rules:
        - all input variables are considered ordered by default. If some column contains
          non-numerical values, e.g. 'apple', 'pear', 'apple', 'apple', 'mango', the corresponding
          variable is considered categorical.
        - if there are several output variables, they are all considered as ordered. An error is
          reported when non-numerical values are used.
        - if there is a single output variable, then if its values are non-numerical or are all
          integers, then it's considered categorical. Otherwise, it's considered ordered.
    @param delimiter The character used to separate values in each line.
    @param missch The character used to specify missing measurements. It should not be a digit.
        Although it's a non-numerical value, it does not affect the decision of whether the
        variable is ordered or categorical.
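
    A minimal usage sketch (the file name "data.csv" and its layout — one header line, responses
    in the last column — are hypothetical):
    @code{.cpp}
    // skip one header line; responses default to the last column
    Ptr<TrainData> data = TrainData::loadFromCSV("data.csv", 1);
    @endcode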
     */
    static Ptr<TrainData> loadFromCSV(const String& filename,
                                      int headerLineCount,
                                      int responseStartIdx=-1,
                                      int responseEndIdx=-1,
                                      const String& varTypeSpec=String(),
                                      char delimiter=',',
                                      char missch='?');

    /** @brief Creates training data from in-memory arrays.

    @param samples matrix of samples. It should have CV_32F type.
    @param layout see ml::SampleTypes.
    @param responses matrix of responses. If the responses are scalar, they should be stored as a
        single row or as a single column. The matrix should have type CV_32F or CV_32S (in the
        former case the responses are considered as ordered by default; in the latter case - as
        categorical)
    @param varIdx vector specifying which variables to use for training. It can be an integer vector
        (CV_32S) containing 0-based variable indices or byte vector (CV_8U) containing a mask of
        active variables.
    @param sampleIdx vector specifying which samples to use for training. It can be an integer
        vector (CV_32S) containing 0-based sample indices or byte vector (CV_8U) containing a mask
        of training samples.
    @param sampleWeights optional vector with weights for each sample. It should have CV_32F type.
    @param varType optional vector of type CV_8U and size `<number_of_variables_in_samples> +
        <number_of_variables_in_responses>`, containing types of each input and output variable. See
        ml::VariableTypes.
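
    A minimal sketch of in-memory construction (the sizes below are arbitrary):
    @code{.cpp}
    Mat samples(100, 2, CV_32F);   // 100 samples, 2 features each
    Mat responses(100, 1, CV_32S); // one class label per sample
    // ... fill samples and responses ...
    Ptr<TrainData> data = TrainData::create(samples, ROW_SAMPLE, responses);
    @endcode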
     */
    CV_WRAP static Ptr<TrainData> create(InputArray samples, int layout, InputArray responses,
                                 InputArray varIdx=noArray(), InputArray sampleIdx=noArray(),
                                 InputArray sampleWeights=noArray(), InputArray varType=noArray());
};

/** @brief Base class for statistical models in OpenCV ML.
 */
class CV_EXPORTS_W StatModel : public Algorithm
{
public:
    /** Predict options */
    enum Flags {
        UPDATE_MODEL = 1,
        RAW_OUTPUT=1, //!< makes the method return the raw results (the sum), not the class label
        COMPRESSED_INPUT=2,
        PREPROCESSED_INPUT=4
    };

    /** @brief Returns the number of variables in training samples */
    CV_WRAP virtual int getVarCount() const = 0;

    CV_WRAP virtual bool empty() const;

    /** @brief Returns true if the model is trained */
    CV_WRAP virtual bool isTrained() const = 0;
    /** @brief Returns true if the model is a classifier */
    CV_WRAP virtual bool isClassifier() const = 0;

    /** @brief Trains the statistical model

    @param trainData training data that can be loaded from file using TrainData::loadFromCSV or
        created with TrainData::create.
    @param flags optional flags, depending on the model. Some of the models can be updated with the
        new training samples, not completely overwritten (such as NormalBayesClassifier or ANN_MLP).
     */
    CV_WRAP virtual bool train( const Ptr<TrainData>& trainData, int flags=0 );

    /** @brief Trains the statistical model

    @param samples training samples
    @param layout See ml::SampleTypes.
    @param responses vector of responses associated with the training samples.
    */
    CV_WRAP virtual bool train( InputArray samples, int layout, InputArray responses );

    /** @brief Computes error on the training or test dataset

    @param data the training data
    @param test if true, the error is computed over the test subset of the data; otherwise it is
        computed over the training subset. Note that if you load a completely different dataset
        just to evaluate an already trained classifier, you will probably want to skip
        TrainData::setTrainTestSplitRatio and pass test=false, so that the error is computed for
        the whole new set.
    @param resp the optional output responses.

    The method uses StatModel::predict to compute the error. For regression models the error is
    computed as RMS, for classifiers - as a percent of misclassified samples (0%-100%).
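
    For example, a minimal sketch combining a train/test split with error evaluation (SVM here
    is just an arbitrary model choice, and `data` is an assumed TrainData instance):
    @code{.cpp}
    data->setTrainTestSplitRatio(0.8); // 80% train, 20% test
    Ptr<SVM> svm = SVM::create();
    svm->train(data);
    float testError = svm->calcError(data, true, noArray()); // error on the test subset
    @endcode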
     */
    CV_WRAP virtual float calcError( const Ptr<TrainData>& data, bool test, OutputArray resp ) const;

    /** @brief Predicts response(s) for the provided sample(s)

    @param samples The input samples, floating-point matrix
    @param results The optional output matrix of results.
    @param flags The optional flags, model-dependent. See cv::ml::StatModel::Flags.
     */
    CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;

    /** @brief Create and train model with default parameters

    The class must implement a static `create()` method with no parameters or with all default parameter values
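
    For example (assuming a prepared TrainData instance `data`):
    @code{.cpp}
    // creates an SVM with default parameters and trains it in one call
    Ptr<SVM> svm = StatModel::train<SVM>(data);
    @endcode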
    */
    template<typename _Tp> static Ptr<_Tp> train(const Ptr<TrainData>& data, int flags=0)
    {
        Ptr<_Tp> model = _Tp::create();
        return !model.empty() && model->train(data, flags) ? model : Ptr<_Tp>();
    }
};

/****************************************************************************************\
*                                 Normal Bayes Classifier                                *
\****************************************************************************************/

/** @brief Bayes classifier for normally distributed data.

@sa @ref ml_intro_bayes
 */
class CV_EXPORTS_W NormalBayesClassifier : public StatModel
{
public:
    /** @brief Predicts the response for sample(s).

    The method estimates the most probable classes for input vectors. Input vectors (one or more)
    are stored as rows of the matrix inputs. In case of multiple input vectors, there is one
    output value per input vector, stored in outputs. The predicted class for a single input
    vector is returned by the method. The vector outputProbs contains the output probabilities
    corresponding to each element of outputs.
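
    A minimal usage sketch (`data` is an assumed TrainData instance and testSamples an assumed
    CV_32F matrix with one sample per row):
    @code{.cpp}
    Ptr<NormalBayesClassifier> nb = NormalBayesClassifier::create();
    nb->train(data);
    Mat outputs, outputProbs;
    nb->predictProb(testSamples, outputs, outputProbs);
    @endcode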
     */
    CV_WRAP virtual float predictProb( InputArray inputs, OutputArray outputs,
                               OutputArray outputProbs, int flags=0 ) const = 0;

    /** Creates empty model
    Use StatModel::train to train the model after creation. */
    CV_WRAP static Ptr<NormalBayesClassifier> create();
};

/****************************************************************************************\
*                          K-Nearest Neighbour Classifier                                *
\****************************************************************************************/

/** @brief The class implements K-Nearest Neighbors model

@sa @ref ml_intro_knn
 */
class CV_EXPORTS_W KNearest : public StatModel
{
public:

    /** Default number of neighbors to use in predict method. */
    /** @see setDefaultK */
    CV_WRAP virtual int getDefaultK() const = 0;
    /** @copybrief getDefaultK @see getDefaultK */
    CV_WRAP virtual void setDefaultK(int val) = 0;

    /** Whether classification or regression model should be trained. */
    /** @see setIsClassifier */
    CV_WRAP virtual bool getIsClassifier() const = 0;
    /** @copybrief getIsClassifier @see getIsClassifier */
    CV_WRAP virtual void setIsClassifier(bool val) = 0;

    /** Parameter for KDTree implementation. */
    /** @see setEmax */
    CV_WRAP virtual int getEmax() const = 0;
    /** @copybrief getEmax @see getEmax */
    CV_WRAP virtual void setEmax(int val) = 0;

    /** %Algorithm type, one of KNearest::Types. */
    /** @see setAlgorithmType */
    CV_WRAP virtual int getAlgorithmType() const = 0;
    /** @copybrief getAlgorithmType @see getAlgorithmType */
    CV_WRAP virtual void setAlgorithmType(int val) = 0;

    /** @brief Finds the neighbors and predicts responses for input vectors.

    @param samples Input samples stored by rows. It is a single-precision floating-point matrix of
        `<number_of_samples> * <number_of_features>` size.
    @param k Number of used nearest neighbors. Should be greater than 1.
    @param results Vector with results of prediction (regression or classification) for each input
        sample. It is a single-precision floating-point vector with `<number_of_samples>` elements.
    @param neighborResponses Optional output values for corresponding neighbors. It is a single-
        precision floating-point matrix of `<number_of_samples> * k` size.
    @param dist Optional output distances from the input vectors to the corresponding neighbors. It
        is a single-precision floating-point matrix of `<number_of_samples> * k` size.

    For each input vector (a row of the matrix samples), the method finds the k nearest neighbors.
    In case of regression, the predicted result is a mean value of the particular vector's neighbor
    responses. In case of classification, the class is determined by voting.

    For each input vector, the neighbors are sorted by their distances to the vector.

    When using the C++ interface, you can pass empty output matrices and the function will
    allocate memory itself.

    If only a single input vector is passed, all output matrices are optional and the predicted
    value is returned by the method.

    The function is parallelized with the TBB library.
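
    A minimal usage sketch (`data` and `testSamples` are assumed to exist):
    @code{.cpp}
    Ptr<KNearest> knn = KNearest::create();
    knn->train(data);
    Mat results;
    knn->findNearest(testSamples, 3, results); // predict using the 3 nearest neighbors
    @endcode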
     */
    CV_WRAP virtual float findNearest( InputArray samples, int k,
                               OutputArray results,
                               OutputArray neighborResponses=noArray(),
                               OutputArray dist=noArray() ) const = 0;

    /** @brief Implementations of KNearest algorithm
       */
    enum Types
    {
        BRUTE_FORCE=1,
        KDTREE=2
    };

    /** @brief Creates the empty model

    The static method creates an empty %KNearest classifier. It should then be trained using the
    StatModel::train method.
     */
    CV_WRAP static Ptr<KNearest> create();
};

/****************************************************************************************\
*                                   Support Vector Machines                              *
\****************************************************************************************/

/** @brief Support Vector Machines.

@sa @ref ml_intro_svm
 */
class CV_EXPORTS_W SVM : public StatModel
{
public:

    class CV_EXPORTS Kernel : public Algorithm
    {
    public:
        virtual int getType() const = 0;
        virtual void calc( int vcount, int n, const float* vecs, const float* another, float* results ) = 0;
    };

    /** Type of a %SVM formulation.
    See SVM::Types. Default value is SVM::C_SVC. */
    /** @see setType */
    CV_WRAP virtual int getType() const = 0;
    /** @copybrief getType @see getType */
    CV_WRAP virtual void setType(int val) = 0;

    /** Parameter \f$\gamma\f$ of a kernel function.
    For SVM::POLY, SVM::RBF, SVM::SIGMOID or SVM::CHI2. Default value is 1. */
    /** @see setGamma */
    CV_WRAP virtual double getGamma() const = 0;
    /** @copybrief getGamma @see getGamma */
    CV_WRAP virtual void setGamma(double val) = 0;

    /** Parameter _coef0_ of a kernel function.
    For SVM::POLY or SVM::SIGMOID. Default value is 0.*/
    /** @see setCoef0 */
    CV_WRAP virtual double getCoef0() const = 0;
    /** @copybrief getCoef0 @see getCoef0 */
    CV_WRAP virtual void setCoef0(double val) = 0;

    /** Parameter _degree_ of a kernel function.
    For SVM::POLY. Default value is 0. */
    /** @see setDegree */
    CV_WRAP virtual double getDegree() const = 0;
    /** @copybrief getDegree @see getDegree */
    CV_WRAP virtual void setDegree(double val) = 0;

    /** Parameter _C_ of a %SVM optimization problem.
    For SVM::C_SVC, SVM::EPS_SVR or SVM::NU_SVR. Default value is 0. */
    /** @see setC */
    CV_WRAP virtual double getC() const = 0;
    /** @copybrief getC @see getC */
    CV_WRAP virtual void setC(double val) = 0;

    /** Parameter \f$\nu\f$ of a %SVM optimization problem.
    For SVM::NU_SVC, SVM::ONE_CLASS or SVM::NU_SVR. Default value is 0. */
    /** @see setNu */
    CV_WRAP virtual double getNu() const = 0;
    /** @copybrief getNu @see getNu */
    CV_WRAP virtual void setNu(double val) = 0;

    /** Parameter \f$\epsilon\f$ of a %SVM optimization problem.
    For SVM::EPS_SVR. Default value is 0. */
    /** @see setP */
    CV_WRAP virtual double getP() const = 0;
    /** @copybrief getP @see getP */
    CV_WRAP virtual void setP(double val) = 0;

    /** Optional weights in the SVM::C_SVC problem, assigned to particular classes.
    They are multiplied by _C_ so the parameter _C_ of class _i_ becomes `classWeights(i) * C`. Thus
    these weights affect the misclassification penalty for different classes. The larger the weight,
    the larger the penalty on misclassification of data from the corresponding class. Default value
    is empty Mat. */
    /** @see setClassWeights */
    CV_WRAP virtual cv::Mat getClassWeights() const = 0;
    /** @copybrief getClassWeights @see getClassWeights */
    CV_WRAP virtual void setClassWeights(const cv::Mat &val) = 0;

    /** Termination criteria of the iterative %SVM training procedure which solves a partial
    case of constrained quadratic optimization problem.
    You can specify tolerance and/or the maximum number of iterations. Default value is
    `TermCriteria( TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, FLT_EPSILON )`; */
    /** @see setTermCriteria */
    CV_WRAP virtual cv::TermCriteria getTermCriteria() const = 0;
    /** @copybrief getTermCriteria @see getTermCriteria */
    CV_WRAP virtual void setTermCriteria(const cv::TermCriteria &val) = 0;

    /** Type of a %SVM kernel.
    See SVM::KernelTypes. Default value is SVM::RBF. */
    CV_WRAP virtual int getKernelType() const = 0;

    /** Initialize with one of predefined kernels.
    See SVM::KernelTypes. */
    CV_WRAP virtual void setKernel(int kernelType) = 0;

    /** Initialize with custom kernel.
    See SVM::Kernel class for implementation details */
    virtual void setCustomKernel(const Ptr<Kernel> &_kernel) = 0;

    //! %SVM type
    enum Types {
        /** C-Support Vector Classification. n-class classification (n \f$\geq\f$ 2), allows
        imperfect separation of classes with penalty multiplier C for outliers. */
        C_SVC=100,
        /** \f$\nu\f$-Support Vector Classification. n-class classification with possible
        imperfect separation. Parameter \f$\nu\f$ (in the range 0..1, the larger the value, the smoother
        the decision boundary) is used instead of C. */
        NU_SVC=101,
        /** Distribution Estimation (One-class %SVM). All the training data are from
        the same class, %SVM builds a boundary that separates the class from the rest of the feature
        space. */
        ONE_CLASS=102,
        /** \f$\epsilon\f$-Support Vector Regression. The distance between feature vectors
        from the training set and the fitting hyper-plane must be less than p. For outliers the
        penalty multiplier C is used. */
        EPS_SVR=103,
        /** \f$\nu\f$-Support Vector Regression. \f$\nu\f$ is used instead of p.
        See @cite LibSVM for details. */
        NU_SVR=104
    };

    /** @brief %SVM kernel type

    A comparison of different kernels on the following 2D test case with four classes. Four
    SVM::C_SVC SVMs have been trained (one against rest) with auto_train. Evaluation on three
    different kernels (SVM::CHI2, SVM::INTER, SVM::RBF). The color depicts the class with max score.
    Bright means max-score \> 0, dark means max-score \< 0.
    ![image](pics/SVM_Comparison.png)
    */
    enum KernelTypes {
        /** Returned by SVM::getKernelType in case when custom kernel has been set */
        CUSTOM=-1,
        /** Linear kernel. No mapping is done, linear discrimination (or regression) is
        done in the original feature space. It is the fastest option. \f$K(x_i, x_j) = x_i^T x_j\f$. */
        LINEAR=0,
        /** Polynomial kernel:
        \f$K(x_i, x_j) = (\gamma x_i^T x_j + coef0)^{degree}, \gamma > 0\f$. */
        POLY=1,
        /** Radial basis function (RBF), a good choice in most cases.
        \f$K(x_i, x_j) = e^{-\gamma ||x_i - x_j||^2}, \gamma > 0\f$. */
        RBF=2,
        /** Sigmoid kernel: \f$K(x_i, x_j) = \tanh(\gamma x_i^T x_j + coef0)\f$. */
        SIGMOID=3,
        /** Exponential Chi2 kernel, similar to the RBF kernel:
        \f$K(x_i, x_j) = e^{-\gamma \chi^2(x_i,x_j)}, \chi^2(x_i,x_j) = (x_i-x_j)^2/(x_i+x_j), \gamma > 0\f$. */
        CHI2=4,
        /** Histogram intersection kernel. A fast kernel. \f$K(x_i, x_j) = min(x_i,x_j)\f$. */
        INTER=5
    };

    //! %SVM params type
    enum ParamTypes {
        C=0,
        GAMMA=1,
        P=2,
        NU=3,
        COEF=4,
        DEGREE=5
    };

    /** @brief Trains an %SVM with optimal parameters.

    @param data the training data that can be constructed using TrainData::create or
        TrainData::loadFromCSV.
    @param kFold Cross-validation parameter. The training set is divided into kFold subsets. One
        subset is used to test the model, the others form the train set. So, the %SVM algorithm is
        executed kFold times.
    @param Cgrid grid for C
    @param gammaGrid grid for gamma
    @param pGrid grid for p
    @param nuGrid grid for nu
    @param coeffGrid grid for coeff
    @param degreeGrid grid for degree
    @param balanced If true and the problem is 2-class classification then the method creates more
        balanced cross-validation subsets, that is, proportions between classes in subsets are close
        to the proportions in the whole train dataset.

    The method trains the %SVM model automatically by choosing the optimal parameters C, gamma, p,
    nu, coef0, degree. Parameters are considered optimal when the cross-validation
    estimate of the test set error is minimal.

    If there is no need to optimize a parameter, the corresponding grid step should be set to any
    value less than or equal to 1. For example, to avoid optimization in gamma, set `gammaGrid.step
    = 0`, and `gammaGrid.minVal`, `gammaGrid.maxVal` to arbitrary numbers. In this case, the current
    value of gamma is used.

    And, finally, if the optimization in a parameter is required but the corresponding grid is
    unknown, you may call the function SVM::getDefaultGrid. To generate a grid, for example, for
    gamma, call `SVM::getDefaultGrid(SVM::GAMMA)`.

    This function works for the classification (SVM::C_SVC or SVM::NU_SVC) as well as for the
    regression (SVM::EPS_SVR or SVM::NU_SVR). If it is SVM::ONE_CLASS, no optimization is made and
    the usual %SVM with parameters specified in params is executed.
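
    A minimal usage sketch (`data` is an assumed TrainData instance; all grids are left at their
    defaults):
    @code{.cpp}
    Ptr<SVM> svm = SVM::create();
    svm->setType(SVM::C_SVC);
    svm->setKernel(SVM::RBF);
    svm->trainAuto(data); // 10-fold cross-validation over the default parameter grids
    @endcode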
     */
    virtual bool trainAuto( const Ptr<TrainData>& data, int kFold = 10,
                    ParamGrid Cgrid = SVM::getDefaultGrid(SVM::C),
                    ParamGrid gammaGrid  = SVM::getDefaultGrid(SVM::GAMMA),
                    ParamGrid pGrid      = SVM::getDefaultGrid(SVM::P),
                    ParamGrid nuGrid     = SVM::getDefaultGrid(SVM::NU),
                    ParamGrid coeffGrid  = SVM::getDefaultGrid(SVM::COEF),
                    ParamGrid degreeGrid = SVM::getDefaultGrid(SVM::DEGREE),
                    bool balanced=false) = 0;

    /** @brief Retrieves all the support vectors

    The method returns all the support vectors as a floating-point matrix, where support vectors are
    stored as matrix rows.
     */
    CV_WRAP virtual Mat getSupportVectors() const = 0;

    /** @brief Retrieves the decision function

    @param i the index of the decision function. If the problem solved is regression, 1-class or
        2-class classification, then there will be just one decision function and the index should
        always be 0. Otherwise, in the case of N-class classification, there will be \f$N(N-1)/2\f$
        decision functions.
    @param alpha the optional output vector for weights, corresponding to different support vectors.
        In the case of linear %SVM all the alpha's will be 1's.
    @param svidx the optional output vector of indices of support vectors within the matrix of
        support vectors (which can be retrieved by SVM::getSupportVectors). In the case of linear
        %SVM each decision function consists of a single "compressed" support vector.

    The method returns the rho parameter of the decision function, a scalar subtracted from the
    weighted sum of kernel responses.
     */
    CV_WRAP virtual double getDecisionFunction(int i, OutputArray alpha, OutputArray svidx) const = 0;

    /** @brief Generates a grid for %SVM parameters.

    @param param_id %SVM parameter ID that must be one of the SVM::ParamTypes. The grid is
    generated for the parameter with this ID.

    The function generates a grid for the specified parameter of the %SVM algorithm. The grid may be
    passed to the function SVM::trainAuto.
     */
    static ParamGrid getDefaultGrid( int param_id );

    /** Creates empty model.
    Use StatModel::train to train the model. Since %SVM has several parameters, you may want to
    find the best parameters for your problem, which can be done with SVM::trainAuto. */
    CV_WRAP static Ptr<SVM> create();
};

/****************************************************************************************\
*                              Expectation - Maximization                                *
\****************************************************************************************/

/** @brief The class implements the Expectation Maximization algorithm.

@sa @ref ml_intro_em
 */
class CV_EXPORTS_W EM : public StatModel
{
public:
    //! Type of covariation matrices
    enum Types {
        /** A scaled identity matrix \f$\mu_k * I\f$. There is just one
        parameter \f$\mu_k\f$ to be estimated for each matrix. The option may be used in special cases,
        when the constraint is relevant, or as a first step in the optimization (for example in case
        when the data is preprocessed with PCA). The results of such preliminary estimation may be
        passed again to the optimization procedure, this time with
        covMatType=EM::COV_MAT_DIAGONAL. */
        COV_MAT_SPHERICAL=0,
        /** A diagonal matrix with positive diagonal elements. The number of
        free parameters is d for each matrix. This is the most commonly used option, yielding good
        estimation results. */
        COV_MAT_DIAGONAL=1,
        /** A symmetric positive-definite matrix. The number of free
        parameters in each matrix is about \f$d^2/2\f$. It is not recommended to use this option, unless
        there is pretty accurate initial estimation of the parameters and/or a huge number of
        training samples. */
        COV_MAT_GENERIC=2,
        COV_MAT_DEFAULT=COV_MAT_DIAGONAL
    };

    //! Default parameters
    enum {DEFAULT_NCLUSTERS=5, DEFAULT_MAX_ITERS=100};

    //! The initial step
    enum {START_E_STEP=1, START_M_STEP=2, START_AUTO_STEP=0};

    /** The number of mixture components in the Gaussian mixture model.
    Default value of the parameter is EM::DEFAULT_NCLUSTERS=5. Some %EM implementations could
    determine the optimal number of mixtures within a specified value range, but that is not the
    case in ML yet. */
    /** @see setClustersNumber */
    CV_WRAP virtual int getClustersNumber() const = 0;
    /** @copybrief getClustersNumber @see getClustersNumber */
    CV_WRAP virtual void setClustersNumber(int val) = 0;

    /** Constraint on covariance matrices which defines type of matrices.
    See EM::Types. */
    /** @see setCovarianceMatrixType */
    CV_WRAP virtual int getCovarianceMatrixType() const = 0;
    /** @copybrief getCovarianceMatrixType @see getCovarianceMatrixType */
    CV_WRAP virtual void setCovarianceMatrixType(int val) = 0;

    /** The termination criteria of the %EM algorithm.
    The %EM algorithm can be terminated by the number of iterations termCrit.maxCount (number of
    M-steps) or when relative change of likelihood logarithm is less than termCrit.epsilon. Default
    maximum number of iterations is EM::DEFAULT_MAX_ITERS=100. */
    /** @see setTermCriteria */
    CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
    /** @copybrief getTermCriteria @see getTermCriteria */
    CV_WRAP virtual void setTermCriteria(const TermCriteria &val) = 0;

    /** @brief Returns weights of the mixtures

    Returns vector with the number of elements equal to the number of mixtures.
     */
    CV_WRAP virtual Mat getWeights() const = 0;
    /** @brief Returns the cluster centers (means of the Gaussian mixture)

    Returns matrix with the number of rows equal to the number of mixtures and number of columns
    equal to the space dimensionality.
     */
    CV_WRAP virtual Mat getMeans() const = 0;
    /** @brief Returns covariation matrices

    Returns vector of covariation matrices. Number of matrices is the number of Gaussian mixtures,
    each matrix is a square floating-point matrix NxN, where N is the space dimensionality.
     */
    virtual void getCovs(std::vector<Mat>& covs) const = 0;

    /** @brief Returns a likelihood logarithm value and an index of the most probable mixture component
    for the given sample.

    @param sample A sample for classification. It should be a one-channel matrix of
        \f$1 \times dims\f$ or \f$dims \times 1\f$ size.
    @param probs Optional output matrix that contains posterior probabilities of each component
        given the sample. It has \f$1 \times nclusters\f$ size and CV_64FC1 type.

    The method returns a two-element double vector. Zero element is a likelihood logarithm value for
    the sample. First element is an index of the most probable mixture component for the given
    sample.
     */
    CV_WRAP virtual Vec2d predict2(InputArray sample, OutputArray probs) const = 0;

    /** @brief Estimate the Gaussian mixture parameters from a samples set.

    This variation starts with Expectation step. Initial values of the model parameters will be
    estimated by the k-means algorithm.

    Unlike many of the ML models, %EM is an unsupervised learning algorithm and it does not take
    responses (class labels or function values) as input. Instead, it computes the *Maximum
    Likelihood Estimate* of the Gaussian mixture parameters from an input sample set, stores all the
    parameters inside the structure: \f$p_{i,k}\f$ in probs, \f$a_k\f$ in means , \f$S_k\f$ in
    covs[k], \f$\pi_k\f$ in weights , and optionally computes the output "class label" for each
    sample: \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most
    probable mixture component for each sample).

    The trained model can be used further for prediction, just like any other classifier. The
    trained model is similar to the NormalBayesClassifier.

    @param samples Samples from which the Gaussian mixture model will be estimated. It should be a
        one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type
        it will be converted to the inner matrix of such type for the further computing.
    @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for
        each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type.
    @param labels The optional output "class label" for each sample:
        \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable
        mixture component for each sample). It has \f$nsamples \times 1\f$ size and CV_32SC1 type.
    @param probs The optional output matrix that contains posterior probabilities of each Gaussian
        mixture component given each sample. It has \f$nsamples \times nclusters\f$ size and
        CV_64FC1 type.
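
    A minimal clustering sketch (`samples` is an assumed one-channel matrix with one sample per
    row):
    @code{.cpp}
    Ptr<EM> em = EM::create();
    em->setClustersNumber(3);
    Mat labels;
    em->trainEM(samples, noArray(), labels, noArray()); // labels holds one cluster index per sample
    @endcode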
     */
    CV_WRAP virtual bool trainEM(InputArray samples,
                         OutputArray logLikelihoods=noArray(),
                         OutputArray labels=noArray(),
                         OutputArray probs=noArray()) = 0;

    /** @brief Estimate the Gaussian mixture parameters from a samples set.

    This variation starts with Expectation step. You need to provide initial means \f$a_k\f$ of
    mixture components. Optionally you can pass initial weights \f$\pi_k\f$ and covariance matrices
    \f$S_k\f$ of mixture components.

    @param samples Samples from which the Gaussian mixture model will be estimated. It should be a
        one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type
        it will be converted to the inner matrix of such type for the further computing.
    @param means0 Initial means \f$a_k\f$ of mixture components. It is a one-channel matrix of
        \f$nclusters \times dims\f$ size. If the matrix does not have CV_64F type it will be
        converted to the inner matrix of such type for the further computing.
    @param covs0 The vector of initial covariance matrices \f$S_k\f$ of mixture components. Each of
        covariance matrices is a one-channel matrix of \f$dims \times dims\f$ size. If the matrices
        do not have CV_64F type they will be converted to the inner matrices of such type for the
        further computing.
    @param weights0 Initial weights \f$\pi_k\f$ of mixture components. It should be a one-channel
        floating-point matrix with \f$1 \times nclusters\f$ or \f$nclusters \times 1\f$ size.
    @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for
        each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type.
    @param labels The optional output "class label" for each sample:
        \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable
        mixture component for each sample). It has \f$nsamples \times 1\f$ size and CV_32SC1 type.
    @param probs The optional output matrix that contains posterior probabilities of each Gaussian
        mixture component given each sample. It has \f$nsamples \times nclusters\f$ size and
        CV_64FC1 type.
    */
    CV_WRAP virtual bool trainE(InputArray samples, InputArray means0,
                        InputArray covs0=noArray(),
                        InputArray weights0=noArray(),
                        OutputArray logLikelihoods=noArray(),
                        OutputArray labels=noArray(),
                        OutputArray probs=noArray()) = 0;

    /** @brief Estimate the Gaussian mixture parameters from a samples set.

    This variation starts with Maximization step. You need to provide initial probabilities
    \f$p_{i,k}\f$ to use this option.

    @param samples Samples from which the Gaussian mixture model will be estimated. It should be a
        one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type
        it will be converted to the inner matrix of such type for the further computing.
    @param probs0 Initial probabilities \f$p_{i,k}\f$ of each sample belonging to each mixture
        component, a one-channel floating-point matrix of \f$nsamples \times nclusters\f$ size.
    @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for
        each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type.
    @param labels The optional output "class label" for each sample:
        \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable
        mixture component for each sample). It has \f$nsamples \times 1\f$ size and CV_32SC1 type.
    @param probs The optional output matrix that contains posterior probabilities of each Gaussian
        mixture component given each sample. It has \f$nsamples \times nclusters\f$ size and
        CV_64FC1 type.
    */
    CV_WRAP virtual bool trainM(InputArray samples, InputArray probs0,
                        OutputArray logLikelihoods=noArray(),
                        OutputArray labels=noArray(),
                        OutputArray probs=noArray()) = 0;

    /** Creates empty %EM model.
    The model should then be trained using the StatModel::train(traindata, flags) method. Alternatively, you
    can use one of the EM::train\* methods or load it from file using Algorithm::load\<EM\>(filename).
     */
    CV_WRAP static Ptr<EM> create();
};

/****************************************************************************************\
*                                      Decision Tree                                     *
\****************************************************************************************/

/** @brief The class represents a single decision tree or a collection of decision trees.

The current public interface of the class allows the user to train only a single decision tree;
however, the class is capable of storing multiple decision trees and using them for prediction (by
summing responses or using a voting scheme), and the classes derived from DTrees (such as RTrees
and Boost) use this capability to implement decision tree ensembles.

@sa @ref ml_intro_trees
*/
class CV_EXPORTS_W DTrees : public StatModel
{
public:
    /** Predict options */
    enum Flags { PREDICT_AUTO=0, PREDICT_SUM=(1<<8), PREDICT_MAX_VOTE=(2<<8), PREDICT_MASK=(3<<8) };

    /** Cluster possible values of a categorical variable into K\<=maxCategories clusters to
    find a suboptimal split.
    If a discrete variable, on which the training procedure tries to make a split, takes more than
    maxCategories values, the precise best subset estimation may take a very long time because the
    algorithm is exponential. Instead, many decision tree engines (including our implementation)
    try to find a sub-optimal split in this case by clustering all the samples into maxCategories
    clusters, that is, some categories are merged together. The clustering is applied only in n \>
    2-class classification problems for categorical variables with N \> maxCategories possible
    values. In case of regression and 2-class classification the optimal split can be found
    efficiently without employing clustering, thus the parameter is not used in these cases.
    Default value is 10.*/
    /** @see setMaxCategories */
    CV_WRAP virtual int getMaxCategories() const = 0;
    /** @copybrief getMaxCategories @see getMaxCategories */
    CV_WRAP virtual void setMaxCategories(int val) = 0;

    /** The maximum possible depth of the tree.
    That is, the training algorithm attempts to split a node while its depth is less than maxDepth.
    The root node has zero depth. The actual depth may be smaller if the other termination criteria
    are met (see the outline of the training procedure @ref ml_intro_trees "here"), and/or if the
    tree is pruned. Default value is INT_MAX.*/
    /** @see setMaxDepth */
    CV_WRAP virtual int getMaxDepth() const = 0;
    /** @copybrief getMaxDepth @see getMaxDepth */
    CV_WRAP virtual void setMaxDepth(int val) = 0;

    /** If the number of samples in a node is less than this parameter then the node will not be split.

    Default value is 10.*/
    /** @see setMinSampleCount */
    CV_WRAP virtual int getMinSampleCount() const = 0;
    /** @copybrief getMinSampleCount @see getMinSampleCount */
    CV_WRAP virtual void setMinSampleCount(int val) = 0;

    /** If CVFolds \> 1 then the algorithm prunes the built decision tree using a K-fold
    cross-validation procedure where K is equal to CVFolds.
    Default value is 10.*/
    /** @see setCVFolds */
    CV_WRAP virtual int getCVFolds() const = 0;
    /** @copybrief getCVFolds @see getCVFolds */
    CV_WRAP virtual void setCVFolds(int val) = 0;

    /** If true then surrogate splits will be built.
    These splits make it possible to work with missing data and compute variable importance correctly.
    Default value is false.
    @note currently it's not implemented.*/
    /** @see setUseSurrogates */
    CV_WRAP virtual bool getUseSurrogates() const = 0;
    /** @copybrief getUseSurrogates @see getUseSurrogates */
    CV_WRAP virtual void setUseSurrogates(bool val) = 0;

    /** If true then pruning will be harsher.
    This will make a tree more compact and more resistant to the training data noise but a bit less
    accurate. Default value is true.*/
    /** @see setUse1SERule */
    CV_WRAP virtual bool getUse1SERule() const = 0;
    /** @copybrief getUse1SERule @see getUse1SERule */
    CV_WRAP virtual void setUse1SERule(bool val) = 0;

    /** If true then pruned branches are physically removed from the tree.
    Otherwise they are retained and it is possible to get results from the original unpruned (or
    pruned less aggressively) tree. Default value is true.*/
    /** @see setTruncatePrunedTree */
    CV_WRAP virtual bool getTruncatePrunedTree() const = 0;
    /** @copybrief getTruncatePrunedTree @see getTruncatePrunedTree */
    CV_WRAP virtual void setTruncatePrunedTree(bool val) = 0;

    /** Termination criteria for regression trees.
    If all absolute differences between an estimated value in a node and values of train samples
    in this node are less than this parameter then the node will not be split further. Default
    value is 0.01f*/
    /** @see setRegressionAccuracy */
    CV_WRAP virtual float getRegressionAccuracy() const = 0;
    /** @copybrief getRegressionAccuracy @see getRegressionAccuracy */
    CV_WRAP virtual void setRegressionAccuracy(float val) = 0;

    /** @brief The array of a priori class probabilities, sorted by the class label value.

    The parameter can be used to tune the decision tree preferences toward a certain class. For
    example, if you want to detect some rare anomaly occurrence, the training base will likely
    contain much more normal cases than anomalies, so a very good classification performance
    will be achieved just by considering every case as normal. To avoid this, the priors can be
    specified, where the anomaly probability is artificially increased (up to 0.5 or even
    greater), so the weight of the misclassified anomalies becomes much bigger, and the tree is
    adjusted properly.

    You can also think about this parameter as weights of prediction categories which determine
    relative weights that you give to misclassification. That is, if the weight of the first
    category is 1 and the weight of the second category is 10, then each mistake in predicting
    the second category is equivalent to making 10 mistakes in predicting the first category.
    Default value is empty Mat.*/
    /** @see setPriors */
    CV_WRAP virtual cv::Mat getPriors() const = 0;
    /** @copybrief getPriors @see getPriors */
    CV_WRAP virtual void setPriors(const cv::Mat &val) = 0;

    /** @brief The class represents a decision tree node.
     */
    class CV_EXPORTS Node
    {
    public:
        Node();
        double value; //!< Value at the node: a class label in case of classification or estimated
                      //!< function value in case of regression.
        int classIdx; //!< Class index normalized to 0..class_count-1 range and assigned to the
                      //!< node. It is used internally in classification trees and tree ensembles.
        int parent; //!< Index of the parent node
        int left; //!< Index of the left child node
        int right; //!< Index of right child node
        int defaultDir; //!< Default direction where to go (-1: left or +1: right). It helps in the
                        //!< case of missing values.
        int split; //!< Index of the first split
    };

    /** @brief The class represents split in a decision tree.
     */
    class CV_EXPORTS Split
    {
    public:
        Split();
        int varIdx; //!< Index of variable on which the split is created.
        bool inversed; //!< If true, then the inverse split rule is used (i.e. left and right
                       //!< branches are exchanged in the rule expressions below).
        float quality; //!< The split quality, a positive number. It is used to choose the best split.
        int next; //!< Index of the next split in the list of splits for the node
        float c; /**< The threshold value in case of split on an ordered variable.
                      The rule is:
                      @code{.none}
                      if var_value < c
                        then next_node <- left
                        else next_node <- right
                      @endcode */
        int subsetOfs; /**< Offset of the bitset used by the split on a categorical variable.
                            The rule is:
                            @code{.none}
                            if bitset[var_value] == 1
                                then next_node <- left
                                else next_node <- right
                            @endcode */
    };

    /** @brief Returns indices of root nodes
    */
    virtual const std::vector<int>& getRoots() const = 0;
    /** @brief Returns all the nodes

    all the node indices are indices in the returned vector
     */
    virtual const std::vector<Node>& getNodes() const = 0;
    /** @brief Returns all the splits

    all the split indices are indices in the returned vector
     */
    virtual const std::vector<Split>& getSplits() const = 0;
    /** @brief Returns all the bitsets for categorical splits

    Split::subsetOfs is an offset in the returned vector
     */
    virtual const std::vector<int>& getSubsets() const = 0;

    /** @brief Creates the empty model

    The static method creates an empty decision tree with the specified parameters. It should then
    be trained using the train method (see StatModel::train). Alternatively, you can load the model
    from file using Algorithm::load\<DTrees\>(filename).
1090      */
1091     CV_WRAP static Ptr<DTrees> create();
1092 };
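
/* A minimal usage sketch of DTrees (toy, made-up data): train a tree on a small 2-D
   classification problem, then walk the learned structure through getRoots/getNodes/getSplits.
   A leaf node is assumed to be marked by a negative Node::split index.
@code{.cpp}
#include "opencv2/ml.hpp"
#include <iostream>

using namespace cv;
using namespace cv::ml;

int main()
{
    // 4 samples, 2 ordered features, 2 classes (values are made up).
    float samplesData[] = { 0.f, 0.f,   1.f, 1.f,   9.f, 9.f,   10.f, 10.f };
    int responsesData[] = { 0, 0, 1, 1 };
    Mat samples(4, 2, CV_32F, samplesData);
    Mat responses(4, 1, CV_32S, responsesData);

    Ptr<DTrees> dtree = DTrees::create();
    dtree->setMaxDepth(4);
    dtree->setMinSampleCount(1);
    dtree->setCVFolds(0);   // disable built-in cross-validation pruning for this tiny set
    dtree->train(TrainData::create(samples, ROW_SAMPLE, responses));

    const std::vector<DTrees::Node>& nodes = dtree->getNodes();
    const std::vector<DTrees::Split>& splits = dtree->getSplits();
    for (int rootIdx : dtree->getRoots())
    {
        const DTrees::Node& root = nodes[rootIdx];
        if (root.split >= 0)  // internal node: report its first split
            std::cout << "root splits on variable " << splits[root.split].varIdx
                      << " at threshold " << splits[root.split].c << std::endl;
    }
    return 0;
}
@endcode
*/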

/****************************************************************************************\
*                                   Random Trees Classifier                              *
\****************************************************************************************/

/** @brief The class implements the random forest predictor.

@sa @ref ml_intro_rtrees
 */
class CV_EXPORTS_W RTrees : public DTrees
{
public:

    /** If true, variable importance will be calculated and can then be retrieved by
    RTrees::getVarImportance. Default value is false.*/
    /** @see setCalculateVarImportance */
    CV_WRAP virtual bool getCalculateVarImportance() const = 0;
    /** @copybrief getCalculateVarImportance @see getCalculateVarImportance */
    CV_WRAP virtual void setCalculateVarImportance(bool val) = 0;

    /** The size of the randomly selected subset of features at each tree node that is used
    to find the best split(s).
    If you set it to 0, the size will be set to the square root of the total number of
    features. Default value is 0.*/
    /** @see setActiveVarCount */
    CV_WRAP virtual int getActiveVarCount() const = 0;
    /** @copybrief getActiveVarCount @see getActiveVarCount */
    CV_WRAP virtual void setActiveVarCount(int val) = 0;

    /** The termination criteria that specify when the training algorithm stops:
    either when the specified number of trees is trained and added to the ensemble, or when
    sufficient accuracy (measured as OOB error) is achieved. Typically, the more trees you have,
    the better the accuracy. However, the improvement in accuracy generally diminishes and levels
    off past a certain number of trees. Also keep in mind that the prediction time grows linearly
    with the number of trees. Default value is TermCriteria(TermCriteria::MAX_ITER +
    TermCriteria::EPS, 50, 0.1)*/
    /** @see setTermCriteria */
    CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
    /** @copybrief getTermCriteria @see getTermCriteria */
    CV_WRAP virtual void setTermCriteria(const TermCriteria &val) = 0;

    /** Returns the variable importance array.
    The method returns the variable importance vector, computed at the training stage when
    CalculateVarImportance is set to true. If this flag was set to false, an empty matrix is
    returned.
     */
    CV_WRAP virtual Mat getVarImportance() const = 0;

    /** Creates the empty model.
    Use StatModel::train to train the model, or Algorithm::load\<RTrees\>(filename) to load the
    pre-trained model.
     */
    CV_WRAP static Ptr<RTrees> create();
};
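
/* A minimal configuration sketch for RTrees (it assumes a Ptr<TrainData> named trainData
   has been prepared elsewhere, e.g. with TrainData::create):
@code{.cpp}
Ptr<RTrees> rtrees = RTrees::create();
rtrees->setCalculateVarImportance(true);   // otherwise getVarImportance() returns an empty Mat
rtrees->setActiveVarCount(0);              // 0 => sqrt(total number of features)
rtrees->setTermCriteria(
    TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 100, 0.01));
rtrees->train(trainData);
Mat importance = rtrees->getVarImportance();  // one importance value per variable
@endcode
*/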

/****************************************************************************************\
*                                   Boosted tree classifier                              *
\****************************************************************************************/

/** @brief Boosted tree classifier derived from DTrees

@sa @ref ml_intro_boost
 */
class CV_EXPORTS_W Boost : public DTrees
{
public:
    /** Type of the boosting algorithm.
    See Boost::Types. Default value is Boost::REAL. */
    /** @see setBoostType */
    CV_WRAP virtual int getBoostType() const = 0;
    /** @copybrief getBoostType @see getBoostType */
    CV_WRAP virtual void setBoostType(int val) = 0;

    /** The number of weak classifiers.
    Default value is 100. */
    /** @see setWeakCount */
    CV_WRAP virtual int getWeakCount() const = 0;
    /** @copybrief getWeakCount @see getWeakCount */
    CV_WRAP virtual void setWeakCount(int val) = 0;

    /** A threshold between 0 and 1 used to save computational time.
    Samples with summary weight \f$\leq 1 - weight\_trim\_rate\f$ do not participate in the *next*
    iteration of training. Set this parameter to 0 to turn off this functionality. Default value is 0.95.*/
    /** @see setWeightTrimRate */
    CV_WRAP virtual double getWeightTrimRate() const = 0;
    /** @copybrief getWeightTrimRate @see getWeightTrimRate */
    CV_WRAP virtual void setWeightTrimRate(double val) = 0;

    /** Boosting type.
    Gentle AdaBoost and Real AdaBoost are often the preferable choices. */
    enum Types {
        DISCRETE=0, //!< Discrete AdaBoost.
        REAL=1, //!< Real AdaBoost. It is a technique that utilizes confidence-rated predictions
                //!< and works well with categorical data.
        LOGIT=2, //!< LogitBoost. It can produce good regression fits.
        GENTLE=3 //!< Gentle AdaBoost. It puts less weight on outlier data points and for that
                 //!< reason is often good with regression data.
    };

    /** Creates the empty model.
    Use StatModel::train to train the model, Algorithm::load\<Boost\>(filename) to load the pre-trained model. */
    CV_WRAP static Ptr<Boost> create();
};
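
/* A minimal configuration sketch for Boost, again assuming a prepared two-class
   Ptr<TrainData> named trainData:
@code{.cpp}
Ptr<Boost> boost = Boost::create();
boost->setBoostType(Boost::GENTLE);  // often a good choice alongside Boost::REAL
boost->setWeakCount(100);
boost->setWeightTrimRate(0.95);      // 0 disables weight trimming
boost->train(trainData);
@endcode
*/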

/****************************************************************************************\
*                                   Gradient Boosted Trees                               *
\****************************************************************************************/

/*class CV_EXPORTS_W GBTrees : public DTrees
{
public:
    struct CV_EXPORTS_W_MAP Params : public DTrees::Params
    {
        CV_PROP_RW int weakCount;
        CV_PROP_RW int lossFunctionType;
        CV_PROP_RW float subsamplePortion;
        CV_PROP_RW float shrinkage;

        Params();
        Params( int lossFunctionType, int weakCount, float shrinkage,
                float subsamplePortion, int maxDepth, bool useSurrogates );
    };

    enum {SQUARED_LOSS=0, ABSOLUTE_LOSS, HUBER_LOSS=3, DEVIANCE_LOSS};

    virtual void setK(int k) = 0;

    virtual float predictSerial( InputArray samples,
                                 OutputArray weakResponses, int flags) const = 0;

    static Ptr<GBTrees> create(const Params& p);
};*/

/****************************************************************************************\
*                              Artificial Neural Networks (ANN)                          *
\****************************************************************************************/

/////////////////////////////////// Multi-Layer Perceptrons //////////////////////////////

/** @brief Artificial Neural Networks - Multi-Layer Perceptrons.

Unlike many other models in ML that are constructed and trained at once, in the MLP model these
steps are separated. First, a network with the specified topology is created using the non-default
constructor or the method ANN_MLP::create. All the weights are set to zeros. Then, the network is
trained using a set of input and output vectors. The training procedure can be repeated more than
once, that is, the weights can be adjusted based on the new training data.

Additional flags for StatModel::train are available: ANN_MLP::TrainFlags.

@sa @ref ml_intro_ann
 */
class CV_EXPORTS_W ANN_MLP : public StatModel
{
public:
    /** Available training methods */
    enum TrainingMethods {
        BACKPROP=0, //!< The back-propagation algorithm.
        RPROP=1 //!< The RPROP algorithm. See @cite RPROP93 for details.
    };

    /** Sets training method and common parameters.
    @param method Default value is ANN_MLP::RPROP. See ANN_MLP::TrainingMethods.
    @param param1 passed to setRpropDW0 for ANN_MLP::RPROP and to setBackpropWeightScale for ANN_MLP::BACKPROP
    @param param2 passed to setRpropDWMin for ANN_MLP::RPROP and to setBackpropMomentumScale for ANN_MLP::BACKPROP.
    */
    CV_WRAP virtual void setTrainMethod(int method, double param1 = 0, double param2 = 0) = 0;

    /** Returns current training method */
    CV_WRAP virtual int getTrainMethod() const = 0;

    /** Initialize the activation function for each neuron.
    Currently the default and the only fully supported activation function is ANN_MLP::SIGMOID_SYM.
    @param type The type of the activation function. See ANN_MLP::ActivationFunctions.
    @param param1 The first parameter of the activation function, \f$\alpha\f$. Default value is 0.
    @param param2 The second parameter of the activation function, \f$\beta\f$. Default value is 0.
    */
    CV_WRAP virtual void setActivationFunction(int type, double param1 = 0, double param2 = 0) = 0;

    /**  Integer vector specifying the number of neurons in each layer including the input and output layers.
    The very first element specifies the number of neurons in the input layer, and the last element
    the number of neurons in the output layer. Default value is empty Mat.
    @sa getLayerSizes */
    CV_WRAP virtual void setLayerSizes(InputArray _layer_sizes) = 0;

    /**  Integer vector specifying the number of neurons in each layer including the input and output layers.
    The very first element specifies the number of neurons in the input layer, and the last element
    the number of neurons in the output layer.
    @sa setLayerSizes */
    CV_WRAP virtual cv::Mat getLayerSizes() const = 0;

    /** Termination criteria of the training algorithm.
    You can specify the maximum number of iterations (maxCount) and/or how much the error could
    change between the iterations to make the algorithm continue (epsilon). Default value is
    TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 0.01).*/
    /** @see setTermCriteria */
    CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
    /** @copybrief getTermCriteria @see getTermCriteria */
    CV_WRAP virtual void setTermCriteria(TermCriteria val) = 0;

    /** BPROP: Strength of the weight gradient term.
    The recommended value is about 0.1. Default value is 0.1.*/
    /** @see setBackpropWeightScale */
    CV_WRAP virtual double getBackpropWeightScale() const = 0;
    /** @copybrief getBackpropWeightScale @see getBackpropWeightScale */
    CV_WRAP virtual void setBackpropWeightScale(double val) = 0;

    /** BPROP: Strength of the momentum term (the difference between weights on the 2 previous iterations).
    This parameter provides some inertia to smooth the random fluctuations of the weights. It can
    vary from 0 (the feature is disabled) to 1 and beyond. A value of about 0.1 is good enough.
    Default value is 0.1.*/
    /** @see setBackpropMomentumScale */
    CV_WRAP virtual double getBackpropMomentumScale() const = 0;
    /** @copybrief getBackpropMomentumScale @see getBackpropMomentumScale */
    CV_WRAP virtual void setBackpropMomentumScale(double val) = 0;

    /** RPROP: Initial value \f$\Delta_0\f$ of update-values \f$\Delta_{ij}\f$.
    Default value is 0.1.*/
    /** @see setRpropDW0 */
    CV_WRAP virtual double getRpropDW0() const = 0;
    /** @copybrief getRpropDW0 @see getRpropDW0 */
    CV_WRAP virtual void setRpropDW0(double val) = 0;

    /** RPROP: Increase factor \f$\eta^+\f$.
    It must be \>1. Default value is 1.2.*/
    /** @see setRpropDWPlus */
    CV_WRAP virtual double getRpropDWPlus() const = 0;
    /** @copybrief getRpropDWPlus @see getRpropDWPlus */
    CV_WRAP virtual void setRpropDWPlus(double val) = 0;

    /** RPROP: Decrease factor \f$\eta^-\f$.
    It must be \<1. Default value is 0.5.*/
    /** @see setRpropDWMinus */
    CV_WRAP virtual double getRpropDWMinus() const = 0;
    /** @copybrief getRpropDWMinus @see getRpropDWMinus */
    CV_WRAP virtual void setRpropDWMinus(double val) = 0;

    /** RPROP: Update-values lower limit \f$\Delta_{min}\f$.
    It must be positive. Default value is FLT_EPSILON.*/
    /** @see setRpropDWMin */
    CV_WRAP virtual double getRpropDWMin() const = 0;
    /** @copybrief getRpropDWMin @see getRpropDWMin */
    CV_WRAP virtual void setRpropDWMin(double val) = 0;

    /** RPROP: Update-values upper limit \f$\Delta_{max}\f$.
    It must be \>1. Default value is 50.*/
    /** @see setRpropDWMax */
    CV_WRAP virtual double getRpropDWMax() const = 0;
    /** @copybrief getRpropDWMax @see getRpropDWMax */
    CV_WRAP virtual void setRpropDWMax(double val) = 0;

    /** possible activation functions */
    enum ActivationFunctions {
        /** Identity function: \f$f(x)=x\f$ */
        IDENTITY = 0,
        /** Symmetrical sigmoid: \f$f(x)=\beta*(1-e^{-\alpha x})/(1+e^{-\alpha x})\f$
        @note
        If you are using the default sigmoid activation function with the default parameter values
        fparam1=0 and fparam2=0, then the function used is y = 1.7159\*tanh(2/3 \* x), so the output
        will range from [-1.7159, 1.7159] instead of [0, 1].*/
        SIGMOID_SYM = 1,
        /** Gaussian function: \f$f(x)=\beta e^{-\alpha x^2}\f$ */
        GAUSSIAN = 2
    };

    /** Train options */
    enum TrainFlags {
        /** Update the network weights, rather than compute them from scratch. In the latter case
        the weights are initialized using the Nguyen-Widrow algorithm. */
        UPDATE_WEIGHTS = 1,
        /** Do not normalize the input vectors. If this flag is not set, the training algorithm
        normalizes each input feature independently, shifting its mean value to 0 and making the
        standard deviation equal to 1. If the network is assumed to be updated frequently, the new
        training data could be much different from the original one. In this case, you should take
        care of proper normalization. */
        NO_INPUT_SCALE = 2,
        /** Do not normalize the output vectors. If the flag is not set, the training algorithm
        normalizes each output feature independently, by transforming it to a certain range
        depending on the used activation function. */
        NO_OUTPUT_SCALE = 4
    };

    CV_WRAP virtual Mat getWeights(int layerIdx) const = 0;

    /** @brief Creates empty model

    Use StatModel::train to train the model, Algorithm::load\<ANN_MLP\>(filename) to load the pre-trained model.
    Note that the train method has optional flags: ANN_MLP::TrainFlags.
     */
    CV_WRAP static Ptr<ANN_MLP> create();
};
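
/* A minimal usage sketch for ANN_MLP: a 2-8-1 network trained on XOR-like toy data
   (values made up). With SIGMOID_SYM the outputs are symmetric around 0, so the
   targets here are -1/+1 rather than 0/1:
@code{.cpp}
Mat layerSizes = (Mat_<int>(3, 1) << 2, 8, 1);           // input, hidden, output neurons

Ptr<ANN_MLP> mlp = ANN_MLP::create();
mlp->setLayerSizes(layerSizes);
mlp->setActivationFunction(ANN_MLP::SIGMOID_SYM, 0, 0);  // defaults => 1.7159*tanh(2/3 * x)
mlp->setTrainMethod(ANN_MLP::BACKPROP, 0.1, 0.1);        // weight scale, momentum
mlp->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 0.01));

float in[]  = { 0,0,  0,1,  1,0,  1,1 };
float out[] = { -1, 1, 1, -1 };
Mat inputs(4, 2, CV_32F, in), outputs(4, 1, CV_32F, out);
mlp->train(TrainData::create(inputs, ROW_SAMPLE, outputs));

Mat response;
mlp->predict(inputs, response);                          // one output row per input sample
@endcode
*/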

/****************************************************************************************\
*                           Logistic Regression                                          *
\****************************************************************************************/

/** @brief Implements Logistic Regression classifier.

@sa @ref ml_intro_lr
 */
class CV_EXPORTS_W LogisticRegression : public StatModel
{
public:

    /** Learning rate. */
    /** @see setLearningRate */
    CV_WRAP virtual double getLearningRate() const = 0;
    /** @copybrief getLearningRate @see getLearningRate */
    CV_WRAP virtual void setLearningRate(double val) = 0;

    /** Number of iterations. */
    /** @see setIterations */
    CV_WRAP virtual int getIterations() const = 0;
    /** @copybrief getIterations @see getIterations */
    CV_WRAP virtual void setIterations(int val) = 0;

    /** Kind of regularization to be applied. See LogisticRegression::RegKinds. */
    /** @see setRegularization */
    CV_WRAP virtual int getRegularization() const = 0;
    /** @copybrief getRegularization @see getRegularization */
    CV_WRAP virtual void setRegularization(int val) = 0;

    /** Kind of training method used. See LogisticRegression::Methods. */
    /** @see setTrainMethod */
    CV_WRAP virtual int getTrainMethod() const = 0;
    /** @copybrief getTrainMethod @see getTrainMethod */
    CV_WRAP virtual void setTrainMethod(int val) = 0;

    /** Specifies the number of training samples taken in each step of Mini-Batch Gradient
    Descent. It is used only when the training method is LogisticRegression::MINI_BATCH. It
    has to take values less than the total number of training samples. */
    /** @see setMiniBatchSize */
    CV_WRAP virtual int getMiniBatchSize() const = 0;
    /** @copybrief getMiniBatchSize @see getMiniBatchSize */
    CV_WRAP virtual void setMiniBatchSize(int val) = 0;

    /** Termination criteria of the algorithm. */
    /** @see setTermCriteria */
    CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
    /** @copybrief getTermCriteria @see getTermCriteria */
    CV_WRAP virtual void setTermCriteria(TermCriteria val) = 0;

    //! Regularization kinds
    enum RegKinds {
        REG_DISABLE = -1, //!< Regularization disabled
        REG_L1 = 0, //!< %L1 norm
        REG_L2 = 1 //!< %L2 norm
    };

    //! Training methods
    enum Methods {
        BATCH = 0,
        MINI_BATCH = 1 //!< Set MiniBatchSize to a positive integer when using this method.
    };

    /** @brief Predicts responses for input samples and returns a float value.

    @param samples The input data for the prediction algorithm. Matrix [m x n], where each row
        contains variables (features) of one object being classified. Should have data type CV_32F.
    @param results Predicted labels as a column matrix of type CV_32S.
    @param flags Not used.
     */
    CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;

    /** @brief This function returns the trained parameters arranged across rows.

    For a two-class classification problem, it returns a row matrix. It returns the learnt
    parameters of the Logistic Regression as a matrix of type CV_32F.
     */
    CV_WRAP virtual Mat get_learnt_thetas() const = 0;

    /** @brief Creates empty model.

    Creates a Logistic Regression model with default parameters; set the parameters, then train
    it using StatModel::train.
     */
    CV_WRAP static Ptr<LogisticRegression> create();
};
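
/* A minimal usage sketch for LogisticRegression on toy, made-up data. Note that, unlike the
   CV_32S labels used by the tree-based classifiers above, this implementation expects the
   training labels as a float (CV_32F) column:
@code{.cpp}
float trainX[] = { 0.f, 0.f,   1.f, 1.f,   9.f, 9.f,   10.f, 10.f };
float trainY[] = { 0.f, 0.f, 1.f, 1.f };
Mat samples(4, 2, CV_32F, trainX);
Mat labels(4, 1, CV_32F, trainY);

Ptr<LogisticRegression> lr = LogisticRegression::create();
lr->setLearningRate(0.001);
lr->setIterations(1000);
lr->setRegularization(LogisticRegression::REG_L2);
lr->setTrainMethod(LogisticRegression::BATCH);
lr->train(samples, ROW_SAMPLE, labels);

Mat predicted;
lr->predict(samples, predicted);   // predicted: CV_32S column of class labels
@endcode
*/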

/****************************************************************************************\
*                           Auxiliary functions declarations                             *
\****************************************************************************************/

/** @brief Generates _samples_ from a multivariate normal distribution

@param mean the mean (average) row vector
@param cov symmetric covariance matrix
@param nsamples number of samples to generate
@param samples returned samples array
*/
CV_EXPORTS void randMVNormal( InputArray mean, InputArray cov, int nsamples, OutputArray samples);
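
/* For instance (a small sketch with made-up values), drawing 100 points from a 2-D
   standard normal distribution:
@code{.cpp}
Mat mean = (Mat_<float>(1, 2) << 0.f, 0.f);
Mat cov  = (Mat_<float>(2, 2) << 1.f, 0.f,
                                 0.f, 1.f);
Mat samples;
randMVNormal(mean, cov, 100, samples);   // samples: 100 x 2 matrix, one sample per row
@endcode
*/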

/** @brief Generates samples from a Gaussian mixture distribution */
CV_EXPORTS void randGaussMixture( InputArray means, InputArray covs, InputArray weights,
                                  int nsamples, OutputArray samples, OutputArray sampClasses );

/** @brief Creates a test set */
CV_EXPORTS void createConcentricSpheresTestSet( int nsamples, int nfeatures, int nclasses,
                                                OutputArray samples, OutputArray responses);
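
/* A quick sketch of generating a synthetic classification problem with the helper above:
@code{.cpp}
Mat samples, responses;
createConcentricSpheresTestSet(1000, 2, 3, samples, responses);
// samples: 1000 x 2 feature matrix; responses: one class label per sample
@endcode
*/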

//! @} ml

}
}

#endif // __cplusplus
#endif // __OPENCV_ML_HPP__

/* End of file. */