• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <iomanip>
2 #include <stdexcept>
3 #include <string>
4 #include "performance.h"
5 #include "opencv2/core/cuda.hpp"
6 
7 using namespace std;
8 using namespace cv;
9 using namespace cv::cuda;
10 
run()11 void TestSystem::run()
12 {
13     if (is_list_mode_)
14     {
15         for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
16             cout << (*it)->name() << endl;
17 
18         return;
19     }
20 
21     // Run test initializers
22     for (vector<Runnable*>::iterator it = inits_.begin(); it != inits_.end(); ++it)
23     {
24         if ((*it)->name().find(test_filter_, 0) != string::npos)
25             (*it)->run();
26     }
27 
28     printHeading();
29 
30     // Run tests
31     for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
32     {
33         try
34         {
35             if ((*it)->name().find(test_filter_, 0) != string::npos)
36             {
37                 cout << endl << (*it)->name() << ":\n";
38                 (*it)->run();
39                 finishCurrentSubtest();
40             }
41         }
42         catch (const Exception&)
43         {
44             // Message is printed via callback
45             resetCurrentSubtest();
46         }
47         catch (const runtime_error& e)
48         {
49             printError(e.what());
50             resetCurrentSubtest();
51         }
52     }
53 
54     printSummary();
55 }
56 
57 
finishCurrentSubtest()58 void TestSystem::finishCurrentSubtest()
59 {
60     if (cur_subtest_is_empty_)
61         // There is no need to print subtest statistics
62         return;
63 
64     double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
65     double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
66 
67     double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_);
68     speedup_total_ += speedup;
69 
70     printMetrics(cpu_time, gpu_time, speedup);
71 
72     num_subtests_called_++;
73     resetCurrentSubtest();
74 }
75 
76 
meanTime(const vector<int64> & samples)77 double TestSystem::meanTime(const vector<int64> &samples)
78 {
79     double sum = accumulate(samples.begin(), samples.end(), 0.);
80     if (samples.size() > 1)
81         return (sum - samples[0]) / (samples.size() - 1);
82     return sum;
83 }
84 
85 
printHeading()86 void TestSystem::printHeading()
87 {
88     cout << endl;
89     cout << setiosflags(ios_base::left);
90     cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms"
91         << setw(14) << "SPEEDUP"
92         << "DESCRIPTION\n";
93     cout << resetiosflags(ios_base::left);
94 }
95 
96 
printSummary()97 void TestSystem::printSummary()
98 {
99     cout << setiosflags(ios_base::fixed);
100     cout << "\naverage GPU speedup: x"
101         << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_)
102         << endl;
103     cout << resetiosflags(ios_base::fixed);
104 }
105 
106 
printMetrics(double cpu_time,double gpu_time,double speedup)107 void TestSystem::printMetrics(double cpu_time, double gpu_time, double speedup)
108 {
109     cout << TAB << setiosflags(ios_base::left);
110     stringstream stream;
111 
112     stream << cpu_time;
113     cout << setw(10) << stream.str();
114 
115     stream.str("");
116     stream << gpu_time;
117     cout << setw(10) << stream.str();
118 
119     stream.str("");
120     stream << "x" << setprecision(3) << speedup;
121     cout << setw(14) << stream.str();
122 
123     cout << cur_subtest_description_.str();
124     cout << resetiosflags(ios_base::left) << endl;
125 }
126 
127 
printError(const std::string & msg)128 void TestSystem::printError(const std::string& msg)
129 {
130     cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
131 }
132 
133 
gen(Mat & mat,int rows,int cols,int type,Scalar low,Scalar high)134 void gen(Mat& mat, int rows, int cols, int type, Scalar low, Scalar high)
135 {
136     mat.create(rows, cols, type);
137     RNG rng(0);
138     rng.fill(mat, RNG::UNIFORM, low, high);
139 }
140 
141 
// Prefixes relpath with the working directory configured in the TestSystem
// singleton. The two parts are concatenated as-is, without adding a separator.
string abspath(const string& relpath)
{
    TestSystem& test_system = TestSystem::instance();
    return test_system.workingDir() + relpath;
}
146 
147 
cvErrorCallback(int,const char *,const char * err_msg,const char *,int,void *)148 static int cvErrorCallback(int /*status*/, const char* /*func_name*/,
149                              const char* err_msg, const char* /*file_name*/,
150                              int /*line*/, void* /*userdata*/)
151 {
152     TestSystem::instance().printError(err_msg);
153     return 0;
154 }
155 
156 
main(int argc,const char * argv[])157 int main(int argc, const char* argv[])
158 {
159     int num_devices = getCudaEnabledDeviceCount();
160     if (num_devices == 0)
161     {
162         cerr << "No GPU found or the library was compiled without CUDA support";
163         return -1;
164     }
165 
166     redirectError(cvErrorCallback);
167 
168     const char* keys =
169        "{ h  help    |       | print help message }"
170        "{ f  filter  |       | filter for test }"
171        "{ w  workdir |       | set working directory }"
172        "{ l  list    |       | show all tests }"
173        "{ d  device  | 0     | device id }"
174        "{ i  iters   | 10    | iteration count }";
175 
176     CommandLineParser cmd(argc, argv, keys);
177 
178     if (cmd.has("help") || !cmd.check())
179     {
180         cmd.printMessage();
181         cmd.printErrors();
182         return 0;
183     }
184 
185 
186     int device = cmd.get<int>("device");
187     if (device < 0 || device >= num_devices)
188     {
189         cerr << "Invalid device ID" << endl;
190         return -1;
191     }
192     DeviceInfo dev_info(device);
193     if (!dev_info.isCompatible())
194     {
195         cerr << "CUDA module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl;
196         return -1;
197     }
198     setDevice(device);
199     printShortCudaDeviceInfo(device);
200 
201     string filter = cmd.get<string>("filter");
202     string workdir = cmd.get<string>("workdir");
203     bool list = cmd.has("list");
204     int iters = cmd.get<int>("iters");
205 
206     if (!filter.empty())
207         TestSystem::instance().setTestFilter(filter);
208 
209     if (!workdir.empty())
210     {
211         if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\')
212             workdir += '/';
213 
214         TestSystem::instance().setWorkingDir(workdir);
215     }
216 
217     if (list)
218         TestSystem::instance().setListMode(true);
219 
220     TestSystem::instance().setNumIters(iters);
221 
222     cout << "\nNote: the timings for GPU don't include data transfer" << endl;
223 
224     TestSystem::instance().run();
225 
226     return 0;
227 }
228