/*
 * Copyright (c) 2018, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>
#include <stdio.h>
#include <string.h>

#include "third_party/googletest/src/googletest/include/gtest/gtest.h"

#include "aom/aom_integer.h"
#include "aom_ports/aom_timer.h"
#include "av1/encoder/ml.h"
#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"
#include "test/util.h"
#include "test/register_state_check.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"

namespace {
typedef void (*NnPredict_Func)(const float *const input_nodes,
                               const NN_CONFIG *const nn_config,
                               float *const output);

typedef ::testing::tuple<const NnPredict_Func> NnPredictTestParam;

const float epsilon = 1e-3f;  // Error threshold for functional equivalence

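// Test harness for av1_nn_predict() SIMD kernels: each kernel under test is
// checked for functional equivalence against the C reference and can also be
// timed against it.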
class NnPredictTest : public ::testing::TestWithParam<NnPredictTestParam> {
 public:
  virtual void SetUp() {
    const int MAX_NODES2 = NN_MAX_NODES_PER_LAYER * NN_MAX_NODES_PER_LAYER;
    // Allocate two large buffers on the heap for edge weights and node biases,
    // then set up the two-dimensional arrays pointing into the big buffers.
    weights_buf = (float *)aom_malloc(MAX_NODES2 * (NN_MAX_HIDDEN_LAYERS + 1) *
                                      sizeof(*weights_buf));
    bias_buf =
        (float *)aom_malloc(NN_MAX_NODES_PER_LAYER *
                            (NN_MAX_HIDDEN_LAYERS + 1) * sizeof(*bias_buf));
    ASSERT_NE(weights_buf, nullptr);
    ASSERT_NE(bias_buf, nullptr);
    for (int i = 0; i < NN_MAX_HIDDEN_LAYERS + 1; i++) {
      weights[i] = &weights_buf[i * MAX_NODES2];
      bias[i] = &bias_buf[i * NN_MAX_NODES_PER_LAYER];
    }
    target_func_ = GET_PARAM(0);
  }
  virtual void TearDown() {
    aom_free(weights_buf);
    aom_free(bias_buf);
  }
  void RunNnPredictTest(const NN_CONFIG *const shape);
  void RunNnPredictSpeedTest(const NN_CONFIG *const shape, const int run_times);
  void RunNnPredictTest_all(const NN_CONFIG *const shapes,
                            const int num_shapes);
  void RunNnPredictSpeedTest_all(const NN_CONFIG *const shapes,
                                 const int num_shapes, const int run_times);

 private:
  NnPredict_Func target_func_;
  libaom_test::ACMRandom rng_;
  float *weights[NN_MAX_HIDDEN_LAYERS + 1] = { 0 };
  float *bias[NN_MAX_HIDDEN_LAYERS + 1] = { 0 };
  float *weights_buf = nullptr, *bias_buf = nullptr;
};

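// Functional test: run the kernel under test and the C reference
// (av1_nn_predict_c) on many random input/weight/bias sets and require the
// outputs to agree to within epsilon (relative, or absolute near zero).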
void NnPredictTest::RunNnPredictTest(const NN_CONFIG *const shape) {
  libaom_test::ClearSystemState();
  float inputs[NN_MAX_NODES_PER_LAYER] = { 0 };
  float outputs_test[NN_MAX_NODES_PER_LAYER] = { 0 };
  float outputs_ref[NN_MAX_NODES_PER_LAYER] = { 0 };

  NN_CONFIG nn_config;
  memcpy(&nn_config, shape, sizeof(nn_config));

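  // Build a human-readable description of the shape (e.g. "12x24x1":
  // inputs x hidden nodes x outputs) to include in failure messages.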
  char shape_str[32] = { 0 };
  snprintf(shape_str, sizeof(shape_str), "%d", shape->num_inputs);
  for (int layer = 0; layer < shape->num_hidden_layers; layer++)
    snprintf(&shape_str[strlen(shape_str)],
             sizeof(shape_str) - strlen(shape_str), "x%d",
             shape->num_hidden_nodes[layer]);
  snprintf(&shape_str[strlen(shape_str)], sizeof(shape_str) - strlen(shape_str),
           "x%d", shape->num_outputs);

  for (int i = 0; i < NN_MAX_HIDDEN_LAYERS + 1; i++) {
    nn_config.weights[i] = weights[i];
    nn_config.bias[i] = bias[i];
  }

  for (int iter = 0; iter < 10000 && !HasFatalFailure(); ++iter) {
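    // Rand31() is uniform on [0, 2^31); recenter and scale each draw so that
    // the inputs, weights and biases fall roughly in [-0.5, 0.5).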
    for (int node = 0; node < shape->num_inputs; node++) {
      inputs[node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
    }
    for (int layer = 0; layer < shape->num_hidden_layers; layer++) {
      for (int node = 0; node < NN_MAX_NODES_PER_LAYER; node++) {
        bias[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
      }
      for (int node = 0; node < NN_MAX_NODES_PER_LAYER * NN_MAX_NODES_PER_LAYER;
           node++) {
        weights[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
      }
    }
    // Now the outputs:
    int layer = shape->num_hidden_layers;
    for (int node = 0; node < NN_MAX_NODES_PER_LAYER; node++) {
      bias[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
    }
    for (int node = 0; node < NN_MAX_NODES_PER_LAYER * NN_MAX_NODES_PER_LAYER;
         node++) {
      weights[layer][node] = ((float)rng_.Rand31() - (1 << 30)) / (1u << 31);
    }

    av1_nn_predict_c(inputs, &nn_config, outputs_ref);
    target_func_(inputs, &nn_config, outputs_test);
    libaom_test::ClearSystemState();

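    // When the reference output is near zero the relative error is not
    // meaningful, so fall back to an absolute comparison there.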
    for (int node = 0; node < shape->num_outputs; node++) {
      if (fabsf(outputs_ref[node]) < epsilon) {
        ASSERT_LE(fabsf(outputs_test[node]), epsilon)
            << "Reference output was near-zero, test output was not ("
            << shape_str << ")";
      } else {
        const float error = outputs_ref[node] - outputs_test[node];
        const float relative_error = fabsf(error / outputs_ref[node]);
        ASSERT_LE(relative_error, epsilon)
            << "Excessive relative error between reference and test ("
            << shape_str << ")";
      }
    }
  }
}

void NnPredictTest::RunNnPredictSpeedTest(const NN_CONFIG *const shape,
                                          const int run_times) {
  libaom_test::ClearSystemState();
  float inputs[NN_MAX_NODES_PER_LAYER] = { 0 };
  float outputs_test[NN_MAX_NODES_PER_LAYER] = { 0 };
  float outputs_ref[NN_MAX_NODES_PER_LAYER] = { 0 };

  NN_CONFIG nn_config;
  memcpy(&nn_config, shape, sizeof(nn_config));

  // + 1: the output layer needs weight/bias pointers too.
  for (int i = 0; i < NN_MAX_HIDDEN_LAYERS + 1; i++) {
    nn_config.weights[i] = weights[i];
    nn_config.bias[i] = bias[i];
  }
  // Don't bother actually changing the values for inputs/weights/bias: it
  // shouldn't make any difference for a speed test.

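  // Time the C reference and the kernel under test back to back on the same
  // configuration.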
  aom_usec_timer timer;
  aom_usec_timer_start(&timer);
  for (int i = 0; i < run_times; ++i) {
    av1_nn_predict_c(inputs, &nn_config, outputs_ref);
  }
  aom_usec_timer_mark(&timer);
  const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
  aom_usec_timer_start(&timer);
  for (int i = 0; i < run_times; ++i) {
    target_func_(inputs, &nn_config, outputs_test);
  }
  aom_usec_timer_mark(&timer);
  libaom_test::ClearSystemState();
  const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));

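  // Report the shape, the total elapsed time for the C and optimized versions,
  // and the resulting speedup factor.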
  printf("%d", shape->num_inputs);
  for (int layer = 0; layer < shape->num_hidden_layers; layer++)
    printf("x%d", shape->num_hidden_nodes[layer]);
  printf("x%d: ", shape->num_outputs);
  printf("%7.2f/%7.2f us (%3.2f)\n", time1, time2, time1 / time2);
}


// These are all the neural-network shapes observed being executed in a few
// different runs of the encoder; they also conveniently cover all the kernels
// implemented.
static const NN_CONFIG shapes[] = {
  { 10, 16, 1, { 64 }, { 0 }, { 0 } }, { 12, 1, 1, { 12 }, { 0 }, { 0 } },
  { 12, 1, 1, { 24 }, { 0 }, { 0 } }, { 12, 1, 1, { 32 }, { 0 }, { 0 } },
  { 18, 4, 1, { 24 }, { 0 }, { 0 } }, { 18, 4, 1, { 32 }, { 0 }, { 0 } },
  { 4, 1, 1, { 16 }, { 0 }, { 0 } }, { 8, 1, 1, { 16 }, { 0 }, { 0 } },
  { 8, 4, 1, { 16 }, { 0 }, { 0 } }, { 8, 1, 1, { 24 }, { 0 }, { 0 } },
  { 8, 1, 1, { 32 }, { 0 }, { 0 } }, { 8, 1, 1, { 64 }, { 0 }, { 0 } },
  { 9, 3, 1, { 32 }, { 0 }, { 0 } }, { 4, 4, 1, { 8 }, { 0 }, { 0 } },
};

void NnPredictTest::RunNnPredictTest_all(const NN_CONFIG *const shapes,
                                         const int num_shapes) {
  for (int i = 0; i < num_shapes; i++) RunNnPredictTest(&shapes[i]);
}

void NnPredictTest::RunNnPredictSpeedTest_all(const NN_CONFIG *const shapes,
                                              const int num_shapes,
                                              const int run_times) {
  for (int i = 0; i < num_shapes; i++)
    NnPredictTest::RunNnPredictSpeedTest(&shapes[i], run_times);
}

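// Checks functional equivalence against av1_nn_predict_c for every shape in
// the table above.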
TEST_P(NnPredictTest, RandomValues) {
  RunNnPredictTest_all(shapes, sizeof(shapes) / sizeof(*shapes));
}

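// Speed comparison against the C reference; disabled by default, run it with
// --gtest_also_run_disabled_tests.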
TEST_P(NnPredictTest, DISABLED_Speed) {
  RunNnPredictSpeedTest_all(shapes, sizeof(shapes) / sizeof(*shapes), 10000000);
}

#if HAVE_SSE3
INSTANTIATE_TEST_CASE_P(SSE3, NnPredictTest,
                        ::testing::Values(av1_nn_predict_sse3));
#endif

}  // namespace