1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
#include <algorithm>
#include <cmath>
#include <functional>
#include <memory>

#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/random/simple_philox.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"
34
35 namespace tensorflow {
36
// Absolute tolerance used when comparing LRN kernel outputs against expected
// values, both in the hand-computed spot checks and in the element-wise
// comparison done by LRNFloatTest::Compare(). Declared constexpr so it is
// guaranteed to be a compile-time constant.
static constexpr float tol_ = 1e-4;
38
39 class LRNFloatTest : public OpsTestBase {
40 protected:
LRNFloatTest()41 LRNFloatTest() : philox_(123, 17), rand_(&philox_) {}
42
GetIntAttr(const string & name)43 int GetIntAttr(const string& name) {
44 int value;
45 TF_CHECK_OK(GetNodeAttr(*node_def(), name, &value));
46 return value;
47 }
48
GetFloatAttr(const string & name)49 float GetFloatAttr(const string& name) {
50 float value;
51 TF_CHECK_OK(GetNodeAttr(*node_def(), name, &value));
52 return value;
53 }
54
Compare()55 bool Compare() {
56 const auto& input = GetInput(0);
57 const int64_t batch_size = input.dim_size(0);
58 const int64_t rows = input.dim_size(1);
59 const int64_t cols = input.dim_size(2);
60 const int64_t depth = input.dim_size(3);
61 const int64_t rest = cols * rows * batch_size;
62
63 const int64_t depth_radius = GetIntAttr("depth_radius");
64 const float bias = GetFloatAttr("bias");
65 const float alpha = GetFloatAttr("alpha");
66 const float beta = GetFloatAttr("beta");
67
68 Eigen::Tensor<float, 4, Eigen::RowMajor> expected(batch_size, rows, cols,
69 depth);
70 auto out = expected.reshape(Eigen::DSizes<Eigen::Index, 2>{rest, depth});
71 auto in = input.shaped<float, 2>({rest, depth});
72
73 for (int64_t i = 0; i < rest; ++i) {
74 Eigen::Tensor<float, 1, Eigen::RowMajor> out_col(depth);
75 for (int64_t d = 0; d < depth; ++d) {
76 float denom = 0.0f;
77 for (int64_t r = std::max(int64_t{0}, d - depth_radius);
78 r < std::min(depth, d + depth_radius + 1); ++r) {
79 denom += in(i, r) * in(i, r);
80 }
81 denom = std::pow(denom * alpha + bias, beta);
82 out_col(d) = in(i, d) / denom;
83 }
84 out.chip<0>(i) = out_col;
85 }
86 auto actual = GetOutput(0)->tensor<float, 4>();
87 Eigen::Tensor<float, 0, Eigen::RowMajor> sum =
88 ((expected - actual).abs() > actual.constant(tol_))
89 .select(actual.constant(1), actual.constant(0))
90 .sum();
91 return sum() == 0;
92 }
93
94 random::PhiloxRandom philox_;
95 random::SimplePhilox rand_;
96 };
97
// Runs LRN (depth_radius=5, bias=1, alpha=0.1, beta=2) over a single 96-deep
// vector holding 1, 2, ..., 96, spot-checks a few outputs against
// hand-computed values, then compares the whole output with Compare().
//
// Each output is value / (bias + alpha * S)^beta, where S is the sum of
// squares over the depth window [d - 5, d + 5] clipped to [0, 95].
TEST_F(LRNFloatTest, Depth96) {
  TF_ASSERT_OK(NodeDefBuilder("lrn_op", "LRN")
                   .Input(FakeInput())
                   .Attr("depth_radius", 5)
                   .Attr("bias", 1.0f)
                   .Attr("alpha", 0.1f)
                   .Attr("beta", 2.0f)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInput<float>(TensorShape({1, 1, 1, 96}),
                  [](int i) -> float { return i + 1; });
  TF_ASSERT_OK(RunOpKernel());
  auto actual = GetOutput(0)->tensor<float, 4>();

  // Output for Node 0 with Value 1 (window clipped on the left):
  // 1 / (1 + 0.1*(1^2 + 2^2 + 3^2 + 4^2 + 5^2 + 6^2))^2
  EXPECT_NEAR(1. / (10.1 * 10.1), actual(0, 0, 0, 0), tol_);

  // Output for Node 5 with Value 6 (first node with a full window):
  // 6 / (1 + 0.1*(1^2 + 2^2 + 3^2 + 4^2 + 5^2 + 6^2 ... + 11^2))^2
  EXPECT_NEAR(6. / (51.6 * 51.6), actual(0, 0, 0, 5), tol_);

  // NOTE: the expected values for nodes 63, 64 and 95 below are smaller than
  // tol_, so these three spot checks are loose; the Compare() call at the end
  // checks every element against the reference implementation.

  // Output for Node 63 with value 64 (full window, values 59..69):
  // 64 / (1 + 0.1*(59^2 + 60^2 + 61^2 + ... + 68^2 + 69^2))^2
  EXPECT_NEAR(64. / (4517.6 * 4517.6), actual(0, 0, 0, 63), tol_);

  // Output for Node 64 with value 65 (full window, values 60..70):
  // 65 / (1 + 0.1*(60^2 + 61^2 + 62^2 + ... + 69^2 + 70^2))^2
  EXPECT_NEAR(65. / (4659.5 * 4659.5), actual(0, 0, 0, 64), tol_);

  // Output for Node 95 with value 96 (window clipped on the right):
  // 96 / (1 + 0.1*(91^2 + 92^2 + 93^2 + 94^2 + 95^2 + 96^2))^2
  EXPECT_NEAR(96. / (5248.1 * 5248.1), actual(0, 0, 0, 95), tol_);
  EXPECT_TRUE(Compare());
}
133
// Runs LRN (depth_radius=5, bias=1, alpha=0.1, beta=2) over a single 16-deep
// vector holding 1, 2, ..., 16. Spot-checks the first element, the first
// element with a full window and the last element, then compares the whole
// output with Compare().
TEST_F(LRNFloatTest, Depth16) {
  TF_ASSERT_OK(NodeDefBuilder("lrn_op", "LRN")
                   .Input(FakeInput())
                   .Attr("depth_radius", 5)
                   .Attr("bias", 1.0f)
                   .Attr("alpha", 0.1f)
                   .Attr("beta", 2.0f)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInput<float>(TensorShape({1, 1, 1, 16}),
                  [](int index) -> float { return index + 1; });
  TF_ASSERT_OK(RunOpKernel());
  auto output = GetOutput(0)->tensor<float, 4>();

  // With beta = 2 every expected output is value / norm^2, where
  // norm = 1 + 0.1 * (sum of squares over the clipped depth window).
  const auto expected = [](double value, double norm) {
    return value / (norm * norm);
  };

  // Node 0 (value 1): window holds 1..6, so norm = 1 + 0.1 * 91.
  EXPECT_NEAR(expected(1., 10.1), output(0, 0, 0, 0), tol_);

  // Node 5 (value 6): window holds 1..11, so norm = 1 + 0.1 * 506.
  EXPECT_NEAR(expected(6., 51.6), output(0, 0, 0, 5), tol_);

  // Node 15 (value 16): window holds 11..16, so norm = 1 + 0.1 * 1111.
  EXPECT_NEAR(expected(16., 112.1), output(0, 0, 0, 15), tol_);
  EXPECT_TRUE(Compare());
}
161
RndGaussian(random::SimplePhilox * rnd)162 static double RndGaussian(random::SimplePhilox* rnd) {
163 // Box-Muller transformation.
164 // See, for example, http://www.taygeta.com/random/gaussian.html
165 double x1, x2;
166 double r;
167 do {
168 x1 = 2 * rnd->RandDouble() - 1;
169 x2 = 2 * rnd->RandDouble() - 1;
170 r = x1 * x1 + x2 * x2;
171 } while (r == 0 || r >= 1.0);
172 double w = sqrt(-2.0 * log(r) / r);
173 return x1 * w;
174 }
175
// Defines TEST_F(LRNFloatTest, NAME): builds an LRN node with the given
// attributes, feeds it a [BATCH, 1, 1, DEPTH] input filled by RndGaussian()
// and checks the kernel output with Compare(). Note that the "alpha"
// attribute handed to the op is ALPHA / 10.
#define TCASE(NAME, DEPTH, BATCH, DEPTH_RADIUS, BIAS, ALPHA, BETA) \
  TEST_F(LRNFloatTest, NAME) { \
    TF_ASSERT_OK(NodeDefBuilder("lrn_op", "LRN") \
                     .Input(FakeInput()) \
                     .Attr("depth_radius", (DEPTH_RADIUS)) \
                     .Attr("bias", (BIAS)) \
                     .Attr("alpha", ((ALPHA) / 10)) \
                     .Attr("beta", (BETA)) \
                     .Finalize(node_def())); \
    TF_ASSERT_OK(InitOp()); \
    const TensorShape input_shape({BATCH, 1, 1, DEPTH}); \
    const auto gaussian = [this](int /*index*/) -> float { \
      return RndGaussian(&rand_); \
    }; \
    AddInput<float>(input_shape, gaussian); \
    TF_ASSERT_OK(RunOpKernel()); \
    EXPECT_TRUE(Compare()); \
  }
191
// Randomized test cases. Each row below instantiates one TEST_F through
// TCASE; the first argument is the test name and the remaining arguments
// follow the column header. The op receives ALPHA / 10 as its "alpha"
// attribute (see the TCASE definition).
// clang-format off
// NAME, DEPTH, BATCH, DEPTH_RADIUS, BIAS, ALPHA, BETA
TCASE(T0, 4, 2, 2, 1.0f, 1.0f, 2.0f)
TCASE(T1, 16, 1, 5, 1.0f, 1.0f, 2.0f)
TCASE(T2, 16, 32, 2, 1.0f, 2.0f, 1.0f)
TCASE(T3, 128, 4, 3, 2.0f, 1.0f, 1.0f)
// clang-format on

// TCASE is only needed for the instantiations above.
#undef TCASE
201
MakeRNGrad(int batches,int rows,int cols,int depth,int depth_radius)202 static Graph* MakeRNGrad(int batches, int rows, int cols, int depth,
203 int depth_radius) {
204 Graph* g = new Graph(OpRegistry::Global());
205 Tensor grads(DT_FLOAT, TensorShape({batches, rows, cols, depth}));
206 grads.flat<float>().setRandom();
207
208 Tensor in(DT_FLOAT, TensorShape({batches, rows, cols, depth}));
209 in.flat<float>().setRandom();
210
211 Tensor out(DT_FLOAT, TensorShape({batches, rows, cols, depth}));
212
213 Node* ret;
214 TF_CHECK_OK(NodeBuilder(g->NewName("lrn_grad_op"), "LRNGrad")
215 .Input(test::graph::Constant(g, grads))
216 .Input(test::graph::Constant(g, in))
217 .Input(test::graph::Constant(g, out))
218 .Attr("depth_radius", depth_radius)
219 .Attr("bias", 1.0f)
220 .Attr("alpha", 1.0f / 10)
221 .Attr("beta", 2.0f)
222 .Finalize(g, &ret));
223 return g;
224 }
225
// Defines and registers a benchmark named
// BM_LRNGrad_<DEVICE>_<B>_<R>_<C>_<D>_<DR> that runs the graph returned by
// MakeRNGrad(B, R, C, D, DR) on DEVICE and reports
// iterations * B * R * C * D * DR * 4 as the number of items processed.
// The arguments are pasted into the function name, so they must be plain
// tokens (identifiers or integer literals).
#define BM_LRNGradDev(DEVICE, B, R, C, D, DR) \
  static void BM_LRNGrad_##DEVICE##_##B##_##R##_##C##_##D##_##DR( \
      ::testing::benchmark::State& state) { \
    Graph* const graph = MakeRNGrad(B, R, C, D, DR); \
    test::Benchmark(#DEVICE, graph, /*old_benchmark_api*/ false).Run(state); \
    const int64_t items_per_iteration = int64_t{B} * R * C * D * DR * 4; \
    state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * \
                            items_per_iteration); \
  } \
  BENCHMARK(BM_LRNGrad_##DEVICE##_##B##_##R##_##C##_##D##_##DR)
236
// CPU benchmark instantiations. The arguments after the device are
// (B, R, C, D, DR), which the macro forwards to MakeRNGrad() as
// (batches, rows, cols, depth, depth_radius).
BM_LRNGradDev(cpu, 128, 12, 12, 64, 4);
BM_LRNGradDev(cpu, 128, 56, 56, 64, 2);
BM_LRNGradDev(cpu, 128, 27, 27, 192, 2);
240
241 } // namespace tensorflow
242