1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/compiler/xla/util.h"
17
18 #include <stdarg.h>
19
20 #include <cmath>
21 #include <limits>
22 #include <numeric>
23
24 #include "absl/container/flat_hash_map.h"
25 #include "absl/container/inlined_vector.h"
26 #include "absl/strings/match.h"
27 #include "absl/strings/str_cat.h"
28 #include "absl/strings/str_format.h"
29 #include "absl/strings/str_join.h"
30 #include "absl/strings/str_split.h"
31 #include "tensorflow/compiler/xla/types.h"
32 #include "tensorflow/core/lib/bfloat16/bfloat16.h"
33 #include "tensorflow/core/lib/core/errors.h"
34 #include "tensorflow/core/lib/math/math_util.h"
35 #include "tensorflow/core/lib/strings/numbers.h"
36 #include "tensorflow/core/platform/env.h"
37 #include "tensorflow/core/platform/mutex.h"
38 #include "tensorflow/core/platform/numbers.h"
39 #include "tensorflow/core/platform/stacktrace.h"
40
41 namespace xla {
42
// Returns `status` unchanged after logging it (VLOG level 1) together with
// the current stack trace (VLOG level 2). `status` must be a non-OK status.
Status WithLogBacktrace(const Status& status) {
  CHECK(!status.ok());
  VLOG(1) << status.ToString();
  VLOG(2) << tensorflow::CurrentStackTrace();
  return status;
}
49
// Starts the timer iff `enabled` is true. `file` and `line` are the source
// location attributed to the log message emitted by StopAndLog().
// `timer_stats` must outlive this object; it accumulates statistics across
// all timers that share it.
ScopedLoggingTimer::ScopedLoggingTimer(const std::string& label, bool enabled,
                                       const char* file, int line,
                                       TimerStats* timer_stats)
    : enabled_(enabled),
      file_(file),
      line_(line),
      label_(label),
      timer_stats_(timer_stats) {
  if (enabled_) {
    // Capture the wall-clock start time only when timing is enabled.
    start_micros_ = tensorflow::Env::Default()->NowMicros();
  }
}
62
// Stops the timer, folds the elapsed time into the shared TimerStats, and
// logs both the per-call and cumulative timings. No-op once the timer has
// been stopped (or if it was constructed disabled), so calling it before
// destruction is safe.
void ScopedLoggingTimer::StopAndLog() {
  if (enabled_) {
    uint64 end_micros = tensorflow::Env::Default()->NowMicros();
    double secs = (end_micros - start_micros_) / 1000000.0;

    // Update the shared stats under their mutex; multiple timers may share
    // one TimerStats instance.
    TimerStats& stats = *timer_stats_;
    tensorflow::mutex_lock lock(stats.stats_mutex);
    stats.cumulative_secs += secs;
    if (secs > stats.max_secs) {
      stats.max_secs = secs;
    }
    stats.times_called++;

    // Attribute the log message to the caller's file/line, not this file.
    LOG(INFO).AtLocation(file_, line_)
        << label_
        << " time: " << tensorflow::strings::HumanReadableElapsedTime(secs)
        << " (cumulative: "
        << tensorflow::strings::HumanReadableElapsedTime(stats.cumulative_secs)
        << ", max: "
        << tensorflow::strings::HumanReadableElapsedTime(stats.max_secs)
        << ", #called: " << stats.times_called << ")";
    // Prevent double-logging when the destructor runs.
    enabled_ = false;
  }
}
87
~ScopedLoggingTimer()88 ScopedLoggingTimer::~ScopedLoggingTimer() { StopAndLog(); }
89
AddStatus(Status prior,absl::string_view context)90 Status AddStatus(Status prior, absl::string_view context) {
91 CHECK(!prior.ok());
92 return Status{prior.code(),
93 absl::StrCat(context, ": ", prior.error_message())};
94 }
95
AppendStatus(Status prior,absl::string_view context)96 Status AppendStatus(Status prior, absl::string_view context) {
97 CHECK(!prior.ok());
98 return Status{prior.code(),
99 absl::StrCat(prior.error_message(), ": ", context)};
100 }
101
Reindent(absl::string_view original,const absl::string_view indentation)102 string Reindent(absl::string_view original,
103 const absl::string_view indentation) {
104 std::vector<string> pieces =
105 absl::StrSplit(absl::string_view(original.data(), original.size()), '\n');
106 return absl::StrJoin(pieces, "\n", [indentation](string* out, string s) {
107 absl::StrAppend(out, indentation, absl::StripAsciiWhitespace(s));
108 });
109 }
110
IsPermutation(absl::Span<const int64> permutation,int64 rank)111 bool IsPermutation(absl::Span<const int64> permutation, int64 rank) {
112 if (rank != permutation.size()) {
113 return false;
114 }
115 absl::InlinedVector<int64, 8> trivial_permutation(rank);
116 absl::c_iota(trivial_permutation, 0);
117 return absl::c_is_permutation(permutation, trivial_permutation);
118 }
119
InversePermutation(absl::Span<const int64> input_permutation)120 std::vector<int64> InversePermutation(
121 absl::Span<const int64> input_permutation) {
122 DCHECK(IsPermutation(input_permutation, input_permutation.size()));
123 std::vector<int64> output_permutation(input_permutation.size(), -1);
124 for (size_t i = 0; i < input_permutation.size(); ++i) {
125 output_permutation.at(input_permutation.at(i)) = i;
126 }
127 return output_permutation;
128 }
129
ComposePermutations(absl::Span<const int64> p1,absl::Span<const int64> p2)130 std::vector<int64> ComposePermutations(absl::Span<const int64> p1,
131 absl::Span<const int64> p2) {
132 CHECK_EQ(p1.size(), p2.size());
133 std::vector<int64> output;
134 for (size_t i = 0; i < p1.size(); ++i) {
135 output.push_back(p1.at(p2.at(i)));
136 }
137 return output;
138 }
139
IsIdentityPermutation(absl::Span<const int64> permutation)140 bool IsIdentityPermutation(absl::Span<const int64> permutation) {
141 for (int64 i = 0; i < permutation.size(); ++i) {
142 if (permutation[i] != i) {
143 return false;
144 }
145 }
146 return true;
147 }
148
// Formats a bfloat16 with 4 significant decimal digits — enough to
// round-trip its 8-bit significand back to the same bfloat16 value.
string RoundTripFpToString(tensorflow::bfloat16 value) {
  return absl::StrFormat("%.4g", static_cast<float>(value));
}
152
// Formats a half (fp16) with 5 significant decimal digits — enough to
// round-trip its 11-bit significand back to the same half value.
string RoundTripFpToString(Eigen::half value) {
  return absl::StrFormat("%.5g", static_cast<float>(value));
}
156
// Formats a float so that parsing the string yields exactly `value` again,
// using tensorflow's round-trip-safe FloatToBuffer.
string RoundTripFpToString(float value) {
  char buffer[tensorflow::strings::kFastToBufferSize];
  tensorflow::strings::FloatToBuffer(value, buffer);
  return buffer;
}
162
// Formats a double so that parsing the string yields exactly `value` again,
// using tensorflow's round-trip-safe DoubleToBuffer.
string RoundTripFpToString(double value) {
  char buffer[tensorflow::strings::kFastToBufferSize];
  tensorflow::strings::DoubleToBuffer(value, buffer);
  return buffer;
}
168
MakeNoPaddingConfig(int64 rank)169 PaddingConfig MakeNoPaddingConfig(int64 rank) {
170 PaddingConfig padding_config;
171 for (int64 dnum = 0; dnum < rank; ++dnum) {
172 auto dimension = padding_config.add_dimensions();
173 dimension->set_edge_padding_low(0);
174 dimension->set_edge_padding_high(0);
175 dimension->set_interior_padding(0);
176 }
177 return padding_config;
178 }
179
MakeEdgePaddingConfig(absl::Span<const std::pair<int64,int64>> padding)180 PaddingConfig MakeEdgePaddingConfig(
181 absl::Span<const std::pair<int64, int64>> padding) {
182 PaddingConfig padding_config;
183 for (const std::pair<int64, int64>& dim : padding) {
184 auto dimension = padding_config.add_dimensions();
185 dimension->set_edge_padding_low(dim.first);
186 dimension->set_edge_padding_high(dim.second);
187 dimension->set_interior_padding(0);
188 }
189 return padding_config;
190 }
191
HasInteriorPadding(const PaddingConfig & config)192 bool HasInteriorPadding(const PaddingConfig& config) {
193 for (const auto& dim : config.dimensions()) {
194 if (dim.interior_padding() != 0) {
195 return true;
196 }
197 }
198 return false;
199 }
200
201 namespace {
HumanReadableNumOps(double flops,double nanoseconds,absl::string_view op_prefix)202 string HumanReadableNumOps(double flops, double nanoseconds,
203 absl::string_view op_prefix) {
204 if (nanoseconds == 0) {
205 return absl::StrCat("NaN ", op_prefix, "OP/s");
206 }
207 double nano_flops = flops / nanoseconds;
208 string throughput = tensorflow::strings::HumanReadableNum(
209 static_cast<int64>(nano_flops * 1e9));
210 absl::string_view sp(throughput);
211 // Use the more common "G(FLOPS)", rather than "B(FLOPS)"
212 if (absl::EndsWith(sp, "B") || // Ends in 'B', ignoring case
213 absl::EndsWith(sp, "b")) {
214 *throughput.rbegin() = 'G';
215 }
216 throughput += absl::StrCat(op_prefix, "OP/s");
217 return throughput;
218 }
219 } // namespace
220
// Formats `flops` floating-point operations over `nanoseconds` as a
// human-readable "...FLOP/s" rate.
string HumanReadableNumFlops(double flops, double nanoseconds) {
  return HumanReadableNumOps(flops, nanoseconds, "FL");
}
224
// Formats `trops` transcendental operations over `nanoseconds` as a
// human-readable "...TROP/s" rate.
string HumanReadableNumTranscendentalOps(double trops, double nanoseconds) {
  return HumanReadableNumOps(trops, nanoseconds, "TR");
}
228
// Logs `text` one line at a time at severity `sev`, attributing every
// message to `fname`:`lineno`. FATAL severity is demoted to ERROR for the
// individual lines so the entire text is emitted before a final FATAL
// record aborts the process.
void LogLines(int sev, absl::string_view text, const char* fname, int lineno) {
  const int orig_sev = sev;
  if (sev == tensorflow::FATAL) {
    // Defer the fatal abort until all lines have been logged.
    sev = tensorflow::ERROR;
  }

  // Protect calls with a mutex so we don't interleave calls to LogLines from
  // multiple threads.
  static tensorflow::mutex log_lines_mu(tensorflow::LINKER_INITIALIZED);
  tensorflow::mutex_lock lock(log_lines_mu);

  size_t cur = 0;
  while (cur < text.size()) {
    size_t eol = text.find('\n', cur);
    if (eol == absl::string_view::npos) {
      // Final line without a trailing newline.
      eol = text.size();
    }
    auto msg = text.substr(cur, eol - cur);
    tensorflow::internal::LogString(fname, lineno, sev,
                                    string(msg.data(), msg.size()));
    cur = eol + 1;
  }

  if (orig_sev == tensorflow::FATAL) {
    // Logging at FATAL severity terminates the process.
    tensorflow::internal::LogString(fname, lineno, orig_sev,
                                    "Aborting due to errors.");
  }
}
257
Product(absl::Span<const int64> xs)258 int64 Product(absl::Span<const int64> xs) {
259 return std::accumulate(xs.begin(), xs.end(), static_cast<int64>(1),
260 std::multiplies<int64>());
261 }
262
// Returns the index pairs (i, j) at which the running products of `a` and
// `b` coincide — the boundaries of the common factorization of the two
// dimension sequences. The result always contains (0, 0) and
// (a.size(), b.size()). CHECK-fails unless Product(a) == Product(b).
absl::InlinedVector<std::pair<int64, int64>, 8> CommonFactors(
    absl::Span<const int64> a, absl::Span<const int64> b) {
  CHECK_EQ(Product(a), Product(b));
  // With a zero element count no nontrivial factor boundaries exist; report
  // only the outermost bounds.
  if (0 == Product(a)) {
    return {std::make_pair(0, 0), std::make_pair(a.size(), b.size())};
  }

  absl::InlinedVector<std::pair<int64, int64>, 8> bounds;
  // i/j index into a/b; partial_size_a/b hold the products of the dimensions
  // consumed so far; prior_i/prior_j remember the last emitted bound so each
  // (i, j) pair is recorded only once.
  for (int64 i = 0, j = 0, prior_i = -1, prior_j = -1, partial_size_a = 1,
             partial_size_b = 1;
       ;) {
    // When the partial products agree and at least one index advanced since
    // the last emission, (i, j) is a common-factor boundary.
    if (partial_size_a == partial_size_b && (i > prior_i || j > prior_j)) {
      std::tie(prior_i, prior_j) = std::make_pair(i, j);
      bounds.emplace_back(i, j);
      continue;
    }
    bool in_bounds_i = i < a.size();
    bool in_bounds_j = j < b.size();
    if (!(in_bounds_i || in_bounds_j)) {
      break;
    }
    // Advance the side whose partial product lags; on a tie, advance the
    // side(s) still in bounds, preferring the smaller next dimension.
    bool next_a =
        partial_size_a < partial_size_b ||
        (in_bounds_i &&
         (!in_bounds_j || (partial_size_a == partial_size_b && a[i] <= b[j])));
    bool next_b =
        partial_size_b < partial_size_a ||
        (in_bounds_j &&
         (!in_bounds_i || (partial_size_b == partial_size_a && b[j] <= a[i])));
    if (next_a) {
      partial_size_a *= a[i];
      ++i;
    }
    if (next_b) {
      partial_size_b *= b[j];
      ++j;
    }
  }
  return bounds;
}
303
SanitizeFileName(string file_name)304 string SanitizeFileName(string file_name) {
305 for (char& c : file_name) {
306 if (c == '/' || c == '\\' || c == '[' || c == ']' || c == ' ') {
307 c = '_';
308 }
309 }
310 return file_name;
311 }
312
313 // Utility function to split a double-precision float (F64) into a pair of F32s.
314 // For a p-bit number, and a splitting point (p/2) <= s <= (p - 1), the
315 // algorithm produces a (p - s)-bit value 'hi' and a non-overlapping (s - 1)-bit
316 // value 'lo'. See Theorem 4 in [1] (attributed to Dekker) or [2] for the
317 // original theorem by Dekker.
318 //
319 // For double-precision F64s, which contain a 53 bit mantissa (52 of them
320 // explicit), we can represent the most significant 49 digits as the unevaluated
321 // sum of two single-precision floats 'hi' and 'lo'. The 'hi' float stores the
322 // most significant 24 bits and the sign bit of 'lo' together with its mantissa
323 // store the remaining 25 bits. The exponent of the resulting representation is
324 // still restricted to 8 bits of F32.
325 //
326 // References:
327 // [1] A. Thall, Extended-Precision Floating-Point Numbers for GPU Computation,
328 // SIGGRAPH Research Posters, 2006.
329 // (http://andrewthall.org/papers/df64_qf128.pdf)
330 // [2] T. J. Dekker, A floating point technique for extending the available
331 // precision, Numerische Mathematik, vol. 18, pp. 224–242, 1971.
SplitF64ToF32(double x)332 std::pair<float, float> SplitF64ToF32(double x) {
333 const float x_f32 = static_cast<float>(x);
334 // Early return if x is an infinity or NaN.
335 if (!std::isfinite(x)) {
336 return std::make_pair(x_f32, 0.0f);
337 }
338
339 // Only values within the range of F32 are supported, unless it is infinity.
340 // Small values with large negative exponents would be rounded to zero.
341 CHECK(std::isfinite(x_f32)) << x;
342
343 // The high float is simply the double rounded to the nearest float. Because
344 // we are rounding to nearest with ties to even, the error introduced in
345 // rounding is less than half an ULP in the high ULP.
346 const float hi = x_f32;
347 // We can compute the low term using Sterbenz' lemma: If a and b are two
348 // positive floating point numbers and a/2 ≤ b ≤ 2a, then their difference can
349 // be computed exactly.
350 // Note: the difference is computed exactly but is rounded to the nearest
351 // float which will introduce additional error.
352 const float lo = static_cast<float>(x - static_cast<double>(hi));
353 return std::make_pair(hi, lo);
354 }
355
356 } // namespace xla
357