/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/util.h"

#include <stdarg.h>

#include <cmath>
#include <limits>
#include <numeric>
#include <optional>
#include <string>

#include "absl/algorithm/container.h"
#include "absl/base/casts.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/inlined_vector.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/math/math_util.h"
#include "tensorflow/core/platform/bfloat16.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/numbers.h"
#include "tensorflow/core/platform/stacktrace.h"

namespace xla {

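// Example (illustrative, not from the original source): with bounds {2, 3},
// ToMixedRadix(5, {2, 3}) returns {1, 2}, since 5 = 1 * 3 + 2 and the last
// radix position varies fastest.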
std::vector<int64_t> ToMixedRadix(const int64_t n,
                                  absl::Span<const int64_t> bounds) {
  if (bounds.empty()) {
    return {};
  }

  std::vector<int64_t> digits;
  digits.reserve(bounds.size());
  int64_t divisor = Product(bounds);
  CHECK_GT(divisor, 0);
  int64_t remainder = n % divisor;
  for (const int64_t radix : bounds) {
    CHECK_GT(radix, 0);
    divisor /= radix;
    CHECK_GT(divisor, 0);

    // The divisor is always 1 for the last iteration.
    digits.push_back(remainder / divisor);
    remainder = remainder % divisor;
  }
  return digits;
}

Status WithLogBacktrace(const Status& status) {
  CHECK(!status.ok());
  VLOG(1) << status.ToString();
  VLOG(2) << tensorflow::CurrentStackTrace();
  return status;
}

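// Illustrative usage sketch (call-site names are hypothetical):
//
//   static TimerStats stats;  // shared across invocations
//   {
//     ScopedLoggingTimer timer("my-pass", /*enabled=*/true, __FILE__,
//                              __LINE__, &stats);
//     ...  // work to be timed
//   }  // Destructor calls StopAndLog() and emits the LOG(INFO) line.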
ScopedLoggingTimer::ScopedLoggingTimer(absl::string_view label, bool enabled,
                                       const char* file, int line,
                                       TimerStats* timer_stats)
    : label_(label),
      file_(file),
      line_(line),
      timer_stats_(timer_stats),
      enabled_(enabled) {
  if (enabled_) {
    start_micros_ = tensorflow::Env::Default()->NowMicros();
  }
}

void ScopedLoggingTimer::StopAndLog() {
  if (enabled_) {
    uint64_t end_micros = tensorflow::Env::Default()->NowMicros();
    double secs = (end_micros - start_micros_) / 1000000.0;

    TimerStats& stats = *timer_stats_;
    absl::MutexLock lock(&stats.stats_mutex);
    stats.cumulative_secs += secs;
    if (secs > stats.max_secs) {
      stats.max_secs = secs;
    }
    stats.times_called++;

    LOG(INFO).AtLocation(file_, line_)
        << label_
        << " time: " << tensorflow::strings::HumanReadableElapsedTime(secs)
        << " (cumulative: "
        << tensorflow::strings::HumanReadableElapsedTime(stats.cumulative_secs)
        << ", max: "
        << tensorflow::strings::HumanReadableElapsedTime(stats.max_secs)
        << ", #called: " << stats.times_called << ")";
    enabled_ = false;
  }
}

ScopedLoggingTimer::~ScopedLoggingTimer() { StopAndLog(); }

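// Example (illustrative): if `prior` carries the message "bad operand", then
// AddStatus(prior, "while verifying") produces "while verifying: bad operand",
// whereas AppendStatus(prior, "in pass") produces "bad operand: in pass".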
Status AddStatus(Status prior, absl::string_view context) {
  CHECK(!prior.ok());
  return Status{prior.code(),
                absl::StrCat(context, ": ", prior.error_message())};
}

Status AppendStatus(Status prior, absl::string_view context) {
  CHECK(!prior.ok());
  return Status{prior.code(),
                absl::StrCat(prior.error_message(), ": ", context)};
}

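// Example (illustrative): Reindent("  foo\n bar", "    ") strips the existing
// leading/trailing whitespace from each line and yields "    foo\n    bar".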
std::string Reindent(absl::string_view original,
                     const absl::string_view indentation) {
  std::vector<std::string> pieces =
      absl::StrSplit(absl::string_view(original.data(), original.size()), '\n');
  return absl::StrJoin(
      pieces, "\n", [indentation](std::string* out, absl::string_view s) {
        absl::StrAppend(out, indentation, absl::StripAsciiWhitespace(s));
      });
}

template <typename FloatT>
static void RoundTripNanPayload(FloatT value, std::string* result) {
  const int kPayloadBits = NanPayloadBits<FloatT>();
  if (std::isnan(value) && kPayloadBits > 0) {
    auto rep = absl::bit_cast<
        typename UnsignedIntegerTypeForSize<sizeof(FloatT)>::type>(value);
    auto payload = rep & NanPayloadBitMask<FloatT>();
    if (payload != QuietNanWithoutPayload<FloatT>()) {
      absl::StrAppendFormat(result, "(0x%x)", payload);
    }
  }
}

template <typename FloatT>
static std::string GenericRoundTripFpToString(FloatT value) {
  // TODO(majnemer): Remove this temporary variable once Eigen creates a symbol
  // definition for `max_digits10`.
  int max_decimal_digits = std::numeric_limits<FloatT>::max_digits10;
  return absl::StrFormat("%.*g", max_decimal_digits,
                         static_cast<double>(value));
}

std::string RoundTripFpToString(bfloat16 value) {
  std::string result = GenericRoundTripFpToString(value);
  RoundTripNanPayload(value, &result);
  return result;
}

std::string RoundTripFpToString(half value) {
  std::string result = GenericRoundTripFpToString(value);
  RoundTripNanPayload(value, &result);
  return result;
}

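// This overload (and the double overload below) prefers the shorter digits10
// form when it already round-trips back to the same value, and only falls
// back to the full max_digits10 form otherwise. This prints e.g. "0.1"
// instead of "0.100000001" whenever the short form is lossless.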
std::string RoundTripFpToString(float value) {
  float parsed_result;
  std::string result =
      absl::StrFormat("%.*g", std::numeric_limits<float>::digits10, value);
  if (!absl::SimpleAtof(result, &parsed_result) || parsed_result != value) {
    result = GenericRoundTripFpToString(value);
  }
  RoundTripNanPayload(value, &result);
  return result;
}

std::string RoundTripFpToString(double value) {
  double parsed_result;
  std::string result =
      absl::StrFormat("%.*g", std::numeric_limits<double>::digits10, value);
  if (!absl::SimpleAtod(result, &parsed_result) || parsed_result != value) {
    result = GenericRoundTripFpToString(value);
  }
  RoundTripNanPayload(value, &result);
  return result;
}

PaddingConfig MakeNoPaddingConfig(int64_t rank) {
  PaddingConfig padding_config;
  for (int64_t dnum = 0; dnum < rank; ++dnum) {
    auto dimension = padding_config.add_dimensions();
    dimension->set_edge_padding_low(0);
    dimension->set_edge_padding_high(0);
    dimension->set_interior_padding(0);
  }
  return padding_config;
}

PaddingConfig MakeEdgePaddingConfig(
    absl::Span<const std::pair<int64_t, int64_t>> padding) {
  PaddingConfig padding_config;
  for (const std::pair<int64_t, int64_t>& dim : padding) {
    auto dimension = padding_config.add_dimensions();
    dimension->set_edge_padding_low(dim.first);
    dimension->set_edge_padding_high(dim.second);
    dimension->set_interior_padding(0);
  }
  return padding_config;
}

bool HasInteriorPadding(const PaddingConfig& config) {
  for (const auto& dim : config.dimensions()) {
    if (dim.interior_padding() != 0) {
      return true;
    }
  }
  return false;
}

namespace {
std::string HumanReadableNumOps(double flops, double nanoseconds,
                                absl::string_view op_prefix) {
  if (nanoseconds == 0) {
    return absl::StrCat("NaN ", op_prefix, "OP/s");
  }
  double nano_flops = flops / nanoseconds;
  std::string throughput = tensorflow::strings::HumanReadableNum(
      static_cast<int64_t>(nano_flops * 1e9));
  absl::string_view sp(throughput);
  // Use the more common "G(FLOPS)", rather than "B(FLOPS)".
  if (absl::EndsWith(sp, "B") ||  // Ends in 'B', ignoring case.
      absl::EndsWith(sp, "b")) {
    *throughput.rbegin() = 'G';
  }
  throughput += absl::StrCat(op_prefix, "OP/s");
  return throughput;
}
}  // namespace

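// Example (illustrative, assuming tensorflow's HumanReadableNum formatting):
// HumanReadableNumFlops(1e9, 1e9) is one GFLOP over one second, formatted as
// "1.00GFLOP/s".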
std::string HumanReadableNumFlops(double flops, double nanoseconds) {
  return HumanReadableNumOps(flops, nanoseconds, "FL");
}

std::string HumanReadableNumTranscendentalOps(double trops,
                                              double nanoseconds) {
  return HumanReadableNumOps(trops, nanoseconds, "TR");
}

void LogLines(int sev, absl::string_view text, const char* fname, int lineno) {
  const int orig_sev = sev;
  if (sev == tensorflow::FATAL) {
    sev = tensorflow::ERROR;
  }

  // Protect calls with a mutex so we don't interleave calls to LogLines from
  // multiple threads.
  static absl::Mutex log_lines_mu(absl::kConstInit);
  absl::MutexLock lock(&log_lines_mu);

  size_t cur = 0;
  while (cur < text.size()) {
    size_t eol = text.find('\n', cur);
    if (eol == absl::string_view::npos) {
      eol = text.size();
    }
    auto msg = text.substr(cur, eol - cur);
    tensorflow::internal::LogString(fname, lineno, sev,
                                    std::string(msg.data(), msg.size()));
    cur = eol + 1;
  }

  if (orig_sev == tensorflow::FATAL) {
    tensorflow::internal::LogString(fname, lineno, orig_sev,
                                    "Aborting due to errors.");
  }
}

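// Note that the empty product is 1, so Product({}) returns 1.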
int64_t Product(absl::Span<const int64_t> xs) {
  return std::accumulate(xs.begin(), xs.end(), static_cast<int64_t>(1),
                         std::multiplies<int64_t>());
}

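// Illustrative examples (hand-traced, not from the original source):
// CommonFactors({2, 3}, {2, 3}) returns {{0, 0}, {1, 1}, {2, 2}}, since equal
// shapes decompose index-by-index, while CommonFactors({2, 6}, {4, 3})
// returns {{0, 0}, {2, 2}}: the only matching partial products are 1 and 12,
// so the two shapes form a single indivisible group.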
absl::InlinedVector<std::pair<int64_t, int64_t>, 8> CommonFactors(
    absl::Span<const int64_t> a, absl::Span<const int64_t> b) {
  CHECK_EQ(Product(a), Product(b));
  absl::InlinedVector<std::pair<int64_t, int64_t>, 8> bounds;
  if (absl::c_equal(a, b)) {
    bounds.reserve(a.size() + 1);
    for (int64_t i = 0; i <= a.size(); ++i) {
      bounds.emplace_back(i, i);
    }
    return bounds;
  }
  int64_t i = 0, j = 0, prior_i = -1, prior_j = -1;
  while (i < a.size() && j < b.size() && a[i] == b[j]) {
    std::tie(prior_i, prior_j) = std::make_pair(i, j);
    bounds.emplace_back(i, j);
    ++i;
    ++j;
  }
  // If the products of the remaining suffixes differ, which can only happen
  // when zeros are present, return the full group. E.g.:
  // a={0, 10, 3}
  //       ^
  //       i=1
  //
  // b={0, 3}
  //       ^
  //       j=1
  if (Product(a.subspan(i)) != Product(b.subspan(j))) {
    return {std::make_pair(0, 0), std::make_pair(a.size(), b.size())};
  }
  if (0 == Product(a.subspan(i))) {
    bounds.push_back(std::make_pair(i, j));
    bounds.push_back(std::make_pair(a.size(), b.size()));
    return bounds;
  }

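  // Greedily grow the partial products of `a` and `b`; every time the two
  // partial products agree (and at least one index advanced), the pair (i, j)
  // is a common factor-group boundary and gets recorded.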
  for (int64_t partial_size_a = 1, partial_size_b = 1;;) {
    if (partial_size_a == partial_size_b && (i > prior_i || j > prior_j)) {
      std::tie(prior_i, prior_j) = std::make_pair(i, j);
      bounds.emplace_back(i, j);
      continue;
    }
    bool in_bounds_i = i < a.size();
    bool in_bounds_j = j < b.size();
    if (!(in_bounds_i || in_bounds_j)) {
      break;
    }
    bool next_a =
        partial_size_a < partial_size_b ||
        (in_bounds_i &&
         (!in_bounds_j || (partial_size_a == partial_size_b && a[i] <= b[j])));
    bool next_b =
        partial_size_b < partial_size_a ||
        (in_bounds_j &&
         (!in_bounds_i || (partial_size_b == partial_size_a && b[j] <= a[i])));
    if (next_a) {
      partial_size_a *= a[i];
      ++i;
    }
    if (next_b) {
      partial_size_b *= b[j];
      ++j;
    }
  }
  return bounds;
}

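// Illustrative example (hand-traced, not from the original source): with
// from_dimensions={1}, from_sizes={2, 32} and to_sizes={4, 4, 4}, 'from'
// dimension 1 maps partially onto 'to' dimensions 1 and 2, so the result has
// to_dimensions={1, 2}, split_from_dimensions={1}, split_from_sizes={2} and
// untransformed_from_dimensions={1}.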
ConvertedDimensionNumbers ConvertDimensionNumbers(
    absl::Span<const int64_t> from_dimensions,
    absl::Span<const int64_t> from_sizes, absl::Span<const int64_t> to_sizes) {
  ConvertedDimensionNumbers dimensions;
  auto common_factors = CommonFactors(from_sizes, to_sizes);
  for (int64_t i = 0; i < common_factors.size() - 1; ++i) {
    bool any_present = false;
    bool all_present = true;
    for (int64_t d = common_factors[i].first; d < common_factors[i + 1].first;
         ++d) {
      const bool present = absl::c_linear_search(from_dimensions, d);
      any_present |= present;
      all_present &= present;
    }
    if (all_present) {
      for (int64_t d = common_factors[i].second;
           d < common_factors[i + 1].second; ++d) {
        dimensions.to_dimensions.push_back(d);
      }
      for (int64_t d = common_factors[i].first;
           d < common_factors[i + 1].first; ++d) {
        dimensions.transformed_from_dimensions.push_back(d);
      }
    } else if (any_present) {
      // Try to detect cases like (from) [2,32] -> (to) [4,4,4], where 'from'
      // dimension 1 can be partially mapped into 'to' dimensions 1 and 2 with
      // a partial size of 2.
      if (common_factors[i].first + 2 == common_factors[i + 1].first &&
          absl::c_linear_search(from_dimensions, common_factors[i].first + 1)) {
        int64_t from_size = from_sizes[common_factors[i + 1].first - 1];
        bool has_to_dim = false;
        for (int64_t to_dim = common_factors[i + 1].second - 1;
             to_dim >= common_factors[i].second; --to_dim) {
          const int64_t to_size = to_sizes[to_dim];
          if (from_size % to_size == 0) {
            has_to_dim = true;
            from_size /= to_size;
            dimensions.to_dimensions.push_back(to_dim);
          } else {
            break;
          }
        }
        if (has_to_dim) {
          dimensions.split_from_sizes.push_back(from_size);
          dimensions.split_from_dimensions.push_back(common_factors[i].first +
                                                     1);
        }
      }
      for (int64_t d = common_factors[i].first;
           d < common_factors[i + 1].first; ++d) {
        if (absl::c_linear_search(from_dimensions, d)) {
          dimensions.untransformed_from_dimensions.push_back(d);
        }
      }
    }
  }
  absl::c_sort(dimensions.to_dimensions);
  return dimensions;
}
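
// Example (illustrative): SanitizeFileName("a/b [0] c") returns "a_b__0__c".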
std::string SanitizeFileName(std::string file_name) {
  for (char& c : file_name) {
    if (c == '/' || c == '\\' || c == '[' || c == ']' || c == ' ') {
      c = '_';
    }
  }
  return file_name;
}

// Utility function to split a double-precision float (F64) into a pair of
// F32s. For a p-bit number, and a splitting point (p/2) <= s <= (p - 1), the
// algorithm produces a (p - s)-bit value 'hi' and a non-overlapping (s - 1)-bit
// value 'lo'. See Theorem 4 in [1] (attributed to Dekker) or [2] for the
// original theorem by Dekker.
//
// For double-precision F64s, which contain a 53-bit mantissa (52 of them
// explicit), we can represent the most significant 49 bits as the unevaluated
// sum of two single-precision floats 'hi' and 'lo'. The 'hi' float stores the
// most significant 24 bits; the sign bit of 'lo' together with its mantissa
// stores the remaining 25 bits. The exponent of the resulting representation
// is still restricted to the 8 bits of F32.
//
// References:
// [1] A. Thall, Extended-Precision Floating-Point Numbers for GPU Computation,
//     SIGGRAPH Research Posters, 2006.
//     (http://andrewthall.org/papers/df64_qf128.pdf)
// [2] T. J. Dekker, A floating point technique for extending the available
//     precision, Numerische Mathematik, vol. 18, pp. 224–242, 1971.
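//
// Illustrative example (not from the original source): for x = 1.0 + 2^-30,
// the nearest F32 is 1.0f, so hi = 1.0f and lo = float(x - 1.0) = 2^-30; the
// double sum hi + lo then reconstructs x exactly.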
std::pair<float, float> SplitF64ToF32(double x) {
  const float x_f32 = static_cast<float>(x);

  // Early return if x is an infinity or NaN.
  if (!std::isfinite(x_f32)) {
    // Only values within the range of F32 are supported, unless the input is
    // infinite. Small values with large negative exponents would be rounded
    // to zero.
    if (std::isfinite(x)) {
      LOG(WARNING) << "Out of range F64 constant detected: " << x;
    }
    return std::make_pair(x_f32, 0.0f);
  }

  // The high float is simply the double rounded to the nearest float. Because
  // we are rounding to nearest with ties to even, the error introduced in
  // rounding is less than half a ULP of 'hi'.
  const float hi = x_f32;
  // We can compute the low term using Sterbenz' lemma: if a and b are two
  // positive floating point numbers and a/2 ≤ b ≤ 2a, then their difference
  // can be computed exactly.
  // Note: the difference is computed exactly in double precision, but it is
  // then rounded to the nearest float, which introduces additional error.
  const float lo = static_cast<float>(x - static_cast<double>(hi));
  return std::make_pair(hi, lo);
}

}  // namespace xla