• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/compiler/xla/util.h"
17 
18 #include <stdarg.h>
19 
20 #include <cmath>
21 #include <limits>
22 #include <numeric>
23 #include <optional>
24 #include <string>
25 
26 #include "absl/algorithm/container.h"
27 #include "absl/base/casts.h"
28 #include "absl/container/flat_hash_map.h"
29 #include "absl/container/inlined_vector.h"
30 #include "absl/strings/match.h"
31 #include "absl/strings/str_cat.h"
32 #include "absl/strings/str_format.h"
33 #include "absl/strings/str_join.h"
34 #include "absl/strings/str_split.h"
35 #include "tensorflow/compiler/xla/types.h"
36 #include "tensorflow/core/lib/core/errors.h"
37 #include "tensorflow/core/lib/math/math_util.h"
38 #include "tensorflow/core/platform/bfloat16.h"
39 #include "tensorflow/core/platform/env.h"
40 #include "tensorflow/core/platform/numbers.h"
41 #include "tensorflow/core/platform/stacktrace.h"
42 
43 namespace xla {
44 
ToMixedRadix(const int64_t n,absl::Span<const int64_t> bounds)45 std::vector<int64_t> ToMixedRadix(const int64_t n,
46                                   absl::Span<const int64_t> bounds) {
47   if (bounds.empty()) {
48     return {};
49   }
50 
51   std::vector<int64_t> digits;
52   digits.reserve(bounds.size());
53   int64_t divisor = Product(bounds);
54   CHECK_GT(divisor, 0);
55   int64_t remainder = n % divisor;
56   for (const int64_t radix : bounds) {
57     CHECK_GT(radix, 0);
58     divisor /= radix;
59     CHECK_GT(divisor, 0);
60 
61     // The divisor is always 1 for the last iteration.
62     digits.push_back(remainder / divisor);
63     remainder = remainder % divisor;
64   }
65   return digits;
66 }
67 
// Logs `status` at VLOG level 1 and the current stack trace at VLOG level 2,
// then returns `status` unchanged. `status` must be an error; an OK status is
// a CHECK failure.
Status WithLogBacktrace(const Status& status) {
  CHECK(!status.ok());
  VLOG(1) << status.ToString();
  VLOG(2) << tensorflow::CurrentStackTrace();
  return status;
}
74 
// Starts the timer: records the current wall time in microseconds when
// `enabled` is true. A disabled timer records nothing and StopAndLog() is a
// no-op for it. `file` and `line` identify the caller's location and are used
// to attribute the eventual log message; `timer_stats` accumulates stats
// across all timers sharing it.
ScopedLoggingTimer::ScopedLoggingTimer(absl::string_view label, bool enabled,
                                       const char* file, int line,
                                       TimerStats* timer_stats)
    : label_(label),
      file_(file),
      line_(line),
      timer_stats_(timer_stats),
      enabled_(enabled) {
  if (enabled_) {
    start_micros_ = tensorflow::Env::Default()->NowMicros();
  }
}
87 
StopAndLog()88 void ScopedLoggingTimer::StopAndLog() {
89   if (enabled_) {
90     uint64_t end_micros = tensorflow::Env::Default()->NowMicros();
91     double secs = (end_micros - start_micros_) / 1000000.0;
92 
93     TimerStats& stats = *timer_stats_;
94     absl::MutexLock lock(&stats.stats_mutex);
95     stats.cumulative_secs += secs;
96     if (secs > stats.max_secs) {
97       stats.max_secs = secs;
98     }
99     stats.times_called++;
100 
101     LOG(INFO).AtLocation(file_, line_)
102         << label_
103         << " time: " << tensorflow::strings::HumanReadableElapsedTime(secs)
104         << " (cumulative: "
105         << tensorflow::strings::HumanReadableElapsedTime(stats.cumulative_secs)
106         << ", max: "
107         << tensorflow::strings::HumanReadableElapsedTime(stats.max_secs)
108         << ", #called: " << stats.times_called << ")";
109     enabled_ = false;
110   }
111 }
112 
~ScopedLoggingTimer()113 ScopedLoggingTimer::~ScopedLoggingTimer() { StopAndLog(); }
114 
AddStatus(Status prior,absl::string_view context)115 Status AddStatus(Status prior, absl::string_view context) {
116   CHECK(!prior.ok());
117   return Status{prior.code(),
118                 absl::StrCat(context, ": ", prior.error_message())};
119 }
120 
AppendStatus(Status prior,absl::string_view context)121 Status AppendStatus(Status prior, absl::string_view context) {
122   CHECK(!prior.ok());
123   return Status{prior.code(),
124                 absl::StrCat(prior.error_message(), ": ", context)};
125 }
126 
Reindent(absl::string_view original,const absl::string_view indentation)127 std::string Reindent(absl::string_view original,
128                      const absl::string_view indentation) {
129   std::vector<std::string> pieces =
130       absl::StrSplit(absl::string_view(original.data(), original.size()), '\n');
131   return absl::StrJoin(
132       pieces, "\n", [indentation](std::string* out, absl::string_view s) {
133         absl::StrAppend(out, indentation, absl::StripAsciiWhitespace(s));
134       });
135 }
136 
// Appends "(0x<payload>)" to *result when `value` is a NaN carrying a payload
// different from the default quiet-NaN payload, so the printed form preserves
// the exact bit pattern. No-op for non-NaN values and for float types with no
// payload bits.
template <typename FloatT>
static void RoundTripNanPayload(FloatT value, std::string* result) {
  const int kPayloadBits = NanPayloadBits<FloatT>();
  if (std::isnan(value) && kPayloadBits > 0) {
    // Reinterpret the float's bits as the same-width unsigned integer and
    // mask out just the payload bits.
    auto rep = absl::bit_cast<
        typename UnsignedIntegerTypeForSize<sizeof(FloatT)>::type>(value);
    auto payload = rep & NanPayloadBitMask<FloatT>();
    if (payload != QuietNanWithoutPayload<FloatT>()) {
      absl::StrAppendFormat(result, "(0x%x)", payload);
    }
  }
}
149 
// Formats `value` with max_digits10 significant decimal digits — enough for
// the string to parse back to the bit-identical floating point value.
template <typename FloatT>
static std::string GenericRoundTripFpToString(FloatT value) {
  // TODO(majnemer): Remove this temporary variable once Eigen creates a symbol
  // definition for `max_digits10`.
  int max_decimal_digits = std::numeric_limits<FloatT>::max_digits10;
  return absl::StrFormat("%.*g", max_decimal_digits,
                         static_cast<double>(value));
}
158 
// Returns a decimal string for `value` that parses back to the identical
// bfloat16, including any non-default NaN payload bits (appended as "(0x..)").
std::string RoundTripFpToString(bfloat16 value) {
  std::string result = GenericRoundTripFpToString(value);
  RoundTripNanPayload(value, &result);
  return result;
}
164 
// Returns a decimal string for `value` that parses back to the identical
// half, including any non-default NaN payload bits (appended as "(0x..)").
std::string RoundTripFpToString(half value) {
  std::string result = GenericRoundTripFpToString(value);
  RoundTripNanPayload(value, &result);
  return result;
}
170 
RoundTripFpToString(float value)171 std::string RoundTripFpToString(float value) {
172   float parsed_result;
173   std::string result =
174       absl::StrFormat("%.*g", std::numeric_limits<float>::digits10, value);
175   if (!absl::SimpleAtof(result, &parsed_result) || parsed_result != value) {
176     result = GenericRoundTripFpToString(value);
177   }
178   RoundTripNanPayload(value, &result);
179   return result;
180 }
181 
RoundTripFpToString(double value)182 std::string RoundTripFpToString(double value) {
183   double parsed_result;
184   std::string result =
185       absl::StrFormat("%.*g", std::numeric_limits<double>::digits10, value);
186   if (!absl::SimpleAtod(result, &parsed_result) || parsed_result != value) {
187     result = GenericRoundTripFpToString(value);
188   }
189   RoundTripNanPayload(value, &result);
190   return result;
191 }
192 
MakeNoPaddingConfig(int64_t rank)193 PaddingConfig MakeNoPaddingConfig(int64_t rank) {
194   PaddingConfig padding_config;
195   for (int64_t dnum = 0; dnum < rank; ++dnum) {
196     auto dimension = padding_config.add_dimensions();
197     dimension->set_edge_padding_low(0);
198     dimension->set_edge_padding_high(0);
199     dimension->set_interior_padding(0);
200   }
201   return padding_config;
202 }
203 
MakeEdgePaddingConfig(absl::Span<const std::pair<int64_t,int64_t>> padding)204 PaddingConfig MakeEdgePaddingConfig(
205     absl::Span<const std::pair<int64_t, int64_t>> padding) {
206   PaddingConfig padding_config;
207   for (const std::pair<int64_t, int64_t>& dim : padding) {
208     auto dimension = padding_config.add_dimensions();
209     dimension->set_edge_padding_low(dim.first);
210     dimension->set_edge_padding_high(dim.second);
211     dimension->set_interior_padding(0);
212   }
213   return padding_config;
214 }
215 
HasInteriorPadding(const PaddingConfig & config)216 bool HasInteriorPadding(const PaddingConfig& config) {
217   for (const auto& dim : config.dimensions()) {
218     if (dim.interior_padding() != 0) {
219       return true;
220     }
221   }
222   return false;
223 }
224 
225 namespace {
HumanReadableNumOps(double flops,double nanoseconds,absl::string_view op_prefix)226 std::string HumanReadableNumOps(double flops, double nanoseconds,
227                                 absl::string_view op_prefix) {
228   if (nanoseconds == 0) {
229     return absl::StrCat("NaN ", op_prefix, "OP/s");
230   }
231   double nano_flops = flops / nanoseconds;
232   std::string throughput = tensorflow::strings::HumanReadableNum(
233       static_cast<int64_t>(nano_flops * 1e9));
234   absl::string_view sp(throughput);
235   // Use the more common "G(FLOPS)", rather than "B(FLOPS)"
236   if (absl::EndsWith(sp, "B") ||  // Ends in 'B', ignoring case
237       absl::EndsWith(sp, "b")) {
238     *throughput.rbegin() = 'G';
239   }
240   throughput += absl::StrCat(op_prefix, "OP/s");
241   return throughput;
242 }
243 }  // namespace
244 
// Formats `flops` floating-point operations over `nanoseconds` as a
// human-readable "...FLOP/s" throughput string.
std::string HumanReadableNumFlops(double flops, double nanoseconds) {
  return HumanReadableNumOps(flops, nanoseconds, "FL");
}
248 
// Formats `trops` transcendental operations over `nanoseconds` as a
// human-readable "...TROP/s" throughput string.
std::string HumanReadableNumTranscendentalOps(double trops,
                                              double nanoseconds) {
  return HumanReadableNumOps(trops, nanoseconds, "TR");
}
253 
LogLines(int sev,absl::string_view text,const char * fname,int lineno)254 void LogLines(int sev, absl::string_view text, const char* fname, int lineno) {
255   const int orig_sev = sev;
256   if (sev == tensorflow::FATAL) {
257     sev = tensorflow::ERROR;
258   }
259 
260   // Protect calls with a mutex so we don't interleave calls to LogLines from
261   // multiple threads.
262   static absl::Mutex log_lines_mu(absl::kConstInit);
263   absl::MutexLock lock(&log_lines_mu);
264 
265   size_t cur = 0;
266   while (cur < text.size()) {
267     size_t eol = text.find('\n', cur);
268     if (eol == absl::string_view::npos) {
269       eol = text.size();
270     }
271     auto msg = text.substr(cur, eol - cur);
272     tensorflow::internal::LogString(fname, lineno, sev,
273                                     std::string(msg.data(), msg.size()));
274     cur = eol + 1;
275   }
276 
277   if (orig_sev == tensorflow::FATAL) {
278     tensorflow::internal::LogString(fname, lineno, orig_sev,
279                                     "Aborting due to errors.");
280   }
281 }
282 
Product(absl::Span<const int64_t> xs)283 int64_t Product(absl::Span<const int64_t> xs) {
284   return std::accumulate(xs.begin(), xs.end(), static_cast<int64_t>(1),
285                          std::multiplies<int64_t>());
286 }
287 
// Computes the common factorization points of two shapes with equal element
// counts: each returned pair (i, j) marks a bound where the element count of
// a[0:i) equals that of b[0:j). The result always starts at (0, 0) and ends
// at (a.size(), b.size()).
absl::InlinedVector<std::pair<int64_t, int64_t>, 8> CommonFactors(
    absl::Span<const int64_t> a, absl::Span<const int64_t> b) {
  CHECK_EQ(Product(a), Product(b));
  absl::InlinedVector<std::pair<int64_t, int64_t>, 8> bounds;
  // Fast path: identical shapes factor at every position.
  if (absl::c_equal(a, b)) {
    bounds.reserve(a.size() + 1);
    for (int64_t i = 0; i <= a.size(); ++i) {
      bounds.emplace_back(i, i);
    }
    return bounds;
  }
  // Walk the common leading run of equal dimensions; every position inside
  // the run is trivially a factorization bound.
  int64_t i = 0, j = 0, prior_i = -1, prior_j = -1;
  while (i < a.size() && j < b.size() && a[i] == b[j]) {
    std::tie(prior_i, prior_j) = std::make_pair(i, j);
    bounds.emplace_back(i, j);
    ++i;
    ++j;
  }
  // If the product is different after filtering out zeros, return full group.
  // E.g.,:
  // a={0, 10 ,3}
  //       ^
  //      i=1
  //
  // b={0, 3}
  //       ^
  //      j=1
  if (Product(a.subspan(i)) != Product(b.subspan(j))) {
    return {std::make_pair(0, 0), std::make_pair(a.size(), b.size())};
  }
  // A zero dimension in the remainder: close out with a single group covering
  // everything that is left on both sides.
  if (0 == Product(a.subspan(i))) {
    bounds.push_back(std::make_pair(i, j));
    bounds.push_back(std::make_pair(a.size(), b.size()));
    return bounds;
  }

  // General case: advance i and j greedily while accumulating partial
  // products; whenever the partial products match again, (i, j) is a bound.
  for (int64_t partial_size_a = 1, partial_size_b = 1;;) {
    if (partial_size_a == partial_size_b && (i > prior_i || j > prior_j)) {
      std::tie(prior_i, prior_j) = std::make_pair(i, j);
      bounds.emplace_back(i, j);
      continue;
    }
    bool in_bounds_i = i < a.size();
    bool in_bounds_j = j < b.size();
    if (!(in_bounds_i || in_bounds_j)) {
      break;
    }
    // Advance the side with the smaller partial product; on a tie, advance
    // the side(s) with the smaller next dimension (possibly both).
    bool next_a =
        partial_size_a < partial_size_b ||
        (in_bounds_i &&
         (!in_bounds_j || (partial_size_a == partial_size_b && a[i] <= b[j])));
    bool next_b =
        partial_size_b < partial_size_a ||
        (in_bounds_j &&
         (!in_bounds_i || (partial_size_b == partial_size_a && b[j] <= a[i])));
    if (next_a) {
      partial_size_a *= a[i];
      ++i;
    }
    if (next_b) {
      partial_size_b *= b[j];
      ++j;
    }
  }
  return bounds;
}
354 
// Translates the dimension indices `from_dimensions` of a shape with sizes
// `from_sizes` into dimensions of a reshape target with sizes `to_sizes`,
// grouping dimensions by the common factors of the two shapes. Per group:
// when every `from` dimension of the group is selected, the group's `to`
// dimensions are reported as fully transformed; when only some are, the
// selected `from` dimensions stay untransformed (with a special case that
// detects partially-splittable trailing dimensions).
ConvertedDimensionNumbers ConvertDimensionNumbers(
    absl::Span<const int64_t> from_dimensions,
    absl::Span<const int64_t> from_sizes, absl::Span<const int64_t> to_sizes) {
  ConvertedDimensionNumbers dimensions;
  auto common_factors = CommonFactors(from_sizes, to_sizes);
  // Each [common_factors[i], common_factors[i+1]) pair delimits one group of
  // from/to dimensions with equal element counts.
  for (int64_t i = 0; i < common_factors.size() - 1; ++i) {
    bool any_present = false;
    bool all_present = true;
    for (int64_t d = common_factors[i].first; d < common_factors[i + 1].first;
         ++d) {
      const bool present = absl::c_linear_search(from_dimensions, d);
      any_present |= present;
      all_present &= present;
    }
    if (all_present) {
      // Whole group selected: every `to` dimension of the group maps cleanly.
      for (int64_t d = common_factors[i].second;
           d < common_factors[i + 1].second; ++d) {
        dimensions.to_dimensions.push_back(d);
      }
      for (int64_t d = common_factors[i].first; d < common_factors[i + 1].first;
           ++d) {
        dimensions.transformed_from_dimensions.push_back(d);
      }
    } else if (any_present) {
      // Try to find if there is a to dimension that is like (from) [2,32] ->
      // (to) [4,4,4] to detect that from dimension 1 can be partially mapped
      // into dimension 1 and 2 of the to sizes with a partial size of 2.
      if (common_factors[i].first + 2 == common_factors[i + 1].first &&
          absl::c_linear_search(from_dimensions, common_factors[i].first + 1)) {
        int64_t from_size = from_sizes[common_factors[i + 1].first - 1];
        bool has_to_dim = false;
        // Peel `to` dimensions off the back of the group while each evenly
        // divides what remains of the trailing `from` dimension.
        for (int64_t to_dim = common_factors[i + 1].second - 1;
             to_dim >= common_factors[i].second; --to_dim) {
          const int64_t to_size = to_sizes[to_dim];
          if (from_size % to_size == 0) {
            has_to_dim = true;
            from_size /= to_size;
            dimensions.to_dimensions.push_back(to_dim);
          } else {
            break;
          }
        }
        if (has_to_dim) {
          // Record the leftover size and the split `from` dimension.
          dimensions.split_from_sizes.push_back(from_size);
          dimensions.split_from_dimensions.push_back(common_factors[i].first +
                                                     1);
        }
      }
      // Selected dimensions in a partially-selected group remain
      // untransformed.
      for (int64_t d = common_factors[i].first; d < common_factors[i + 1].first;
           ++d) {
        if (absl::c_linear_search(from_dimensions, d)) {
          dimensions.untransformed_from_dimensions.push_back(d);
        }
      }
    }
  }
  absl::c_sort(dimensions.to_dimensions);
  return dimensions;
}
// Replaces characters that are awkward in file names — path separators,
// square brackets, and spaces — with underscores, returning the sanitized
// name.
std::string SanitizeFileName(std::string file_name) {
  for (char& c : file_name) {
    switch (c) {
      case '/':
      case '\\':
      case '[':
      case ']':
      case ' ':
        c = '_';
        break;
      default:
        break;
    }
  }
  return file_name;
}
422 
423 // Utility function to split a double-precision float (F64) into a pair of F32s.
424 // For a p-bit number, and a splitting point (p/2) <= s <= (p - 1), the
425 // algorithm produces a (p - s)-bit value 'hi' and a non-overlapping (s - 1)-bit
426 // value 'lo'. See Theorem 4 in [1] (attributed to Dekker) or [2] for the
427 // original theorem by Dekker.
428 //
429 // For double-precision F64s, which contain a 53 bit mantissa (52 of them
430 // explicit), we can represent the most significant 49 digits as the unevaluated
431 // sum of two single-precision floats 'hi' and 'lo'. The 'hi' float stores the
432 // most significant 24 bits and the sign bit of 'lo' together with its mantissa
433 // store the remaining 25 bits. The exponent of the resulting representation is
434 // still restricted to 8 bits of F32.
435 //
436 // References:
437 // [1] A. Thall, Extended-Precision Floating-Point Numbers for GPU Computation,
438 //     SIGGRAPH Research Posters, 2006.
439 //     (http://andrewthall.org/papers/df64_qf128.pdf)
440 // [2] T. J. Dekker, A floating point technique for extending the available
441 //     precision, Numerische Mathematik, vol. 18, pp. 224–242, 1971.
SplitF64ToF32(double x)442 std::pair<float, float> SplitF64ToF32(double x) {
443   const float x_f32 = static_cast<float>(x);
444 
445   // Early return if x is an infinity or NaN.
446   if (!std::isfinite(x_f32)) {
447     // Only values within the range of F32 are supported, unless it is infinity.
448     // Small values with large negative exponents would be rounded to zero.
449     if (std::isfinite(x)) {
450       LOG(WARNING) << "Out of range F64 constant detected: " << x;
451     }
452     return std::make_pair(x_f32, 0.0f);
453   }
454 
455   // The high float is simply the double rounded to the nearest float. Because
456   // we are rounding to nearest with ties to even, the error introduced in
457   // rounding is less than half an ULP in the high ULP.
458   const float hi = x_f32;
459   // We can compute the low term using Sterbenz' lemma: If a and b are two
460   // positive floating point numbers and a/2 ≤ b ≤ 2a, then their difference can
461   // be computed exactly.
462   // Note: the difference is computed exactly but is rounded to the nearest
463   // float which will introduce additional error.
464   const float lo = static_cast<float>(x - static_cast<double>(hi));
465   return std::make_pair(hi, lo);
466 }
467 
468 }  // namespace xla
469