• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15 
16 #include "tensorflow/compiler/xla/util.h"
17 
18 #include <stdarg.h>
19 
20 #include <cmath>
21 #include <limits>
22 #include <numeric>
23 #include <string>
24 
25 #include "absl/algorithm/container.h"
26 #include "absl/base/casts.h"
27 #include "absl/container/flat_hash_map.h"
28 #include "absl/container/inlined_vector.h"
29 #include "absl/strings/match.h"
30 #include "absl/strings/str_cat.h"
31 #include "absl/strings/str_format.h"
32 #include "absl/strings/str_join.h"
33 #include "absl/strings/str_split.h"
34 #include "absl/types/optional.h"
35 #include "tensorflow/compiler/xla/types.h"
36 #include "tensorflow/core/lib/core/errors.h"
37 #include "tensorflow/core/lib/math/math_util.h"
38 #include "tensorflow/core/lib/strings/numbers.h"
39 #include "tensorflow/core/platform/bfloat16.h"
40 #include "tensorflow/core/platform/env.h"
41 #include "tensorflow/core/platform/mutex.h"
42 #include "tensorflow/core/platform/numbers.h"
43 #include "tensorflow/core/platform/stacktrace.h"
44 
45 namespace xla {
46 
// Returns `status` unchanged after logging it: the status text at VLOG(1) and
// the current stack trace at VLOG(2). `status` must be an error; CHECK-fails
// on an OK status. (VLOG arguments are evaluated lazily, so the ToString()
// and stack-trace work only happens at those verbosity levels.)
Status WithLogBacktrace(const Status& status) {
  CHECK(!status.ok());
  VLOG(1) << status.ToString();
  VLOG(2) << tensorflow::CurrentStackTrace();
  return status;
}
53 
// Starts a wall-clock timer (microsecond resolution via Env::NowMicros).
// On StopAndLog() or destruction the elapsed time is logged, attributed to
// `file`:`line`, and accumulated into `timer_stats`. When `enabled` is false
// the timer is inert: no clock read here and StopAndLog() is a no-op.
ScopedLoggingTimer::ScopedLoggingTimer(const std::string& label, bool enabled,
                                       const char* file, int line,
                                       TimerStats* timer_stats)
    : enabled_(enabled),
      file_(file),
      line_(line),
      label_(label),
      timer_stats_(timer_stats) {
  if (enabled_) {
    start_micros_ = tensorflow::Env::Default()->NowMicros();
  }
}
66 
StopAndLog()67 void ScopedLoggingTimer::StopAndLog() {
68   if (enabled_) {
69     uint64 end_micros = tensorflow::Env::Default()->NowMicros();
70     double secs = (end_micros - start_micros_) / 1000000.0;
71 
72     TimerStats& stats = *timer_stats_;
73     tensorflow::mutex_lock lock(stats.stats_mutex);
74     stats.cumulative_secs += secs;
75     if (secs > stats.max_secs) {
76       stats.max_secs = secs;
77     }
78     stats.times_called++;
79 
80     LOG(INFO).AtLocation(file_, line_)
81         << label_
82         << " time: " << tensorflow::strings::HumanReadableElapsedTime(secs)
83         << " (cumulative: "
84         << tensorflow::strings::HumanReadableElapsedTime(stats.cumulative_secs)
85         << ", max: "
86         << tensorflow::strings::HumanReadableElapsedTime(stats.max_secs)
87         << ", #called: " << stats.times_called << ")";
88     enabled_ = false;
89   }
90 }
91 
// Stops and logs on scope exit; harmless if StopAndLog() was already called.
ScopedLoggingTimer::~ScopedLoggingTimer() { StopAndLog(); }
93 
AddStatus(Status prior,absl::string_view context)94 Status AddStatus(Status prior, absl::string_view context) {
95   CHECK(!prior.ok());
96   return Status{prior.code(),
97                 absl::StrCat(context, ": ", prior.error_message())};
98 }
99 
AppendStatus(Status prior,absl::string_view context)100 Status AppendStatus(Status prior, absl::string_view context) {
101   CHECK(!prior.ok());
102   return Status{prior.code(),
103                 absl::StrCat(prior.error_message(), ": ", context)};
104 }
105 
Reindent(absl::string_view original,const absl::string_view indentation)106 string Reindent(absl::string_view original,
107                 const absl::string_view indentation) {
108   std::vector<string> pieces =
109       absl::StrSplit(absl::string_view(original.data(), original.size()), '\n');
110   return absl::StrJoin(pieces, "\n", [indentation](string* out, string s) {
111     absl::StrAppend(out, indentation, absl::StripAsciiWhitespace(s));
112   });
113 }
114 
115 template <typename IntT, typename FloatT>
RoundTripNanPayload(FloatT value,std::string * result)116 static void RoundTripNanPayload(FloatT value, std::string* result) {
117   const int kPayloadBits = NanPayloadBits<FloatT>();
118   if (std::isnan(value) && kPayloadBits > 0) {
119     auto rep = absl::bit_cast<IntT>(value);
120     auto payload = rep & NanPayloadBitMask<FloatT>();
121     if (payload != QuietNanWithoutPayload<FloatT>()) {
122       absl::StrAppendFormat(result, "(0x%x)", payload);
123     }
124   }
125 }
126 
// Returns a decimal string form of `value` intended to round-trip back to
// the same bfloat16, formatted with 4 significant digits and, for NaNs,
// suffixed with any non-default payload in hex (see RoundTripNanPayload).
string RoundTripFpToString(tensorflow::bfloat16 value) {
  std::string result = absl::StrFormat("%.4g", static_cast<float>(value));
  RoundTripNanPayload<uint16_t>(value, &result);
  return result;
}
132 
// Returns a decimal string form of `value` intended to round-trip back to
// the same half, formatted with 5 significant digits and, for NaNs,
// suffixed with any non-default payload in hex (see RoundTripNanPayload).
string RoundTripFpToString(Eigen::half value) {
  std::string result = absl::StrFormat("%.5g", static_cast<float>(value));
  RoundTripNanPayload<uint16_t>(value, &result);
  return result;
}
138 
// Returns a round-trippable decimal string form of `value` via
// tensorflow::strings::FloatToBuffer; for NaNs, appends any non-default
// payload in hex (see RoundTripNanPayload).
string RoundTripFpToString(float value) {
  char buffer[tensorflow::strings::kFastToBufferSize];
  tensorflow::strings::FloatToBuffer(value, buffer);
  std::string result = buffer;
  RoundTripNanPayload<uint32_t>(value, &result);
  return result;
}
146 
// Returns a round-trippable decimal string form of `value` via
// tensorflow::strings::DoubleToBuffer; for NaNs, appends any non-default
// payload in hex (see RoundTripNanPayload).
string RoundTripFpToString(double value) {
  char buffer[tensorflow::strings::kFastToBufferSize];
  tensorflow::strings::DoubleToBuffer(value, buffer);
  std::string result = buffer;
  RoundTripNanPayload<uint64_t>(value, &result);
  return result;
}
154 
MakeNoPaddingConfig(int64_t rank)155 PaddingConfig MakeNoPaddingConfig(int64_t rank) {
156   PaddingConfig padding_config;
157   for (int64_t dnum = 0; dnum < rank; ++dnum) {
158     auto dimension = padding_config.add_dimensions();
159     dimension->set_edge_padding_low(0);
160     dimension->set_edge_padding_high(0);
161     dimension->set_interior_padding(0);
162   }
163   return padding_config;
164 }
165 
MakeEdgePaddingConfig(absl::Span<const std::pair<int64,int64>> padding)166 PaddingConfig MakeEdgePaddingConfig(
167     absl::Span<const std::pair<int64, int64>> padding) {
168   PaddingConfig padding_config;
169   for (const std::pair<int64, int64>& dim : padding) {
170     auto dimension = padding_config.add_dimensions();
171     dimension->set_edge_padding_low(dim.first);
172     dimension->set_edge_padding_high(dim.second);
173     dimension->set_interior_padding(0);
174   }
175   return padding_config;
176 }
177 
HasInteriorPadding(const PaddingConfig & config)178 bool HasInteriorPadding(const PaddingConfig& config) {
179   for (const auto& dim : config.dimensions()) {
180     if (dim.interior_padding() != 0) {
181       return true;
182     }
183   }
184   return false;
185 }
186 
187 namespace {
HumanReadableNumOps(double flops,double nanoseconds,absl::string_view op_prefix)188 string HumanReadableNumOps(double flops, double nanoseconds,
189                            absl::string_view op_prefix) {
190   if (nanoseconds == 0) {
191     return absl::StrCat("NaN ", op_prefix, "OP/s");
192   }
193   double nano_flops = flops / nanoseconds;
194   string throughput = tensorflow::strings::HumanReadableNum(
195       static_cast<int64>(nano_flops * 1e9));
196   absl::string_view sp(throughput);
197   // Use the more common "G(FLOPS)", rather than "B(FLOPS)"
198   if (absl::EndsWith(sp, "B") ||  // Ends in 'B', ignoring case
199       absl::EndsWith(sp, "b")) {
200     *throughput.rbegin() = 'G';
201   }
202   throughput += absl::StrCat(op_prefix, "OP/s");
203   return throughput;
204 }
205 }  // namespace
206 
// Returns a human-readable FLOP rate string ending in "FLOP/s"
// ("NaN FLOP/s" when nanoseconds == 0).
string HumanReadableNumFlops(double flops, double nanoseconds) {
  return HumanReadableNumOps(flops, nanoseconds, "FL");
}
210 
// Returns a human-readable transcendental-op rate string ending in "TROP/s"
// ("NaN TROP/s" when nanoseconds == 0).
string HumanReadableNumTranscendentalOps(double trops, double nanoseconds) {
  return HumanReadableNumOps(trops, nanoseconds, "TR");
}
214 
LogLines(int sev,absl::string_view text,const char * fname,int lineno)215 void LogLines(int sev, absl::string_view text, const char* fname, int lineno) {
216   const int orig_sev = sev;
217   if (sev == tensorflow::FATAL) {
218     sev = tensorflow::ERROR;
219   }
220 
221   // Protect calls with a mutex so we don't interleave calls to LogLines from
222   // multiple threads.
223   static tensorflow::mutex log_lines_mu(tensorflow::LINKER_INITIALIZED);
224   tensorflow::mutex_lock lock(log_lines_mu);
225 
226   size_t cur = 0;
227   while (cur < text.size()) {
228     size_t eol = text.find('\n', cur);
229     if (eol == absl::string_view::npos) {
230       eol = text.size();
231     }
232     auto msg = text.substr(cur, eol - cur);
233     tensorflow::internal::LogString(fname, lineno, sev,
234                                     string(msg.data(), msg.size()));
235     cur = eol + 1;
236   }
237 
238   if (orig_sev == tensorflow::FATAL) {
239     tensorflow::internal::LogString(fname, lineno, orig_sev,
240                                     "Aborting due to errors.");
241   }
242 }
243 
Product(absl::Span<const int64> xs)244 int64 Product(absl::Span<const int64> xs) {
245   return std::accumulate(xs.begin(), xs.end(), static_cast<int64>(1),
246                          std::multiplies<int64>());
247 }
248 
CommonFactors(absl::Span<const int64> a,absl::Span<const int64> b)249 absl::InlinedVector<std::pair<int64, int64>, 8> CommonFactors(
250     absl::Span<const int64> a, absl::Span<const int64> b) {
251   CHECK_EQ(Product(a), Product(b));
252   absl::InlinedVector<std::pair<int64, int64>, 8> bounds;
253   if (absl::c_equal(a, b)) {
254     bounds.reserve(a.size() + 1);
255     for (int64_t i = 0; i <= a.size(); ++i) {
256       bounds.emplace_back(i, i);
257     }
258     return bounds;
259   }
260   int64_t i = 0, j = 0, prior_i = -1, prior_j = -1;
261   while (i < a.size() && j < b.size() && a[i] == b[j]) {
262     std::tie(prior_i, prior_j) = std::make_pair(i, j);
263     bounds.emplace_back(i, j);
264     ++i;
265     ++j;
266   }
267   // If the product is different after filtering out zeros, return full group.
268   // E.g.,:
269   // a={0, 10 ,3}
270   //       ^
271   //      i=1
272   //
273   // b={0, 3}
274   //       ^
275   //      j=1
276   if (Product(a.subspan(i)) != Product(b.subspan(j))) {
277     return {std::make_pair(0, 0), std::make_pair(a.size(), b.size())};
278   }
279   if (0 == Product(a.subspan(i))) {
280     bounds.push_back(std::make_pair(i, j));
281     bounds.push_back(std::make_pair(a.size(), b.size()));
282     return bounds;
283   }
284 
285   for (int64_t partial_size_a = 1, partial_size_b = 1;;) {
286     if (partial_size_a == partial_size_b && (i > prior_i || j > prior_j)) {
287       std::tie(prior_i, prior_j) = std::make_pair(i, j);
288       bounds.emplace_back(i, j);
289       continue;
290     }
291     if (partial_size_a == partial_size_b && (i > prior_i || j > prior_j)) {
292       std::tie(prior_i, prior_j) = std::make_pair(i, j);
293       bounds.emplace_back(i, j);
294       continue;
295     }
296     bool in_bounds_i = i < a.size();
297     bool in_bounds_j = j < b.size();
298     if (!(in_bounds_i || in_bounds_j)) {
299       break;
300     }
301     bool next_a =
302         partial_size_a < partial_size_b ||
303         (in_bounds_i &&
304          (!in_bounds_j || (partial_size_a == partial_size_b && a[i] <= b[j])));
305     bool next_b =
306         partial_size_b < partial_size_a ||
307         (in_bounds_j &&
308          (!in_bounds_i || (partial_size_b == partial_size_a && b[j] <= a[i])));
309     if (next_a) {
310       partial_size_a *= a[i];
311       ++i;
312     }
313     if (next_b) {
314       partial_size_b *= b[j];
315       ++j;
316     }
317   }
318   return bounds;
319 }
320 
// Maps a set of dimension indices `from_dimensions` (indices into the shape
// `from_sizes`) through the factor-group decomposition that CommonFactors()
// computes between `from_sizes` and `to_sizes`, classifying them as:
//   - to_dimensions: the "to"-side dimensions of every group in which ALL
//     "from" dimensions are listed in `from_dimensions`;
//   - transformed_from_dimensions: the "from" dimensions of those fully
//     covered groups;
//   - untransformed_from_dimensions: listed "from" dimensions that fall in
//     groups only partially covered by `from_dimensions`.
// Groups containing no listed dimension contribute nothing.
ConvertedDimensionNumbers ConvertDimensionNumbers(
    absl::Span<const int64> from_dimensions, absl::Span<const int64> from_sizes,
    absl::Span<const int64> to_sizes) {
  ConvertedDimensionNumbers dimensions;
  auto common_factors = CommonFactors(from_sizes, to_sizes);
  // Consecutive entries of common_factors delimit one group: "from" dims
  // [factors[i].first, factors[i+1].first) correspond to "to" dims
  // [factors[i].second, factors[i+1].second).
  for (int64_t i = 0; i < common_factors.size() - 1; ++i) {
    bool any_present = false;
    bool all_present = true;
    for (int64_t d = common_factors[i].first; d < common_factors[i + 1].first;
         ++d) {
      const bool present = absl::c_linear_search(from_dimensions, d);
      any_present |= present;
      all_present &= present;
    }
    if (all_present) {
      // Fully covered group: record its "to" dims and its "from" dims.
      for (int64_t d = common_factors[i].second;
           d < common_factors[i + 1].second; ++d) {
        dimensions.to_dimensions.push_back(d);
      }
      for (int64_t d = common_factors[i].first; d < common_factors[i + 1].first;
           ++d) {
        dimensions.transformed_from_dimensions.push_back(d);
      }
    } else if (any_present) {
      // Partially covered group: only the listed "from" dims are recorded,
      // and no "to" dims, since the group does not map cleanly.
      for (int64_t d = common_factors[i].first; d < common_factors[i + 1].first;
           ++d) {
        if (absl::c_linear_search(from_dimensions, d)) {
          dimensions.untransformed_from_dimensions.push_back(d);
        }
      }
    }
  }
  return dimensions;
}
SanitizeFileName(string file_name)355 string SanitizeFileName(string file_name) {
356   for (char& c : file_name) {
357     if (c == '/' || c == '\\' || c == '[' || c == ']' || c == ' ') {
358       c = '_';
359     }
360   }
361   return file_name;
362 }
363 
// Utility function to split a double-precision float (F64) into a pair of F32s.
// For a p-bit number, and a splitting point (p/2) <= s <= (p - 1), the
// algorithm produces a (p - s)-bit value 'hi' and a non-overlapping (s - 1)-bit
// value 'lo'. See Theorem 4 in [1] (attributed to Dekker) or [2] for the
// original theorem by Dekker.
//
// For double-precision F64s, which contain a 53-bit mantissa (52 of them
// explicit), we can represent the most significant 49 digits as the unevaluated
// sum of two single-precision floats 'hi' and 'lo'. The 'hi' float stores the
// most significant 24 bits, and the sign bit of 'lo' together with its mantissa
// stores the remaining 25 bits. The exponent of the resulting representation is
// still restricted to the 8 bits of F32.
//
// References:
// [1] A. Thall, Extended-Precision Floating-Point Numbers for GPU Computation,
//     SIGGRAPH Research Posters, 2006.
//     (http://andrewthall.org/papers/df64_qf128.pdf)
// [2] T. J. Dekker, A floating point technique for extending the available
//     precision, Numerische Mathematik, vol. 18, pp. 224-242, 1971.
SplitF64ToF32(double x)383 std::pair<float, float> SplitF64ToF32(double x) {
384   const float x_f32 = static_cast<float>(x);
385 
386   // Early return if x is an infinity or NaN.
387   if (!std::isfinite(x_f32)) {
388     // Only values within the range of F32 are supported, unless it is infinity.
389     // Small values with large negative exponents would be rounded to zero.
390     if (std::isfinite(x)) {
391       LOG(WARNING) << "Out of range F64 constant detected: " << x;
392     }
393     return std::make_pair(x_f32, 0.0f);
394   }
395 
396   // The high float is simply the double rounded to the nearest float. Because
397   // we are rounding to nearest with ties to even, the error introduced in
398   // rounding is less than half an ULP in the high ULP.
399   const float hi = x_f32;
400   // We can compute the low term using Sterbenz' lemma: If a and b are two
401   // positive floating point numbers and a/2 ≤ b ≤ 2a, then their difference can
402   // be computed exactly.
403   // Note: the difference is computed exactly but is rounded to the nearest
404   // float which will introduce additional error.
405   const float lo = static_cast<float>(x - static_cast<double>(hi));
406   return std::make_pair(hi, lo);
407 }
408 
409 }  // namespace xla
410