/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/util.h"

#include <stdarg.h>

#include <cmath>
#include <limits>
#include <numeric>

#include "absl/algorithm/container.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/inlined_vector.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
#include "absl/types/optional.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/math/math_util.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/platform/bfloat16.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/numbers.h"
#include "tensorflow/core/platform/stacktrace.h"

namespace xla {

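// Logs `status`, which must not be OK: the status itself at VLOG level 1 and
// the current stack trace at level 2. Returns the status unchanged so the
// call can be chained into a return statement.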
Status WithLogBacktrace(const Status& status) {
  CHECK(!status.ok());
  VLOG(1) << status.ToString();
  VLOG(2) << tensorflow::CurrentStackTrace();
  return status;
}

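// If `enabled` is true, captures the current wall time in microseconds as the
// timer's start; `file` and `line` record where the timer was created so the
// eventual log message can be attributed to that location.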
ScopedLoggingTimer::ScopedLoggingTimer(const std::string& label, bool enabled,
                                       const char* file, int line,
                                       TimerStats* timer_stats)
    : enabled_(enabled),
      file_(file),
      line_(line),
      label_(label),
      timer_stats_(timer_stats) {
  if (enabled_) {
    start_micros_ = tensorflow::Env::Default()->NowMicros();
  }
}

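// Folds the elapsed time into the shared TimerStats (guarded by its mutex) and
// logs this call's duration together with the cumulative time, the maximum,
// and the call count. Disables the timer afterwards, so only the first call
// logs.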
void ScopedLoggingTimer::StopAndLog() {
  if (enabled_) {
    uint64 end_micros = tensorflow::Env::Default()->NowMicros();
    double secs = (end_micros - start_micros_) / 1000000.0;

    TimerStats& stats = *timer_stats_;
    tensorflow::mutex_lock lock(stats.stats_mutex);
    stats.cumulative_secs += secs;
    if (secs > stats.max_secs) {
      stats.max_secs = secs;
    }
    stats.times_called++;

    LOG(INFO).AtLocation(file_, line_)
        << label_
        << " time: " << tensorflow::strings::HumanReadableElapsedTime(secs)
        << " (cumulative: "
        << tensorflow::strings::HumanReadableElapsedTime(stats.cumulative_secs)
        << ", max: "
        << tensorflow::strings::HumanReadableElapsedTime(stats.max_secs)
        << ", #called: " << stats.times_called << ")";
    enabled_ = false;
  }
}

ScopedLoggingTimer::~ScopedLoggingTimer() { StopAndLog(); }

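// Returns a status with the same code as `prior` and with `context` prepended
// to its error message.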
Status AddStatus(Status prior, absl::string_view context) {
  CHECK(!prior.ok());
  return Status{prior.code(),
                absl::StrCat(context, ": ", prior.error_message())};
}

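// Returns a status with the same code as `prior` and with `context` appended
// to its error message.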
Status AppendStatus(Status prior, absl::string_view context) {
  CHECK(!prior.ok());
  return Status{prior.code(),
                absl::StrCat(prior.error_message(), ": ", context)};
}

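// Splits `original` on newlines, strips leading and trailing ASCII whitespace
// from each line, and prefixes every line with `indentation`.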
string Reindent(absl::string_view original,
                const absl::string_view indentation) {
  std::vector<string> pieces =
      absl::StrSplit(absl::string_view(original.data(), original.size()), '\n');
  return absl::StrJoin(pieces, "\n", [indentation](string* out, string s) {
    absl::StrAppend(out, indentation, absl::StripAsciiWhitespace(s));
  });
}

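// Formats floating-point values with enough significant decimal digits to
// round-trip to the same value: 4 digits suffice for bfloat16 (8-bit
// significand) and 5 for half (11-bit significand), while float and double
// use the round-trip buffer routines from tensorflow::strings.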
string RoundTripFpToString(tensorflow::bfloat16 value) {
  return absl::StrFormat("%.4g", static_cast<float>(value));
}

string RoundTripFpToString(Eigen::half value) {
  return absl::StrFormat("%.5g", static_cast<float>(value));
}

string RoundTripFpToString(float value) {
  char buffer[tensorflow::strings::kFastToBufferSize];
  tensorflow::strings::FloatToBuffer(value, buffer);
  return buffer;
}

string RoundTripFpToString(double value) {
  char buffer[tensorflow::strings::kFastToBufferSize];
  tensorflow::strings::DoubleToBuffer(value, buffer);
  return buffer;
}

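// Returns a PaddingConfig with `rank` dimensions, all with zero edge and
// interior padding.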
PaddingConfig MakeNoPaddingConfig(int64 rank) {
  PaddingConfig padding_config;
  for (int64 dnum = 0; dnum < rank; ++dnum) {
    auto dimension = padding_config.add_dimensions();
    dimension->set_edge_padding_low(0);
    dimension->set_edge_padding_high(0);
    dimension->set_interior_padding(0);
  }
  return padding_config;
}

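// Returns a PaddingConfig with one dimension per (low, high) pair in
// `padding`, applying each pair as edge padding and leaving interior padding
// at zero.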
PaddingConfig MakeEdgePaddingConfig(
    absl::Span<const std::pair<int64, int64>> padding) {
  PaddingConfig padding_config;
  for (const std::pair<int64, int64>& dim : padding) {
    auto dimension = padding_config.add_dimensions();
    dimension->set_edge_padding_low(dim.first);
    dimension->set_edge_padding_high(dim.second);
    dimension->set_interior_padding(0);
  }
  return padding_config;
}

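// Returns true if any dimension of `config` specifies nonzero interior
// padding.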
bool HasInteriorPadding(const PaddingConfig& config) {
  for (const auto& dim : config.dimensions()) {
    if (dim.interior_padding() != 0) {
      return true;
    }
  }
  return false;
}

namespace {
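// Formats a rate of `flops` operations over `nanoseconds` as a human-readable
// throughput string such as "1.5GFLOP/s"; `op_prefix` selects the operation
// kind (e.g. "FL" or "TR"). Returns a "NaN ...OP/s" string when the elapsed
// time is zero.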
string HumanReadableNumOps(double flops, double nanoseconds,
                           absl::string_view op_prefix) {
  if (nanoseconds == 0) {
    return absl::StrCat("NaN ", op_prefix, "OP/s");
  }
  double nano_flops = flops / nanoseconds;
  string throughput = tensorflow::strings::HumanReadableNum(
      static_cast<int64>(nano_flops * 1e9));
  absl::string_view sp(throughput);
  // Use the more common "G(FLOPS)", rather than "B(FLOPS)".
  if (absl::EndsWith(sp, "B") ||  // Ends in 'B', ignoring case.
      absl::EndsWith(sp, "b")) {
    *throughput.rbegin() = 'G';
  }
  throughput += absl::StrCat(op_prefix, "OP/s");
  return throughput;
}
}  // namespace

string HumanReadableNumFlops(double flops, double nanoseconds) {
  return HumanReadableNumOps(flops, nanoseconds, "FL");
}

string HumanReadableNumTranscendentalOps(double trops, double nanoseconds) {
  return HumanReadableNumOps(trops, nanoseconds, "TR");
}

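// Logs `text` one line at a time at severity `sev`, attributing each line to
// `fname:lineno`. FATAL is demoted to ERROR for the individual lines so the
// whole text is emitted before a final, genuinely fatal "Aborting due to
// errors." message.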
void LogLines(int sev, absl::string_view text, const char* fname, int lineno) {
  const int orig_sev = sev;
  if (sev == tensorflow::FATAL) {
    sev = tensorflow::ERROR;
  }

  // Protect calls with a mutex so we don't interleave calls to LogLines from
  // multiple threads.
  static tensorflow::mutex log_lines_mu(tensorflow::LINKER_INITIALIZED);
  tensorflow::mutex_lock lock(log_lines_mu);

  size_t cur = 0;
  while (cur < text.size()) {
    size_t eol = text.find('\n', cur);
    if (eol == absl::string_view::npos) {
      eol = text.size();
    }
    auto msg = text.substr(cur, eol - cur);
    tensorflow::internal::LogString(fname, lineno, sev,
                                    string(msg.data(), msg.size()));
    cur = eol + 1;
  }

  if (orig_sev == tensorflow::FATAL) {
    tensorflow::internal::LogString(fname, lineno, orig_sev,
                                    "Aborting due to errors.");
  }
}

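// Returns the product of the elements of `xs`; an empty span yields 1.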
int64 Product(absl::Span<const int64> xs) {
  return std::accumulate(xs.begin(), xs.end(), static_cast<int64>(1),
                         std::multiplies<int64>());
}

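// Returns the boundary pairs (i, j) at which `a` and `b` can be split into
// prefixes with equal element products, i.e. Product(a[0, i)) ==
// Product(b[0, j)). Requires Product(a) == Product(b). For example,
// CommonFactors({2, 3}, {6}) returns {(0, 0), (2, 1)}.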
absl::InlinedVector<std::pair<int64, int64>, 8> CommonFactors(
    absl::Span<const int64> a, absl::Span<const int64> b) {
  CHECK_EQ(Product(a), Product(b));
  absl::InlinedVector<std::pair<int64, int64>, 8> bounds;
  if (absl::c_equal(a, b)) {
    bounds.reserve(a.size() + 1);
    for (int64 i = 0; i <= a.size(); ++i) {
      bounds.emplace_back(i, i);
    }
    return bounds;
  }
  if (0 == Product(a)) {
    return {std::make_pair(0, 0), std::make_pair(a.size(), b.size())};
  }

  for (int64 i = 0, j = 0, prior_i = -1, prior_j = -1, partial_size_a = 1,
             partial_size_b = 1;
       ;) {
    if (partial_size_a == partial_size_b && (i > prior_i || j > prior_j)) {
      std::tie(prior_i, prior_j) = std::make_pair(i, j);
      bounds.emplace_back(i, j);
      continue;
    }
    bool in_bounds_i = i < a.size();
    bool in_bounds_j = j < b.size();
    if (!(in_bounds_i || in_bounds_j)) {
      break;
    }
    bool next_a =
        partial_size_a < partial_size_b ||
        (in_bounds_i &&
         (!in_bounds_j || (partial_size_a == partial_size_b && a[i] <= b[j])));
    bool next_b =
        partial_size_b < partial_size_a ||
        (in_bounds_j &&
         (!in_bounds_i || (partial_size_b == partial_size_a && b[j] <= a[i])));
    if (next_a) {
      partial_size_a *= a[i];
      ++i;
    }
    if (next_b) {
      partial_size_b *= b[j];
      ++j;
    }
  }
  return bounds;
}

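// Classifies `from_dimensions` with respect to a reshape from `from_sizes` to
// `to_sizes`. For each group of common factors: if all of the group's `from`
// dimensions are listed, the group maps cleanly and its `from` and `to`
// dimensions are recorded as transformed; if only some are listed, those are
// recorded as untransformed.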
ConvertedDimensionNumbers ConvertDimensionNumbers(
    absl::Span<const int64> from_dimensions, absl::Span<const int64> from_sizes,
    absl::Span<const int64> to_sizes) {
  ConvertedDimensionNumbers dimensions;
  auto common_factors = CommonFactors(from_sizes, to_sizes);
  for (int64 i = 0; i < common_factors.size() - 1; ++i) {
    bool any_present = false;
    bool all_present = true;
    for (int64 d = common_factors[i].first; d < common_factors[i + 1].first;
         ++d) {
      const bool present = absl::c_linear_search(from_dimensions, d);
      any_present |= present;
      all_present &= present;
    }
    if (all_present) {
      for (int64 d = common_factors[i].second; d < common_factors[i + 1].second;
           ++d) {
        dimensions.to_dimensions.push_back(d);
      }
      for (int64 d = common_factors[i].first; d < common_factors[i + 1].first;
           ++d) {
        dimensions.transformed_from_dimensions.push_back(d);
      }
    } else if (any_present) {
      for (int64 d = common_factors[i].first; d < common_factors[i + 1].first;
           ++d) {
        if (absl::c_linear_search(from_dimensions, d)) {
          dimensions.untransformed_from_dimensions.push_back(d);
        }
      }
    }
  }
  return dimensions;
}
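
// Replaces characters that are awkward in file names ('/', '\', '[', ']',
// and ' ') with underscores.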
string SanitizeFileName(string file_name) {
  for (char& c : file_name) {
    if (c == '/' || c == '\\' || c == '[' || c == ']' || c == ' ') {
      c = '_';
    }
  }
  return file_name;
}

// Utility function to split a double-precision float (F64) into a pair of F32s.
// For a p-bit number, and a splitting point (p/2) <= s <= (p - 1), the
// algorithm produces a (p - s)-bit value 'hi' and a non-overlapping (s - 1)-bit
// value 'lo'. See Theorem 4 in [1] (attributed to Dekker) or [2] for the
// original theorem by Dekker.
//
// For double-precision F64s, which have a 53-bit mantissa (52 of them
// explicit), we can represent the most significant 49 bits as the unevaluated
// sum of two single-precision floats 'hi' and 'lo'. The 'hi' float stores the
// most significant 24 bits, and the sign bit of 'lo' together with its
// mantissa stores the remaining 25 bits. The exponent of the resulting
// representation is still restricted to the 8 exponent bits of F32.
//
// References:
// [1] A. Thall, Extended-Precision Floating-Point Numbers for GPU Computation,
//     SIGGRAPH Research Posters, 2006.
//     (http://andrewthall.org/papers/df64_qf128.pdf)
// [2] T. J. Dekker, A floating point technique for extending the available
//     precision, Numerische Mathematik, vol. 18, pp. 224–242, 1971.
std::pair<float, float> SplitF64ToF32(double x) {
  const float x_f32 = static_cast<float>(x);

  // Early return if x is an infinity or NaN.
  if (!std::isfinite(x_f32)) {
    // Only values within the range of F32 are supported, unless the value is
    // infinite. Small values with large negative exponents would be rounded
    // to zero.
    if (std::isfinite(x)) {
      LOG(WARNING) << "Out of range F64 constant detected: " << x;
    }
    return std::make_pair(x_f32, 0.0f);
  }

  // The high float is simply the double rounded to the nearest float. Because
  // we are rounding to nearest with ties to even, the error introduced in
  // rounding is less than half an ULP of the high float.
  const float hi = x_f32;
  // We can compute the low term using Sterbenz' lemma: if a and b are two
  // positive floating point numbers and a/2 ≤ b ≤ 2a, then their difference
  // can be computed exactly.
  // Note: the difference is computed exactly in double precision but is then
  // rounded to the nearest float, which introduces additional error.
  const float lo = static_cast<float>(x - static_cast<double>(hi));
  return std::make_pair(hi, lo);
}
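
// Worked example (a sketch; values rounded for readability): for x = pi,
// SplitF64ToF32(3.141592653589793) returns hi ≈ 3.14159274f (pi rounded to
// the nearest float) and lo ≈ -8.7422777e-8f (the residual x - hi), so
// hi + lo evaluated in double precision recovers the leading bits of x.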

}  // namespace xla