/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/util.h"

#include <stdarg.h>

#include <cmath>
#include <limits>
#include <numeric>
#include <string>

#include "absl/algorithm/container.h"
#include "absl/base/casts.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/inlined_vector.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
#include "absl/types/optional.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/math/math_util.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/platform/bfloat16.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/numbers.h"
#include "tensorflow/core/platform/stacktrace.h"

namespace xla {
Status WithLogBacktrace(const Status& status) {
  CHECK(!status.ok());
  VLOG(1) << status.ToString();
  VLOG(2) << tensorflow::CurrentStackTrace();
  return status;
}
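
// Illustrative call pattern (a sketch, not prescribed by this file): wrap an
// error status as it is created so the backtrace is logged at the point of
// failure, e.g.
//
//   return WithLogBacktrace(
//       InvalidArgument("unexpected shape: %s", shape_string));
//
// where InvalidArgument is one of the status builders declared in util.h and
// shape_string is a hypothetical variable.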

ScopedLoggingTimer::ScopedLoggingTimer(const std::string& label, bool enabled,
                                       const char* file, int line,
                                       TimerStats* timer_stats)
    : enabled_(enabled),
      file_(file),
      line_(line),
      label_(label),
      timer_stats_(timer_stats) {
  if (enabled_) {
    start_micros_ = tensorflow::Env::Default()->NowMicros();
  }
}

void ScopedLoggingTimer::StopAndLog() {
  if (enabled_) {
    uint64 end_micros = tensorflow::Env::Default()->NowMicros();
    double secs = (end_micros - start_micros_) / 1000000.0;

    TimerStats& stats = *timer_stats_;
    tensorflow::mutex_lock lock(stats.stats_mutex);
    stats.cumulative_secs += secs;
    if (secs > stats.max_secs) {
      stats.max_secs = secs;
    }
    stats.times_called++;

    LOG(INFO).AtLocation(file_, line_)
        << label_
        << " time: " << tensorflow::strings::HumanReadableElapsedTime(secs)
        << " (cumulative: "
        << tensorflow::strings::HumanReadableElapsedTime(stats.cumulative_secs)
        << ", max: "
        << tensorflow::strings::HumanReadableElapsedTime(stats.max_secs)
        << ", #called: " << stats.times_called << ")";
    enabled_ = false;
  }
}

ScopedLoggingTimer::~ScopedLoggingTimer() { StopAndLog(); }
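
// Illustrative usage (a sketch; `stats` is a hypothetical caller-provided
// object, normally supplied via the XLA_SCOPED_LOGGING_TIMER macros declared
// in util.h):
//
//   static TimerStats stats;
//   {
//     ScopedLoggingTimer timer("my-pass", /*enabled=*/true, __FILE__,
//                              __LINE__, &stats);
//     // ... timed work ...
//   }  // Destructor logs elapsed, cumulative, max, and call count.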

Status AddStatus(Status prior, absl::string_view context) {
  CHECK(!prior.ok());
  return Status{prior.code(),
                absl::StrCat(context, ": ", prior.error_message())};
}

Status AppendStatus(Status prior, absl::string_view context) {
  CHECK(!prior.ok());
  return Status{prior.code(),
                absl::StrCat(prior.error_message(), ": ", context)};
}

string Reindent(absl::string_view original,
                const absl::string_view indentation) {
  std::vector<string> pieces =
      absl::StrSplit(absl::string_view(original.data(), original.size()), '\n');
  return absl::StrJoin(pieces, "\n", [indentation](string* out, string s) {
    absl::StrAppend(out, indentation, absl::StripAsciiWhitespace(s));
  });
}
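
// For example, Reindent("foo\n  bar", "  ") returns "  foo\n  bar": each line
// is stripped of leading and trailing ASCII whitespace, then re-prefixed with
// the requested indentation.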

template <typename IntT, typename FloatT>
static void RoundTripNanPayload(FloatT value, std::string* result) {
  const int kPayloadBits = NanPayloadBits<FloatT>();
  if (std::isnan(value) && kPayloadBits > 0) {
    auto rep = absl::bit_cast<IntT>(value);
    auto payload = rep & NanPayloadBitMask<FloatT>();
    if (payload != QuietNanWithoutPayload<FloatT>()) {
      absl::StrAppendFormat(result, "(0x%x)", payload);
    }
  }
}

string RoundTripFpToString(tensorflow::bfloat16 value) {
  std::string result = absl::StrFormat("%.4g", static_cast<float>(value));
  RoundTripNanPayload<uint16_t>(value, &result);
  return result;
}

string RoundTripFpToString(Eigen::half value) {
  std::string result = absl::StrFormat("%.5g", static_cast<float>(value));
  RoundTripNanPayload<uint16_t>(value, &result);
  return result;
}

string RoundTripFpToString(float value) {
  char buffer[tensorflow::strings::kFastToBufferSize];
  tensorflow::strings::FloatToBuffer(value, buffer);
  std::string result = buffer;
  RoundTripNanPayload<uint32_t>(value, &result);
  return result;
}

string RoundTripFpToString(double value) {
  char buffer[tensorflow::strings::kFastToBufferSize];
  tensorflow::strings::DoubleToBuffer(value, buffer);
  std::string result = buffer;
  RoundTripNanPayload<uint64_t>(value, &result);
  return result;
}
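
// For example, RoundTripFpToString(1.0f / 3.0f) prints enough significant
// digits (nine, in this case) that parsing the string back recovers the exact
// float, and a NaN carrying a non-default payload is rendered with a suffix
// such as "(0x1234)" appended by RoundTripNanPayload above.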

PaddingConfig MakeNoPaddingConfig(int64_t rank) {
  PaddingConfig padding_config;
  for (int64_t dnum = 0; dnum < rank; ++dnum) {
    auto dimension = padding_config.add_dimensions();
    dimension->set_edge_padding_low(0);
    dimension->set_edge_padding_high(0);
    dimension->set_interior_padding(0);
  }
  return padding_config;
}

PaddingConfig MakeEdgePaddingConfig(
    absl::Span<const std::pair<int64, int64>> padding) {
  PaddingConfig padding_config;
  for (const std::pair<int64, int64>& dim : padding) {
    auto dimension = padding_config.add_dimensions();
    dimension->set_edge_padding_low(dim.first);
    dimension->set_edge_padding_high(dim.second);
    dimension->set_interior_padding(0);
  }
  return padding_config;
}
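
// For example, MakeEdgePaddingConfig({{1, 2}}) builds a rank-1 config with one
// element of low padding, two elements of high padding, and no interior
// padding, so HasInteriorPadding below returns false for it.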

bool HasInteriorPadding(const PaddingConfig& config) {
  for (const auto& dim : config.dimensions()) {
    if (dim.interior_padding() != 0) {
      return true;
    }
  }
  return false;
}

namespace {
string HumanReadableNumOps(double flops, double nanoseconds,
                           absl::string_view op_prefix) {
  if (nanoseconds == 0) {
    return absl::StrCat("NaN ", op_prefix, "OP/s");
  }
  double nano_flops = flops / nanoseconds;
  string throughput = tensorflow::strings::HumanReadableNum(
      static_cast<int64>(nano_flops * 1e9));
  absl::string_view sp(throughput);
  // Use the more common "G(FLOPS)" rather than "B(FLOPS)".
  if (absl::EndsWith(sp, "B") ||  // Ends in 'B', ignoring case.
      absl::EndsWith(sp, "b")) {
    *throughput.rbegin() = 'G';
  }
  throughput += absl::StrCat(op_prefix, "OP/s");
  return throughput;
}
}  // namespace

string HumanReadableNumFlops(double flops, double nanoseconds) {
  return HumanReadableNumOps(flops, nanoseconds, "FL");
}

string HumanReadableNumTranscendentalOps(double trops, double nanoseconds) {
  return HumanReadableNumOps(trops, nanoseconds, "TR");
}
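
// For example, HumanReadableNumFlops(/*flops=*/2e9, /*nanoseconds=*/1e9)
// returns roughly "2.00GFLOP/s": 2e9 floating-point operations over one
// second, with the "B" (billions) suffix rewritten to the customary "G".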

void LogLines(int sev, absl::string_view text, const char* fname, int lineno) {
  const int orig_sev = sev;
  if (sev == tensorflow::FATAL) {
    sev = tensorflow::ERROR;
  }

  // Protect calls with a mutex so we don't interleave calls to LogLines from
  // multiple threads.
  static tensorflow::mutex log_lines_mu(tensorflow::LINKER_INITIALIZED);
  tensorflow::mutex_lock lock(log_lines_mu);

  size_t cur = 0;
  while (cur < text.size()) {
    size_t eol = text.find('\n', cur);
    if (eol == absl::string_view::npos) {
      eol = text.size();
    }
    auto msg = text.substr(cur, eol - cur);
    tensorflow::internal::LogString(fname, lineno, sev,
                                    string(msg.data(), msg.size()));
    cur = eol + 1;
  }

  if (orig_sev == tensorflow::FATAL) {
    tensorflow::internal::LogString(fname, lineno, orig_sev,
                                    "Aborting due to errors.");
  }
}
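
// For example, LogLines(tensorflow::INFO, "first\nsecond", __FILE__, __LINE__)
// emits two INFO records, one per line. A FATAL severity is downgraded to
// ERROR for the per-line records and re-applied only on the final "Aborting
// due to errors." record, so every line is flushed before the process aborts.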

int64 Product(absl::Span<const int64> xs) {
  return std::accumulate(xs.begin(), xs.end(), static_cast<int64>(1),
                         std::multiplies<int64>());
}
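
// For example, Product({2, 3, 4}) is 24 and Product({}) is 1 (the empty
// product), which is what CommonFactors below relies on when a span is
// exhausted.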

absl::InlinedVector<std::pair<int64, int64>, 8> CommonFactors(
    absl::Span<const int64> a, absl::Span<const int64> b) {
  CHECK_EQ(Product(a), Product(b));
  absl::InlinedVector<std::pair<int64, int64>, 8> bounds;
  if (absl::c_equal(a, b)) {
    bounds.reserve(a.size() + 1);
    for (int64_t i = 0; i <= a.size(); ++i) {
      bounds.emplace_back(i, i);
    }
    return bounds;
  }
  int64_t i = 0, j = 0, prior_i = -1, prior_j = -1;
  while (i < a.size() && j < b.size() && a[i] == b[j]) {
    std::tie(prior_i, prior_j) = std::make_pair(i, j);
    bounds.emplace_back(i, j);
    ++i;
    ++j;
  }
  // If the products differ after filtering out zeros, return the full group.
  // E.g.:
  //   a={0, 10, 3}
  //          ^
  //         i=1
  //
  //   b={0, 3}
  //         ^
  //        j=1
  if (Product(a.subspan(i)) != Product(b.subspan(j))) {
    return {std::make_pair(0, 0), std::make_pair(a.size(), b.size())};
  }
  if (0 == Product(a.subspan(i))) {
    bounds.push_back(std::make_pair(i, j));
    bounds.push_back(std::make_pair(a.size(), b.size()));
    return bounds;
  }

  for (int64_t partial_size_a = 1, partial_size_b = 1;;) {
    if (partial_size_a == partial_size_b && (i > prior_i || j > prior_j)) {
      std::tie(prior_i, prior_j) = std::make_pair(i, j);
      bounds.emplace_back(i, j);
      continue;
    }
    bool in_bounds_i = i < a.size();
    bool in_bounds_j = j < b.size();
    if (!(in_bounds_i || in_bounds_j)) {
      break;
    }
    bool next_a =
        partial_size_a < partial_size_b ||
        (in_bounds_i &&
         (!in_bounds_j || (partial_size_a == partial_size_b && a[i] <= b[j])));
    bool next_b =
        partial_size_b < partial_size_a ||
        (in_bounds_j &&
         (!in_bounds_i || (partial_size_b == partial_size_a && b[j] <= a[i])));
    if (next_a) {
      partial_size_a *= a[i];
      ++i;
    }
    if (next_b) {
      partial_size_b *= b[j];
      ++j;
    }
  }
  return bounds;
}
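
// For example, CommonFactors({2, 3}, {6}) returns {{0, 0}, {2, 1}}:
// dimensions [0, 2) of `a` and dimension [0, 1) of `b` carry the same total
// element count (6) and form a single common factor group.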

ConvertedDimensionNumbers ConvertDimensionNumbers(
    absl::Span<const int64> from_dimensions, absl::Span<const int64> from_sizes,
    absl::Span<const int64> to_sizes) {
  ConvertedDimensionNumbers dimensions;
  auto common_factors = CommonFactors(from_sizes, to_sizes);
  for (int64_t i = 0; i < common_factors.size() - 1; ++i) {
    bool any_present = false;
    bool all_present = true;
    for (int64_t d = common_factors[i].first; d < common_factors[i + 1].first;
         ++d) {
      const bool present = absl::c_linear_search(from_dimensions, d);
      any_present |= present;
      all_present &= present;
    }
    if (all_present) {
      for (int64_t d = common_factors[i].second;
           d < common_factors[i + 1].second; ++d) {
        dimensions.to_dimensions.push_back(d);
      }
      for (int64_t d = common_factors[i].first; d < common_factors[i + 1].first;
           ++d) {
        dimensions.transformed_from_dimensions.push_back(d);
      }
    } else if (any_present) {
      for (int64_t d = common_factors[i].first; d < common_factors[i + 1].first;
           ++d) {
        if (absl::c_linear_search(from_dimensions, d)) {
          dimensions.untransformed_from_dimensions.push_back(d);
        }
      }
    }
  }
  return dimensions;
}
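
// For example, with from_sizes = {2, 3} and to_sizes = {6} (one common factor
// group, per CommonFactors above): from_dimensions = {0, 1} covers the group
// fully, so to_dimensions = {0} and transformed_from_dimensions = {0, 1};
// from_dimensions = {0} covers it only partially, so dimension 0 lands in
// untransformed_from_dimensions instead.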

string SanitizeFileName(string file_name) {
  for (char& c : file_name) {
    if (c == '/' || c == '\\' || c == '[' || c == ']' || c == ' ') {
      c = '_';
    }
  }
  return file_name;
}
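
// For example, SanitizeFileName("while[0]/body layer") returns
// "while_0__body_layer", replacing every '/', '\\', '[', ']', and ' '.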

// Utility function to split a double-precision float (F64) into a pair of
// F32s. For a p-bit number, and a splitting point (p/2) <= s <= (p - 1), the
// algorithm produces a (p - s)-bit value 'hi' and a non-overlapping (s - 1)-bit
// value 'lo'. See Theorem 4 in [1] (attributed to Dekker) or [2] for the
// original theorem by Dekker.
//
// For double-precision F64s, which contain a 53-bit mantissa (52 of them
// explicit), we can represent the most significant 49 bits as the unevaluated
// sum of two single-precision floats 'hi' and 'lo'. The 'hi' float stores the
// most significant 24 bits; the sign bit of 'lo', together with its mantissa,
// stores the remaining 25 bits. The exponent of the resulting representation
// is still restricted to the 8 bits of an F32.
//
// References:
// [1] A. Thall, Extended-Precision Floating-Point Numbers for GPU Computation,
//     SIGGRAPH Research Posters, 2006.
//     (http://andrewthall.org/papers/df64_qf128.pdf)
// [2] T. J. Dekker, A floating point technique for extending the available
//     precision, Numerische Mathematik, vol. 18, pp. 224–242, 1971.
std::pair<float, float> SplitF64ToF32(double x) {
  const float x_f32 = static_cast<float>(x);

  // Early return if x is an infinity or NaN.
  if (!std::isfinite(x_f32)) {
    // Only values within the range of F32 are supported, unless it is
    // infinity. Small values with large negative exponents would be rounded
    // to zero.
    if (std::isfinite(x)) {
      LOG(WARNING) << "Out of range F64 constant detected: " << x;
    }
    return std::make_pair(x_f32, 0.0f);
  }
395
396 // The high float is simply the double rounded to the nearest float. Because
397 // we are rounding to nearest with ties to even, the error introduced in
398 // rounding is less than half an ULP in the high ULP.
399 const float hi = x_f32;
400 // We can compute the low term using Sterbenz' lemma: If a and b are two
401 // positive floating point numbers and a/2 ≤ b ≤ 2a, then their difference can
402 // be computed exactly.
403 // Note: the difference is computed exactly but is rounded to the nearest
404 // float which will introduce additional error.
405 const float lo = static_cast<float>(x - static_cast<double>(hi));
406 return std::make_pair(hi, lo);
407 }
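
// For example (values approximate): SplitF64ToF32(0.1) returns
// hi = 0.10000000149011612f, the float nearest to 0.1, and
// lo ≈ -1.4901161e-09f, so that the unevaluated sum hi + lo matches the
// double 0.1 in its most significant ~49 mantissa bits.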

}  // namespace xla