/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/util.h"

#include <stdarg.h>

#include <cmath>
#include <limits>
#include <numeric>

#include "absl/container/flat_hash_map.h"
#include "absl/container/inlined_vector.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/core/lib/bfloat16/bfloat16.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/math/math_util.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/numbers.h"
#include "tensorflow/core/platform/stacktrace.h"

namespace xla {

Status WithLogBacktrace(const Status& status) {
  CHECK(!status.ok());
  VLOG(1) << status.ToString();
  VLOG(2) << tensorflow::CurrentStackTrace();
  return status;
}

ScopedLoggingTimer::ScopedLoggingTimer(const std::string& label, bool enabled,
                                       const char* file, int line,
                                       TimerStats* timer_stats)
    : enabled_(enabled),
      file_(file),
      line_(line),
      label_(label),
      timer_stats_(timer_stats) {
  if (enabled_) {
    start_micros_ = tensorflow::Env::Default()->NowMicros();
  }
}

void ScopedLoggingTimer::StopAndLog() {
  if (enabled_) {
    uint64 end_micros = tensorflow::Env::Default()->NowMicros();
    double secs = (end_micros - start_micros_) / 1000000.0;

    TimerStats& stats = *timer_stats_;
    tensorflow::mutex_lock lock(stats.stats_mutex);
    stats.cumulative_secs += secs;
    if (secs > stats.max_secs) {
      stats.max_secs = secs;
    }
    stats.times_called++;

    LOG(INFO).AtLocation(file_, line_)
        << label_
        << " time: " << tensorflow::strings::HumanReadableElapsedTime(secs)
        << " (cumulative: "
        << tensorflow::strings::HumanReadableElapsedTime(stats.cumulative_secs)
        << ", max: "
        << tensorflow::strings::HumanReadableElapsedTime(stats.max_secs)
        << ", #called: " << stats.times_called << ")";
    enabled_ = false;
  }
}

ScopedLoggingTimer::~ScopedLoggingTimer() { StopAndLog(); }
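
// Illustrative usage (a comment-only sketch; RunPass is a hypothetical
// callee). Callers typically reach this class through the
// XLA_SCOPED_LOGGING_TIMER* convenience macros declared alongside it in
// util.h, which supply the file/line and a function-local static TimerStats:
//
//   {
//     static TimerStats stats;
//     ScopedLoggingTimer timer("run-pass", /*enabled=*/true, __FILE__,
//                              __LINE__, &stats);
//     RunPass();
//   }  // Scope exit invokes StopAndLog(), which logs the elapsed time plus
//      // the cumulative/max/#called statistics accumulated in `stats`.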

Status AddStatus(Status prior, absl::string_view context) {
  CHECK(!prior.ok());
  return Status{prior.code(),
                absl::StrCat(context, ": ", prior.error_message())};
}

Status AppendStatus(Status prior, absl::string_view context) {
  CHECK(!prior.ok());
  return Status{prior.code(),
                absl::StrCat(prior.error_message(), ": ", context)};
}
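
// Comment-only sketch of the difference between the two helpers above: for a
// prior status whose message is "out of memory" and context "while compiling",
//   AddStatus(prior, context)     yields "while compiling: out of memory"
//   AppendStatus(prior, context)  yields "out of memory: while compiling"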

string Reindent(absl::string_view original,
                const absl::string_view indentation) {
  std::vector<string> pieces =
      absl::StrSplit(absl::string_view(original.data(), original.size()), '\n');
  return absl::StrJoin(pieces, "\n", [indentation](string* out, string s) {
    absl::StrAppend(out, indentation, absl::StripAsciiWhitespace(s));
  });
}
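
// Comment-only example: Reindent strips each line's leading and trailing
// ASCII whitespace and re-prefixes the line with `indentation`, so
//   Reindent("  foo\n    bar", "> ")  yields  "> foo\n> bar"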

bool IsPermutation(absl::Span<const int64> permutation, int64 rank) {
  if (rank != permutation.size()) {
    return false;
  }
  absl::InlinedVector<int64, 8> trivial_permutation(rank);
  absl::c_iota(trivial_permutation, 0);
  return absl::c_is_permutation(permutation, trivial_permutation);
}

std::vector<int64> InversePermutation(
    absl::Span<const int64> input_permutation) {
  DCHECK(IsPermutation(input_permutation, input_permutation.size()));
  std::vector<int64> output_permutation(input_permutation.size(), -1);
  for (size_t i = 0; i < input_permutation.size(); ++i) {
    output_permutation.at(input_permutation.at(i)) = i;
  }
  return output_permutation;
}

std::vector<int64> ComposePermutations(absl::Span<const int64> p1,
                                       absl::Span<const int64> p2) {
  CHECK_EQ(p1.size(), p2.size());
  std::vector<int64> output;
  for (size_t i = 0; i < p1.size(); ++i) {
    output.push_back(p1.at(p2.at(i)));
  }
  return output;
}

bool IsIdentityPermutation(absl::Span<const int64> permutation) {
  for (int64 i = 0; i < permutation.size(); ++i) {
    if (permutation[i] != i) {
      return false;
    }
  }
  return true;
}
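
// Comment-only examples of the permutation helpers above:
//   IsPermutation({1, 2, 0}, 3)        -> true
//   InversePermutation({1, 2, 0})      -> {2, 0, 1}
//   ComposePermutations(p1, p2)[i] == p1[p2[i]], so composing a permutation
//   with its inverse yields the identity:
//   ComposePermutations({1, 2, 0}, {2, 0, 1}) -> {0, 1, 2}
//   IsIdentityPermutation({0, 1, 2})   -> true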
148 
RoundTripFpToString(tensorflow::bfloat16 value)149 string RoundTripFpToString(tensorflow::bfloat16 value) {
150   return absl::StrFormat("%.4g", static_cast<float>(value));
151 }
152 
RoundTripFpToString(Eigen::half value)153 string RoundTripFpToString(Eigen::half value) {
154   return absl::StrFormat("%.5g", static_cast<float>(value));
155 }
156 
RoundTripFpToString(float value)157 string RoundTripFpToString(float value) {
158   char buffer[tensorflow::strings::kFastToBufferSize];
159   tensorflow::strings::FloatToBuffer(value, buffer);
160   return buffer;
161 }
162 
RoundTripFpToString(double value)163 string RoundTripFpToString(double value) {
164   char buffer[tensorflow::strings::kFastToBufferSize];
165   tensorflow::strings::DoubleToBuffer(value, buffer);
166   return buffer;
167 }
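
// Comment-only note: the float/double overloads above print a decimal string
// with enough digits to parse back to the identical value (e.g.
// RoundTripFpToString(0.1f) yields "0.1"), while the bfloat16/half overloads
// use fixed %.4g / %.5g precision, which suffices to round-trip those
// narrower types.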

PaddingConfig MakeNoPaddingConfig(int64 rank) {
  PaddingConfig padding_config;
  for (int64 dnum = 0; dnum < rank; ++dnum) {
    auto dimension = padding_config.add_dimensions();
    dimension->set_edge_padding_low(0);
    dimension->set_edge_padding_high(0);
    dimension->set_interior_padding(0);
  }
  return padding_config;
}

PaddingConfig MakeEdgePaddingConfig(
    absl::Span<const std::pair<int64, int64>> padding) {
  PaddingConfig padding_config;
  for (const std::pair<int64, int64>& dim : padding) {
    auto dimension = padding_config.add_dimensions();
    dimension->set_edge_padding_low(dim.first);
    dimension->set_edge_padding_high(dim.second);
    dimension->set_interior_padding(0);
  }
  return padding_config;
}

bool HasInteriorPadding(const PaddingConfig& config) {
  for (const auto& dim : config.dimensions()) {
    if (dim.interior_padding() != 0) {
      return true;
    }
  }
  return false;
}
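
// Comment-only example: MakeEdgePaddingConfig({{1, 2}}) builds a rank-1
// config with edge_padding_low=1, edge_padding_high=2, interior_padding=0;
// HasInteriorPadding returns false for any config built by the two helpers
// above, since both always set interior_padding to 0.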

namespace {
string HumanReadableNumOps(double flops, double nanoseconds,
                           absl::string_view op_prefix) {
  if (nanoseconds == 0) {
    return absl::StrCat("NaN ", op_prefix, "OP/s");
  }
  double nano_flops = flops / nanoseconds;
  string throughput = tensorflow::strings::HumanReadableNum(
      static_cast<int64>(nano_flops * 1e9));
  absl::string_view sp(throughput);
  // Use the more common "G(FLOPS)", rather than "B(FLOPS)".
  if (absl::EndsWith(sp, "B") ||  // Ends in 'B', ignoring case.
      absl::EndsWith(sp, "b")) {
    *throughput.rbegin() = 'G';
  }
  throughput += absl::StrCat(op_prefix, "OP/s");
  return throughput;
}
}  // namespace

string HumanReadableNumFlops(double flops, double nanoseconds) {
  return HumanReadableNumOps(flops, nanoseconds, "FL");
}

string HumanReadableNumTranscendentalOps(double trops, double nanoseconds) {
  return HumanReadableNumOps(trops, nanoseconds, "TR");
}
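
// Comment-only worked example: HumanReadableNumFlops(1e9, 1e9) describes 1e9
// floating-point operations over one second (1e9 ns). The per-second rate is
// 1e9, which HumanReadableNum renders as roughly "1.00B"; the trailing 'B' is
// then rewritten to the more conventional 'G', giving "1.00GFLOP/s".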

void LogLines(int sev, absl::string_view text, const char* fname, int lineno) {
  const int orig_sev = sev;
  if (sev == tensorflow::FATAL) {
    sev = tensorflow::ERROR;
  }

  // Protect calls with a mutex so we don't interleave calls to LogLines from
  // multiple threads.
  static tensorflow::mutex log_lines_mu(tensorflow::LINKER_INITIALIZED);
  tensorflow::mutex_lock lock(log_lines_mu);

  size_t cur = 0;
  while (cur < text.size()) {
    size_t eol = text.find('\n', cur);
    if (eol == absl::string_view::npos) {
      eol = text.size();
    }
    auto msg = text.substr(cur, eol - cur);
    tensorflow::internal::LogString(fname, lineno, sev,
                                    string(msg.data(), msg.size()));
    cur = eol + 1;
  }

  if (orig_sev == tensorflow::FATAL) {
    tensorflow::internal::LogString(fname, lineno, orig_sev,
                                    "Aborting due to errors.");
  }
}

int64 Product(absl::Span<const int64> xs) {
  return std::accumulate(xs.begin(), xs.end(), static_cast<int64>(1),
                         std::multiplies<int64>());
}

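// Comment-only example for CommonFactors below: it returns the (i, j) "cut
// points" at which the partial products of the two dimension sequences agree,
// i.e. pairs with Product(a[0:i]) == Product(b[0:j]). For a = {2, 3} and
// b = {6}:
//   CommonFactors({2, 3}, {6}) -> {{0, 0}, {2, 1}}
// since 1 == 1 before any dimension and 2 * 3 == 6 after all of them.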
absl::InlinedVector<std::pair<int64, int64>, 8> CommonFactors(
    absl::Span<const int64> a, absl::Span<const int64> b) {
  CHECK_EQ(Product(a), Product(b));
  if (0 == Product(a)) {
    return {std::make_pair(0, 0), std::make_pair(a.size(), b.size())};
  }

  absl::InlinedVector<std::pair<int64, int64>, 8> bounds;
  for (int64 i = 0, j = 0, prior_i = -1, prior_j = -1, partial_size_a = 1,
             partial_size_b = 1;
       ;) {
    if (partial_size_a == partial_size_b && (i > prior_i || j > prior_j)) {
      std::tie(prior_i, prior_j) = std::make_pair(i, j);
      bounds.emplace_back(i, j);
      continue;
    }
    bool in_bounds_i = i < a.size();
    bool in_bounds_j = j < b.size();
    if (!(in_bounds_i || in_bounds_j)) {
      break;
    }
    bool next_a =
        partial_size_a < partial_size_b ||
        (in_bounds_i &&
         (!in_bounds_j || (partial_size_a == partial_size_b && a[i] <= b[j])));
    bool next_b =
        partial_size_b < partial_size_a ||
        (in_bounds_j &&
         (!in_bounds_i || (partial_size_b == partial_size_a && b[j] <= a[i])));
    if (next_a) {
      partial_size_a *= a[i];
      ++i;
    }
    if (next_b) {
      partial_size_b *= b[j];
      ++j;
    }
  }
  return bounds;
}

string SanitizeFileName(string file_name) {
  for (char& c : file_name) {
    if (c == '/' || c == '\\' || c == '[' || c == ']' || c == ' ') {
      c = '_';
    }
  }
  return file_name;
}

// Utility function to split a double-precision float (F64) into a pair of F32s.
// For a p-bit number, and a splitting point (p/2) <= s <= (p - 1), the
// algorithm produces a (p - s)-bit value 'hi' and a non-overlapping (s - 1)-bit
// value 'lo'. See Theorem 4 in [1] (attributed to Dekker) or [2] for the
// original theorem by Dekker.
//
// For double-precision F64s, which contain a 53-bit mantissa (52 of them
// explicit), we can represent the most significant 49 bits as the unevaluated
// sum of two single-precision floats 'hi' and 'lo'. The 'hi' float stores the
// most significant 24 bits; the sign bit of 'lo' together with its mantissa
// stores the remaining 25 bits. The exponent of the resulting representation
// is still restricted to the 8 bits of F32.
//
// References:
// [1] A. Thall, Extended-Precision Floating-Point Numbers for GPU Computation,
//     SIGGRAPH Research Posters, 2006.
//     (http://andrewthall.org/papers/df64_qf128.pdf)
// [2] T. J. Dekker, A floating point technique for extending the available
//     precision, Numerische Mathematik, vol. 18, pp. 224–242, 1971.
std::pair<float, float> SplitF64ToF32(double x) {
  const float x_f32 = static_cast<float>(x);
  // Early return if x is an infinity or NaN.
  if (!std::isfinite(x)) {
    return std::make_pair(x_f32, 0.0f);
  }

  // Only values within the finite range of F32 are supported; a finite F64
  // whose magnitude exceeds that range would convert to infinity and trip
  // this check. Small values with large negative exponents are simply rounded
  // to zero.
  CHECK(std::isfinite(x_f32)) << x;

  // The high float is simply the double rounded to the nearest float. Because
  // we are rounding to nearest with ties to even, the error introduced by the
  // rounding is at most half an ULP of the high float.
  const float hi = x_f32;
  // We can compute the low term using Sterbenz' lemma: if a and b are two
  // positive floating point numbers and a/2 ≤ b ≤ 2a, then their difference
  // can be computed exactly.
  // Note: the difference is computed exactly but is then rounded to the
  // nearest float, which may introduce additional error.
  const float lo = static_cast<float>(x - static_cast<double>(hi));
  return std::make_pair(hi, lo);
}
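
// Comment-only worked example: 2^-30 + 2^-60 is exactly representable as an
// F64 but not as a single F32 (its significand spans 31 bits, more than the
// 24 an F32 provides), so
//   SplitF64ToF32(std::ldexp(1.0, -30) + std::ldexp(1.0, -60))
// yields hi == 2^-30 and lo == 2^-60 (both exact F32 values), and
// static_cast<double>(hi) + static_cast<double>(lo) recovers the input
// exactly.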

}  // namespace xla