/external/tensorflow/tensorflow/compiler/xla/stream_executor/cuda/ |
D | cuda_fft.cc | 100 fft::Type type, int batch_count, ScratchAllocator *scratch_allocator) { in Initialize() argument 105 scratch_allocator_ = scratch_allocator; in Initialize() 125 if (scratch_allocator == nullptr) { in Initialize() 206 return UpdateScratchAllocator(stream, scratch_allocator); in Initialize() 210 if (scratch_allocator == nullptr) { in Initialize() 250 return UpdateScratchAllocator(stream, scratch_allocator); in Initialize() 259 ScratchAllocator *scratch_allocator) { in Initialize() argument 264 /*output_distance=*/0, type, 1, scratch_allocator); in Initialize() 268 Stream *stream, ScratchAllocator *scratch_allocator) { in UpdateScratchAllocator() argument 269 scratch_allocator_ = scratch_allocator; in UpdateScratchAllocator() [all …]
|
D | cuda_fft.h | 73 int batch_count, ScratchAllocator* scratch_allocator); 78 ScratchAllocator* scratch_allocator); 81 ScratchAllocator *scratch_allocator);
|
D | cuda_blas_lt.h | 160 ScratchAllocator& scratch_allocator, 196 algorithm, scratch_allocator, bias, profile_result); 205 ScratchAllocator& scratch_allocator,
|
D | cuda_blas.cc | 1193 ScratchAllocator *scratch_allocator) { in DoBlasInternalImpl() argument 1223 if (scratch_allocator != nullptr) { in DoBlasInternalImpl() 1225 scratch_allocator->AllocateBytes(size)); in DoBlasInternalImpl() 1227 scratch_allocator->AllocateBytes(size)); in DoBlasInternalImpl() 1229 scratch_allocator->AllocateBytes(size)); in DoBlasInternalImpl() 1330 ScratchAllocator *scratch_allocator) { in DoBlasInternalImpl() argument 1335 b_array, ldb, beta, c_array, ldc, batch_count, scratch_allocator); in DoBlasInternalImpl() 1347 ScratchAllocator *scratch_allocator) { in DoBlasInternalImpl() argument 1350 b_array, ldb, beta, c_array, ldc, batch_count, scratch_allocator); in DoBlasInternalImpl() 1363 ScratchAllocator *scratch_allocator) { in DoBlasInternalImpl() argument [all …]
|
/external/tensorflow/tensorflow/stream_executor/rocm/ |
D | rocm_fft.cc | 161 fft::Type type, int batch_count, ScratchAllocator *scratch_allocator) { in Initialize() argument 166 scratch_allocator_ = scratch_allocator; in Initialize() 181 if (scratch_allocator == nullptr) { in Initialize() 272 return UpdateScratchAllocator(stream, scratch_allocator); in Initialize() 276 if (scratch_allocator == nullptr) { in Initialize() 313 return UpdateScratchAllocator(stream, scratch_allocator); in Initialize() 322 ScratchAllocator *scratch_allocator) { in Initialize() argument 327 /*output_distance=*/0, type, 1, scratch_allocator); in Initialize() 331 Stream *stream, ScratchAllocator *scratch_allocator) { in UpdateScratchAllocator() argument 332 scratch_allocator_ = scratch_allocator; in UpdateScratchAllocator() [all …]
|
D | rocm_fft.h | 79 int batch_count, ScratchAllocator *scratch_allocator); 84 ScratchAllocator *scratch_allocator); 87 ScratchAllocator *scratch_allocator);
|
D | rocm_blas.cc | 712 int batch_count, uint64_t batch_stride, ScratchAllocator *scratch_allocator, in AllocateStridedBuffer() argument 743 if (scratch_allocator != nullptr) { in AllocateStridedBuffer() 746 scratch_allocator->AllocateBytes(matrix_batch_byte_size)); in AllocateStridedBuffer() 771 int ldc, int batch_count, ScratchAllocator *scratch_allocator) { in DoBlasGemmBatchedInternal() argument 816 a_raw_ptrs, batch_count, batch_stride_a, scratch_allocator, stream, in DoBlasGemmBatchedInternal() 825 b_raw_ptrs, batch_count, batch_stride_b, scratch_allocator, stream, in DoBlasGemmBatchedInternal() 834 c_raw_ptrs, batch_count, batch_stride_c, scratch_allocator, stream, in DoBlasGemmBatchedInternal() 865 int batch_count, ScratchAllocator *scratch_allocator) { in DoBlasGemmBatched() argument 873 scratch_allocator); in DoBlasGemmBatched() 887 int batch_count, ScratchAllocator *scratch_allocator) { in DoBlasGemmBatched() argument [all …]
|
D | rocm_blas.h | 142 ScratchAllocator *scratch_allocator, Stream *stream, 172 int ldc, int batch_count, ScratchAllocator *scratch_allocator);
|
D | rocm_dnn.h | 249 bool use_fallback, ScratchAllocator* scratch_allocator, 269 ScratchAllocator* scratch_allocator, 363 DeviceMemoryBase output_data, ScratchAllocator* scratch_allocator, 770 ScratchAllocator* scratch_allocator, dnn::AlgorithmDesc* algorithm_desc, 789 ScratchAllocator* scratch_allocator, DeviceMemory<uint8>* scratch_memory, 800 ScratchAllocator* scratch_allocator, 811 ScratchAllocator* scratch_allocator,
|
D | BUILD | 199 "//tensorflow/stream_executor:scratch_allocator", 234 "//tensorflow/stream_executor:scratch_allocator", 276 "//tensorflow/stream_executor:scratch_allocator",
|
/external/tensorflow/tensorflow/compiler/xla/stream_executor/ |
D | fft.h | 111 ScratchAllocator *scratch_allocator) = 0; 116 bool in_place_fft, ScratchAllocator *scratch_allocator) = 0; 121 bool in_place_fft, ScratchAllocator *scratch_allocator) = 0; 168 ScratchAllocator *scratch_allocator) = 0; 177 Stream *stream, Plan *plan, ScratchAllocator *scratch_allocator) = 0; 226 ScratchAllocator *scratch_allocator) override; \ 229 bool in_place_fft, ScratchAllocator *scratch_allocator) override; \ 232 fft::Type type, bool in_place_fft, ScratchAllocator *scratch_allocator) \ 243 bool in_place_fft, int batch_count, ScratchAllocator *scratch_allocator) \ 246 ScratchAllocator *scratch_allocator) \
|
D | blas.h | 434 int ldc, int batch_count, ScratchAllocator *scratch_allocator) = 0; 442 int ldc, int batch_count, ScratchAllocator *scratch_allocator) = 0; 451 int ldc, int batch_count, ScratchAllocator *scratch_allocator) = 0; 458 int ldc, int batch_count, ScratchAllocator *scratch_allocator) = 0; 466 int batch_count, ScratchAllocator *scratch_allocator) = 0; 699 ScratchAllocator *scratch_allocator) override; \ 706 ScratchAllocator *scratch_allocator) override; \ 713 ScratchAllocator *scratch_allocator) override; \ 721 int batch_count, ScratchAllocator *scratch_allocator) override; \ 729 int batch_count, ScratchAllocator *scratch_allocator) override; \
|
/external/tensorflow/tensorflow/core/kernels/ |
D | conv_ops_gpu.h | 133 AllocateScratchOrFallback(se::ScratchAllocator* scratch_allocator, in AllocateScratchOrFallback() argument 142 auto scratch_or = scratch_allocator->AllocateBytes(workspace_size); in AllocateScratchOrFallback() 162 DnnScratchAllocator* scratch_allocator, in LaunchAutotunedConv() argument 189 scratch_allocator, primary, no_scratch_fallback)); in LaunchAutotunedConv() 197 conv_desc, scratch_allocator, autotune_entry.GetAlgorithmConfig(), in LaunchAutotunedConv()
|
D | matmul_util.h | 92 se::ScratchAllocator& scratch_allocator, 104 algorithm, scratch_allocator, bias, 110 algorithm, scratch_allocator, bias, profile_result);
|
D | matmul_op_fused.cc | 204 BlasScratchAllocator scratch_allocator(context); in AutotuneMatmul() local 207 launch_func(scratch_allocator, profile_algorithm, &profile_result); in AutotuneMatmul() 302 auto launch_func = [&](BlasScratchAllocator& scratch_allocator, in operator ()() 306 scratch_allocator, bias_ptr, profile_result); in operator ()() 318 BlasScratchAllocator scratch_allocator(context); in operator ()() local 319 OP_REQUIRES_OK(context, launch_func(scratch_allocator, algorithm, nullptr)); in operator ()()
|
D | conv_ops_gpu.cc | 55 DnnScratchAllocator scratch_allocator(scratch_size_limit, ctx); in AutotuneConvImpl() local 59 : static_cast<se::ScratchAllocator*>(&scratch_allocator); in AutotuneConvImpl() 82 : scratch_allocator.TotalByteSize()); in AutotuneConvImpl() 384 DnnScratchAllocator scratch_allocator(scratch_size_limit, ctx); in AutotuneUnfusedConv() local 390 &scratch_allocator, &algorithms)) { in AutotuneUnfusedConv() 413 output_ptr, conv_desc, &scratch_allocator, in AutotuneUnfusedConv() 422 result.set_scratch_bytes(scratch_allocator.TotalByteSize()); in AutotuneUnfusedConv()
|
D | cudnn_pooling_gpu.cc | 108 DnnScratchAllocator scratch_allocator(PoolingScratchSize, context); in Compute() local 111 &output_data, &scratch_allocator)); in Compute() 244 DnnScratchAllocator scratch_allocator(PoolingScratchSize, context); in Compute() local 249 &input_backprop_data, &scratch_allocator)); in Compute()
|
D | matmul_op_impl.h | 457 BlasScratchAllocator scratch_allocator(context, max_scratch_size); 460 profile_algorithm, scratch_allocator, 491 BlasScratchAllocator scratch_allocator(context, max_scratch_size); 502 *c_ptrs[0], algorithm, scratch_allocator)); 519 BlasScratchAllocator scratch_allocator(context, max_scratch_size); 527 &scratch_allocator) 635 BlasScratchAllocator scratch_allocator(context); 643 &scratch_allocator)
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | cublas_lt_matmul_thunk.cc | 61 se::OwningScratchAllocator<> scratch_allocator(allocs.device_ordinal(), in ExecuteOnStream() local 66 allocs.GetDeviceAddress(d_buffer_), bias, *algorithm_, scratch_allocator); in ExecuteOnStream()
|
D | gpu_conv_algorithm_picker.cc | 190 ScratchAllocator* scratch_allocator, se::Stream* stream) { in GetMIOpenAlgorithms() argument 208 params.config->conv_desc, /* use_fallback = */ false, scratch_allocator, in GetMIOpenAlgorithms() 489 se::RedzoneAllocator scratch_allocator( in AutotuneOneConvRunner() local 505 auto scratch_or = scratch_allocator.AllocateBytes(*workspace_size); in AutotuneOneConvRunner() 539 scratch_allocator.TotalAllocatedBytesExcludingRedzones(); in AutotuneOneConvRunner() 561 CheckRedzones(scratch_allocator, stream, "scratch", instr, &result)); in AutotuneOneConvRunner() 828 ScratchAllocator scratch_allocator(device_ordinal, allocator); in PickBestAlgorithmNoCacheRocm() local 833 stream_exec_, &scratch_allocator, stream)); in PickBestAlgorithmNoCacheRocm() 866 scratch_allocator.AllocateBytes(runner->GetWorkspaceSize())); in PickBestAlgorithmNoCacheRocm() 894 int64_t scratch_bytes_used = scratch_allocator.TotalAllocatedBytes(); in PickBestAlgorithmNoCacheRocm()
|
D | matmul_utils.cc | 693 se::ScratchAllocator& scratch_allocator, in DoMatmul() argument 713 output, algorithm, scratch_allocator, in DoMatmul() 722 se::ScratchAllocator& scratch_allocator, in ExecuteOnStream() argument 732 scratch_allocator, profile_result); in ExecuteOnStream() 736 algorithm, scratch_allocator, profile_result); in ExecuteOnStream() 739 bias_buffer, algorithm, scratch_allocator, in ExecuteOnStream() 743 bias_buffer, algorithm, scratch_allocator, in ExecuteOnStream() 747 bias_buffer, algorithm, scratch_allocator, in ExecuteOnStream() 752 scratch_allocator, profile_result); in ExecuteOnStream()
|
D | fft_thunk.cc | 104 se::OwningScratchAllocator<2> scratch_allocator(device_ordinal, in RunFft() local 142 batch_size, &scratch_allocator); in RunFft() 148 stream, fft_plan.get(), &scratch_allocator); in RunFft()
|
D | matmul_utils.h | 139 se::ScratchAllocator& scratch_allocator, 159 se::ScratchAllocator& scratch_allocator,
|
/external/tensorflow/tensorflow/stream_executor/ |
D | BUILD | 245 name = "scratch_allocator", 246 hdrs = ["scratch_allocator.h"], 248 "//tensorflow/compiler/xla/stream_executor:scratch_allocator",
|
/external/tensorflow/tensorflow/tsl/platform/default/build_config/ |
D | BUILD | 55 "//tensorflow/stream_executor:scratch_allocator", 107 "//tensorflow/stream_executor:scratch_allocator",
|