/external/tensorflow/tensorflow/core/kernels/ |
D | eigen_pooling.h |
     96  post_reduce_dims[idxRows] = Eigen::divup(
     99  post_reduce_dims[idxCols] = Eigen::divup(
    103  post_reduce_dims[idxRows] = Eigen::divup(
    105  post_reduce_dims[idxCols] = Eigen::divup(
    209  post_reduce_dims[idxPlanes] = Eigen::divup(  in CuboidMaxPooling()
    212  post_reduce_dims[idxRows] = Eigen::divup(  in CuboidMaxPooling()
    215  post_reduce_dims[idxCols] = Eigen::divup(  in CuboidMaxPooling()
    219  post_reduce_dims[idxPlanes] = Eigen::divup(  in CuboidMaxPooling()
    221  post_reduce_dims[idxRows] = Eigen::divup(  in CuboidMaxPooling()
    223  post_reduce_dims[idxCols] = Eigen::divup(  in CuboidMaxPooling()
    [all …]
|
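The eigen_pooling.h hits compute the spatial extent of the pooled output: with VALID padding only windows that fit entirely inside the input are produced, while with SAME padding every stride offset yields an output element. A minimal sketch of that formula, with illustrative names (PoolOutputSize and its parameters are not the actual Eigen identifiers):

    #include <cstddef>

    using Index = std::ptrdiff_t;

    // Eigen::divup idiom: integer division rounded up.
    inline Index divup(Index x, Index y) { return (x + y - 1) / y; }

    // Output extent of a pooling window along one dimension.
    inline Index PoolOutputSize(Index input, Index window, Index stride,
                                bool same_padding) {
      return same_padding ? divup(input, stride)               // SAME
                          : divup(input - window + 1, stride); // VALID
    }

    // input = 10, window = 3, stride = 2:
    //   VALID -> divup(8, 2) = 4 windows, SAME -> divup(10, 2) = 5 windows.

The same two formulas recur below in eigen_volume_patch.h, eigen_cuboid_convolution.h, eigen_spatial_convolutions-inl.h, and the backward spatial convolution tests, there for convolution output shapes rather than pooling.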
D | redux_functor.h |
     76  const Eigen::Index inner_block_size = Eigen::divup(inner_dim, num_blocks);  in operator()
    134  Eigen::divup(min_block_workload, parallel_cell_size);  in operator()
    136  max_parallelism, Eigen::divup(total_workload, min_block_size));  in operator()
    142  const Eigen::Index outer_block_size = Eigen::divup(outer_dim, num_blocks);  in operator()
    247  Eigen::divup(min_block_workload, parallel_cell_size);  in operator()
    249  std::min(max_parallelism, Eigen::divup(total_workload, min_block_size));  in operator()
    257  Eigen::divup(total_workload, num_blocks);  in operator()
|
D | eigen_volume_patch.h |
     99  m_outputPlanes = Eigen::divup(  in CustomTensorEvaluator()
    104  m_outputRows = Eigen::divup(  in CustomTensorEvaluator()
    109  m_outputCols = Eigen::divup(  in CustomTensorEvaluator()
    121  m_outputPlanes = Eigen::divup(  in CustomTensorEvaluator()
    123  m_outputRows = Eigen::divup(m_input_rows_eff - m_patch_rows_eff + 1,  in CustomTensorEvaluator()
    125  m_outputCols = Eigen::divup(m_input_cols_eff - m_patch_cols_eff + 1,  in CustomTensorEvaluator()
    132  m_outputPlanes = Eigen::divup(m_input_planes_eff, m_plane_strides);  in CustomTensorEvaluator()
    133  m_outputRows = Eigen::divup(m_input_rows_eff, m_row_strides);  in CustomTensorEvaluator()
    134  m_outputCols = Eigen::divup(m_input_cols_eff, m_col_strides);  in CustomTensorEvaluator()
|
D | relu_op_gpu.cu.cc |
    181  int32 half8_count = Eigen::divup(count, VectorSizeElements);  in operator ()()
    182  int32 kBlock = Eigen::divup(half8_count, kThreadInBlock);  in operator ()()
    187  int32 half2_count = Eigen::divup(count, 2);  in operator ()()
    227  int32 vect_count = Eigen::divup(count, 4);  in operator ()()
|
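The GPU Relu kernels vectorize their loads (half8, half2, float4-style), so the launch configuration is two nested round-ups: the element count is first rounded up to whole vector items per thread, and that count is rounded up again to whole thread blocks. A hedged sketch of the arithmetic; the constants stand in for the kernel's VectorSizeElements and kThreadInBlock, whose real values live in the source:

    #include <cstdint>

    inline int32_t divup(int32_t x, int32_t y) { return (x + y - 1) / y; }

    // Illustrative launch geometry for a kernel that consumes several scalars
    // per thread (e.g. one half8 load per thread).
    constexpr int32_t kVectorSizeElements = 8;  // scalars per thread (assumed)
    constexpr int32_t kThreadInBlock = 512;     // threads per block (assumed)

    inline int32_t NumBlocks(int32_t count) {
      const int32_t vec_count = divup(count, kVectorSizeElements);
      return divup(vec_count, kThreadInBlock);
    }

    // count = 10000 -> vec_count = 1250 -> 3 blocks of 512 threads, enough to
    // touch every element at least once.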
D | eigen_contraction_kernel.h |
    315  m, Eigen::divup(static_cast<StorageIndex>(mc_ * kScaleM), kUnrollM) *
    318  n, Eigen::divup(static_cast<StorageIndex>(nc_ * kScaleN), kUnrollN) *
    323  (std::max)(StorageIndex(1), Eigen::divup(k, kc_));
    327  Eigen::divup(k / target_k_slices, packet_size) * packet_size;
    637  bn > 0 ? divup(n, bn) : 0, lhsBlock); \
    657  bm > 0 ? divup(m, bm) : 0, rhsBlock); \
    831  bn > 0 ? divup(n, bn) : 0, lhsBlock); \
    845  bm > 0 ? divup(m, bm) : 0, rhsBlock); \
|
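Several of the eigen_contraction_kernel.h hits pair divup with a multiplication, e.g. divup(k / target_k_slices, packet_size) * packet_size: the standard idiom for rounding a size up to the next multiple of an unroll factor or packet size. Sketched on its own, with an illustrative helper name:

    #include <cstddef>

    inline std::ptrdiff_t divup(std::ptrdiff_t x, std::ptrdiff_t y) {
      return (x + y - 1) / y;
    }

    // Round x up to the nearest multiple of m (m > 0): divup counts whole
    // chunks, multiplying converts the chunk count back into elements.
    inline std::ptrdiff_t RoundUpToMultiple(std::ptrdiff_t x, std::ptrdiff_t m) {
      return divup(x, m) * m;
    }

    // RoundUpToMultiple(100, 16) == 112, RoundUpToMultiple(96, 16) == 96.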
D | softmax_op_gpu.cu.cc |
    255  Eigen::divup(rows * cols, numThreadsPerBlock * kUnroll);  in Compute()
    267  Eigen::divup(rows * cols, numThreadsPerBlock * kUnroll);  in Compute()
|
D | eigen_backward_spatial_convolutions_test.cc |
    348  const int output_rows = divup(input_rows - patch_rows + 1, stride);  in test_batched_strided_spatial_convolution_backward_input_valid()
    349  const int output_cols = divup(input_cols - patch_cols + 1, stride);  in test_batched_strided_spatial_convolution_backward_input_valid()
    424  const int output_rows = divup(input_rows - patch_rows + 1, stride);  in test_batched_strided_spatial_convolution_backward_input_valid_row_major()
    425  const int output_cols = divup(input_cols - patch_cols + 1, stride);  in test_batched_strided_spatial_convolution_backward_input_valid_row_major()
|
D | reduction_gpu_kernels.cu.h |
    663  std::min(TF_RED_WARPSIZE, Eigen::divup(in_size, num_threads));
    767  std::min(Eigen::divup(extent_x, rows_per_warp), (1024 / TF_RED_WARPSIZE)),
    770  Eigen::divup(static_cast<unsigned int>(extent_x),
    863  int num_blocks = Eigen::divup(extent_y, threads_per_block);
    923  Eigen::divup(extent_x * n_group_out * n_size, threads_per_block);
    938  num_blocks = Eigen::divup(extent_x * n_size, threads_per_block);
|
D | eigen_cuboid_convolution.h |
    1893  out_planes = Eigen::divup(inputPlanes - kernelPlanes + 1,
    1895  out_height = Eigen::divup(inputRows - kernelRows + 1,
    1897  out_width = Eigen::divup(inputCols - kernelCols + 1,
    1902  Eigen::divup(inputPlanes, static_cast<TensorIndex>(stridePlanes));
    1904  Eigen::divup(inputRows, static_cast<TensorIndex>(strideRows));
    1905  out_width = Eigen::divup(inputCols, static_cast<TensorIndex>(strideCols));
|
D | segment_reduction_ops_gpu.cu.cc | 166 Eigen::divup(input_outer_dim_size, Index(OuterDimTileSize)); in operator ()()
|
D | eigen_spatial_convolutions-inl.h |
    1657  out_height = divup(InputRowsEff - kernelRowsEff + 1, row_stride);
    1658  out_width = divup(InputColsEff - kernelColsEff + 1, col_stride);
    1663  out_height = divup(InputRows, row_stride);
    1664  out_width = divup(InputCols, col_stride);
|
D | fused_batch_norm_op.cu.cc | 345 std::is_same<T, Eigen::half>::value ? Eigen::divup(count, 2) : count, d, \ in operator ()()
|
D | parameterized_truncated_normal_op.cc | 674 int32 adjusted_batches = Eigen::divup(size, adjusted_samples); in Compute()
|
D | fused_batch_norm_op.cc |
    679  Eigen::divup(byte_size, static_cast<int64>(sizeof(T)));  in AllocateBytes()
    733  Eigen::divup(byte_size, static_cast<int64>(sizeof(T)));  in AllocateBytes()
|
D | cudnn_rnn_ops.cc |
    374  Eigen::divup(byte_size, static_cast<int64>(sizeof(T)));  in AllocateBytes()
    418  Eigen::divup(byte_size, static_cast<int64>(sizeof(T)));  in AllocateBytes()
|
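Both fused_batch_norm_op.cc and cudnn_rnn_ops.cc use the same pattern inside AllocateBytes: a requested byte count is converted into an element count for a typed scratch tensor, and rounding up guarantees the typed buffer is never smaller than the requested size. A small sketch of the conversion (ElementsForBytes is an illustrative name):

    #include <cstdint>

    inline int64_t divup(int64_t x, int64_t y) { return (x + y - 1) / y; }

    // Number of elements of type T needed to cover byte_size bytes.
    template <typename T>
    int64_t ElementsForBytes(int64_t byte_size) {
      return divup(byte_size, static_cast<int64_t>(sizeof(T)));
    }

    // ElementsForBytes<float>(10) == 3, since 3 * 4 = 12 bytes >= 10 bytes.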
/external/llvm-project/mlir/lib/Dialect/Async/Transforms/ |
D | AsyncParallelFor.cpp |
    126  auto divup = [&](Value x, Value y) -> Value {  in matchAndRewrite() local
    138  tripCounts[i] = divup(range, step);  in matchAndRewrite()
    158  blockSize[0] = divup(tripCounts[0], targetNumBlocks[0]);  in matchAndRewrite()
    159  numBlocks[0] = divup(tripCounts[0], blockSize[0]);  in matchAndRewrite()
    163  targetNumBlocks[i] = divup(targetNumBlocks[i - 1], numBlocks[i - 1]);  in matchAndRewrite()
    164  blockSize[i] = divup(tripCounts[i], targetNumBlocks[i]);  in matchAndRewrite()
    165  numBlocks[i] = divup(tripCounts[i], blockSize[i]);  in matchAndRewrite()
|
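In MLIR's async-parallel-for pass the same computation happens at IR-construction time: a local divup lambda builds the ceiling division on IR Values, the per-loop trip count is divup(range, step), and the iteration space is then split into blocks from the outermost loop inward. The scalar logic those lines mirror, as a host-side sketch (the function name and the single target parameter are illustrative, and all counts are assumed to be at least 1):

    #include <cstdint>
    #include <vector>

    inline int64_t divup(int64_t x, int64_t y) { return (x + y - 1) / y; }

    // Pick a block size per loop so the total number of blocks stays close to
    // the requested parallelism, consuming the target outermost-first.
    std::vector<int64_t> BlockSizes(const std::vector<int64_t>& tripCounts,
                                    int64_t target) {
      std::vector<int64_t> blockSize(tripCounts.size());
      for (size_t i = 0; i < tripCounts.size(); ++i) {
        blockSize[i] = divup(tripCounts[i], target);
        const int64_t numBlocks = divup(tripCounts[i], blockSize[i]);
        target = divup(target, numBlocks);  // parallelism left for inner loops
      }
      return blockSize;
    }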
/external/eigen/unsupported/Eigen/CXX11/src/Tensor/ |
D | TensorDeviceThreadPool.h |
    207  Index block_count = divup(n, block_size);  in parallelFor()
    212  (divup<int>(block_count, numThreads()) * numThreads());  in parallelFor()
    218  Index coarser_block_size = divup(n, prev_block_count - 1);  in parallelFor()
    228  const Index coarser_block_count = divup(n, coarser_block_size);  in parallelFor()
    233  (divup<int>(coarser_block_count, numThreads()) * numThreads());  in parallelFor()
    257  Index mid = first + divup((last - first) / 2, block_size) * block_size;  in parallelFor()
|
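parallelFor uses divup twice: once to turn a block size into a block count, and once to judge how well that block count fills whole waves of numThreads() workers; a count just above a multiple of the pool size leaves most threads idle in the last wave, so the code tries coarser block sizes. A simplified sketch of that utilization metric (not the full Eigen cost-model search):

    #include <cstddef>

    using Index = std::ptrdiff_t;

    inline Index divup(Index x, Index y) { return (x + y - 1) / y; }

    // Fraction of worker capacity used when n items are split into blocks of
    // block_size and run wave by wave on num_threads threads.
    inline double BlockUtilization(Index n, Index block_size, Index num_threads) {
      const Index block_count = divup(n, block_size);
      const Index waves = divup(block_count, num_threads);
      return static_cast<double>(block_count) / (waves * num_threads);
    }

    // n = 1000, num_threads = 8: block_size = 100 gives 10 blocks over 2 waves
    // (10/16 = 0.625), block_size = 125 gives 8 blocks in 1 wave (8/8 = 1.0).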
D | TensorContractionThreadPool.h |
    244  Index nm0 = divup(m, bm);
    245  Index nn0 = divup(n, bn);
    246  Index nk = divup(k, bk);
    264  Index nm = divup(nm0, gm);
    265  Index nn = divup(nn0, gn);
    358  divup<size_t>(bm_ * bk_ * sizeof(LhsScalar), align) * align;
    360  divup<size_t>(bn_ * bk_ * sizeof(RhsScalar), align) * align;
    659  Index nm0 = divup(m, bm);
    665  while (gm1 <= nm0 && nm1 == divup(nm0, gm1)) gm1++;
    671  nm1 = divup(nm0, gm1);
    [all …]
|
D | TensorReductionCuda.h |
    293  const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
    320  const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
    386  const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread);
    457  const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread * 2);
    458  const Index num_input_blocks = divup<Index>(input_col_blocks * num_preserved_coeffs, 2);
    561  const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
    569  const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
    607  const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
    615  const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
    680  const Index max_iter = num_preserved_coeffs * divup<Index>(num_coeffs_to_reduce, NumPerThread);
    [all …]
|
D | TensorMeta.h |
     30  T divup(const X x, const Y y) {  in divup() function
     36  T divup(const T x, const T y) {  in divup() function
|
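TensorMeta.h is where divup itself is defined; the two hits are the mixed-type and the same-type overload of the same one-liner. The underlying idiom, as a standalone sketch:

    // Ceiling division for non-negative integers: the smallest q with q * y >= x.
    template <typename T>
    constexpr T divup(T x, T y) {
      return (x + y - 1) / y;
    }

    static_assert(divup(10, 4) == 3, "10 items in chunks of 4 need 3 chunks");
    static_assert(divup(8, 4) == 2, "exact multiples are not rounded up");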
D | TensorExecutor.h | 258 …const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, divup<int>(size, block_size…
|
/external/tensorflow/tensorflow/core/kernels/rnn/ |
D | lstm_ops_gpu.cu.cc |
    249  Eigen::divup(batch_size * (cell_size + input_size), block_dim);  in LSTMBlockCellFpropWithCUDA()
    266  dim3 grid_dim_2d(Eigen::divup(batch_size, static_cast<int>(block_dim_2d.x)),  in LSTMBlockCellFpropWithCUDA()
    267  Eigen::divup(cell_size, static_cast<int>(block_dim_2d.y)));  in LSTMBlockCellFpropWithCUDA()
    384  dim3 grid_dim_2d(Eigen::divup(batch_size, static_cast<int>(block_dim_2d.x)),  in LSTMBlockCellBpropWithCUDA()
    385  Eigen::divup(cell_size, static_cast<int>(block_dim_2d.y)));  in LSTMBlockCellBpropWithCUDA()
|
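The LSTM block kernels launch over a two-dimensional (batch, cell) domain, so each grid dimension is an independent round-up of its extent against the 2-D thread-block shape. A sketch of that geometry using a plain struct in place of CUDA's dim3 (block shape values are illustrative):

    inline int divup(int x, int y) { return (x + y - 1) / y; }

    struct Dim2 { int x; int y; };  // stand-in for CUDA's dim3

    // Grid covering a batch_size x cell_size domain with 2-D thread blocks.
    inline Dim2 GridFor(int batch_size, int cell_size, Dim2 block) {
      return Dim2{divup(batch_size, block.x), divup(cell_size, block.y)};
    }

    // batch_size = 33, cell_size = 128, block = {32, 32} -> grid {2, 4}: every
    // (batch, cell) pair is covered by at least one thread.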