
Searched refs:divup (Results 1 – 22 of 22) sorted by relevance

/external/tensorflow/tensorflow/core/kernels/
eigen_pooling.h 96 post_reduce_dims[idxRows] = Eigen::divup(
99 post_reduce_dims[idxCols] = Eigen::divup(
103 post_reduce_dims[idxRows] = Eigen::divup(
105 post_reduce_dims[idxCols] = Eigen::divup(
209 post_reduce_dims[idxPlanes] = Eigen::divup( in CuboidMaxPooling()
212 post_reduce_dims[idxRows] = Eigen::divup( in CuboidMaxPooling()
215 post_reduce_dims[idxCols] = Eigen::divup( in CuboidMaxPooling()
219 post_reduce_dims[idxPlanes] = Eigen::divup( in CuboidMaxPooling()
221 post_reduce_dims[idxRows] = Eigen::divup( in CuboidMaxPooling()
223 post_reduce_dims[idxCols] = Eigen::divup( in CuboidMaxPooling()
[all …]
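The divup calls above turn pooling geometry into output extents: with VALID padding only windows that fit entirely inside the input count, while with SAME padding every stride step over the input produces an output. A minimal sketch of that arithmetic; valid_extent and same_extent are illustrative names, not functions from the file:

    // divup(x, y) == ceil(x / y) for positive integers.
    int divup(int x, int y) { return (x + y - 1) / y; }

    // VALID padding: count only complete windows.
    int valid_extent(int input, int window, int stride) {
      return divup(input - window + 1, stride);
    }

    // SAME padding: one output per stride step over the whole input.
    int same_extent(int input, int stride) {
      return divup(input, stride);
    }

    // Example: input 7, window 3, stride 2 -> valid_extent == 3, same_extent == 4.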
redux_functor.h 76 const Eigen::Index inner_block_size = Eigen::divup(inner_dim, num_blocks); in operator()
134 Eigen::divup(min_block_workload, parallel_cell_size); in operator()
136 max_parallelism, Eigen::divup(total_workload, min_block_size)); in operator()
142 const Eigen::Index outer_block_size = Eigen::divup(outer_dim, num_blocks); in operator()
247 Eigen::divup(min_block_workload, parallel_cell_size); in operator()
249 std::min(max_parallelism, Eigen::divup(total_workload, min_block_size)); in operator()
257 Eigen::divup(total_workload, num_blocks); in operator()
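redux_functor.h uses divup for two related jobs: divup(inner_dim, num_blocks) splits a dimension into near-equal blocks, and divup(total_workload, min_block_size) bounds the number of blocks so none falls below a minimum workload. A hedged sketch of the second pattern; the function name and signature are illustrative:

    #include <algorithm>
    #include <cstdint>

    // Cap the block count so every block keeps at least min_block_size work.
    int64_t pick_num_blocks(int64_t total_workload, int64_t min_block_size,
                            int64_t max_parallelism) {
      const int64_t by_workload =
          (total_workload + min_block_size - 1) / min_block_size;  // divup
      return std::min(max_parallelism, std::max<int64_t>(int64_t{1}, by_workload));
    }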
eigen_volume_patch.h 99 m_outputPlanes = Eigen::divup( in CustomTensorEvaluator()
104 m_outputRows = Eigen::divup( in CustomTensorEvaluator()
109 m_outputCols = Eigen::divup( in CustomTensorEvaluator()
121 m_outputPlanes = Eigen::divup( in CustomTensorEvaluator()
123 m_outputRows = Eigen::divup(m_input_rows_eff - m_patch_rows_eff + 1, in CustomTensorEvaluator()
125 m_outputCols = Eigen::divup(m_input_cols_eff - m_patch_cols_eff + 1, in CustomTensorEvaluator()
132 m_outputPlanes = Eigen::divup(m_input_planes_eff, m_plane_strides); in CustomTensorEvaluator()
133 m_outputRows = Eigen::divup(m_input_rows_eff, m_row_strides); in CustomTensorEvaluator()
134 m_outputCols = Eigen::divup(m_input_cols_eff, m_col_strides); in CustomTensorEvaluator()
relu_op_gpu.cu.cc 181 int32 half8_count = Eigen::divup(count, VectorSizeElements); in operator ()()
182 int32 kBlock = Eigen::divup(half8_count, kThreadInBlock); in operator ()()
187 int32 half2_count = Eigen::divup(count, 2); in operator ()()
227 int32 vect_count = Eigen::divup(count, 4); in operator ()()
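relu_op_gpu.cu.cc applies divup twice in its launch arithmetic: once to count vector-width work items (each thread consumes VectorSizeElements half values at a time) and once to convert that count into a block count. A host-side sketch; both constants are assumptions, not the kernel's actual values:

    constexpr int kVectorSizeElements = 8;  // assumption: half8 vector loads
    constexpr int kThreadInBlock = 512;     // assumption: threads per block

    int divup(int x, int y) { return (x + y - 1) / y; }

    int grid_blocks(int count) {
      const int vec_count = divup(count, kVectorSizeElements);  // work items
      return divup(vec_count, kThreadInBlock);                  // CUDA blocks
    }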
eigen_contraction_kernel.h 315 m, Eigen::divup(static_cast<StorageIndex>(mc_ * kScaleM), kUnrollM) *
318 n, Eigen::divup(static_cast<StorageIndex>(nc_ * kScaleN), kUnrollN) *
323 (std::max)(StorageIndex(1), Eigen::divup(k, kc_));
327 Eigen::divup(k / target_k_slices, packet_size) * packet_size;
637 bn > 0 ? divup(n, bn) : 0, lhsBlock); \
657 bm > 0 ? divup(m, bm) : 0, rhsBlock); \
831 bn > 0 ? divup(n, bn) : 0, lhsBlock); \
845 bm > 0 ? divup(m, bm) : 0, rhsBlock); \
softmax_op_gpu.cu.cc 255 Eigen::divup(rows * cols, numThreadsPerBlock * kUnroll); in Compute()
267 Eigen::divup(rows * cols, numThreadsPerBlock * kUnroll); in Compute()
eigen_backward_spatial_convolutions_test.cc 348 const int output_rows = divup(input_rows - patch_rows + 1, stride); in test_batched_strided_spatial_convolution_backward_input_valid()
349 const int output_cols = divup(input_cols - patch_cols + 1, stride); in test_batched_strided_spatial_convolution_backward_input_valid()
424 const int output_rows = divup(input_rows - patch_rows + 1, stride); in test_batched_strided_spatial_convolution_backward_input_valid_row_major()
425 const int output_cols = divup(input_cols - patch_cols + 1, stride); in test_batched_strided_spatial_convolution_backward_input_valid_row_major()
reduction_gpu_kernels.cu.h 663 std::min(TF_RED_WARPSIZE, Eigen::divup(in_size, num_threads));
767 std::min(Eigen::divup(extent_x, rows_per_warp), (1024 / TF_RED_WARPSIZE)),
770 Eigen::divup(static_cast<unsigned int>(extent_x),
863 int num_blocks = Eigen::divup(extent_y, threads_per_block);
923 Eigen::divup(extent_x * n_group_out * n_size, threads_per_block);
938 num_blocks = Eigen::divup(extent_x * n_size, threads_per_block);
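Line 663 above sizes per-thread work the same way: divup(in_size, num_threads) is how many elements each thread must reduce, clamped to TF_RED_WARPSIZE. A sketch, treating the warp size as a plain constant (32 on NVIDIA hardware):

    #include <algorithm>

    constexpr int kWarpSize = 32;  // stands in for TF_RED_WARPSIZE

    // Elements reduced per thread, capped at one warp's width.
    int per_thread_elems(int in_size, int num_threads) {
      return std::min(kWarpSize, (in_size + num_threads - 1) / num_threads);
    }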
eigen_cuboid_convolution.h 1893 out_planes = Eigen::divup(inputPlanes - kernelPlanes + 1,
1895 out_height = Eigen::divup(inputRows - kernelRows + 1,
1897 out_width = Eigen::divup(inputCols - kernelCols + 1,
1902 Eigen::divup(inputPlanes, static_cast<TensorIndex>(stridePlanes));
1904 Eigen::divup(inputRows, static_cast<TensorIndex>(strideRows));
1905 out_width = Eigen::divup(inputCols, static_cast<TensorIndex>(strideCols));
segment_reduction_ops_gpu.cu.cc 166 Eigen::divup(input_outer_dim_size, Index(OuterDimTileSize)); in operator ()()
eigen_spatial_convolutions-inl.h 1657 out_height = divup(InputRowsEff - kernelRowsEff + 1, row_stride);
1658 out_width = divup(InputColsEff - kernelColsEff + 1, col_stride);
1663 out_height = divup(InputRows, row_stride);
1664 out_width = divup(InputCols, col_stride);
fused_batch_norm_op.cu.cc 345 std::is_same<T, Eigen::half>::value ? Eigen::divup(count, 2) : count, d, \ in operator ()()
parameterized_truncated_normal_op.cc 674 int32 adjusted_batches = Eigen::divup(size, adjusted_samples); in Compute()
fused_batch_norm_op.cc 679 Eigen::divup(byte_size, static_cast<int64>(sizeof(T))); in AllocateBytes()
733 Eigen::divup(byte_size, static_cast<int64>(sizeof(T))); in AllocateBytes()
cudnn_rnn_ops.cc 374 Eigen::divup(byte_size, static_cast<int64>(sizeof(T))); in AllocateBytes()
418 Eigen::divup(byte_size, static_cast<int64>(sizeof(T))); in AllocateBytes()
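The AllocateBytes hits in fused_batch_norm_op.cc and cudnn_rnn_ops.cc all share one idiom: divup(byte_size, sizeof(T)) converts a byte count into an element count, rounding up so the allocation never comes up short. A sketch:

    #include <cstdint>

    // Elements of type T needed to hold byte_size bytes, rounded up.
    template <typename T>
    int64_t elements_for_bytes(int64_t byte_size) {
      const int64_t elem = static_cast<int64_t>(sizeof(T));
      return (byte_size + elem - 1) / elem;  // divup(byte_size, sizeof(T))
    }

    // Example: elements_for_bytes<float>(10) == 3, since 3 * 4 bytes >= 10.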
/external/llvm-project/mlir/lib/Dialect/Async/Transforms/
AsyncParallelFor.cpp 126 auto divup = [&](Value x, Value y) -> Value { in matchAndRewrite() local
138 tripCounts[i] = divup(range, step); in matchAndRewrite()
158 blockSize[0] = divup(tripCounts[0], targetNumBlocks[0]); in matchAndRewrite()
159 numBlocks[0] = divup(tripCounts[0], blockSize[0]); in matchAndRewrite()
163 targetNumBlocks[i] = divup(targetNumBlocks[i - 1], numBlocks[i - 1]); in matchAndRewrite()
164 blockSize[i] = divup(tripCounts[i], targetNumBlocks[i]); in matchAndRewrite()
165 numBlocks[i] = divup(tripCounts[i], blockSize[i]); in matchAndRewrite()
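Here divup is built as IR: the lambda at line 126 emits the ceil-division computation on MLIR Values, and the pass then applies it twice per dimension, rounding the block size up (blockSize = divup(tripCount, targetBlocks)) and rounding the block count back down to what that size actually requires (numBlocks = divup(tripCount, blockSize)). A plain-integer sketch of that two-step split:

    #include <utility>

    int divup(int x, int y) { return (x + y - 1) / y; }

    // Split tripCount iterations into at most targetBlocks blocks.
    std::pair<int, int> split(int tripCount, int targetBlocks) {
      const int blockSize = divup(tripCount, targetBlocks);
      const int numBlocks = divup(tripCount, blockSize);
      return {blockSize, numBlocks};  // numBlocks <= targetBlocks
    }

    // Example: split(10, 4) == {3, 4}; split(10, 3) == {4, 3}.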
/external/eigen/unsupported/Eigen/CXX11/src/Tensor/
TensorDeviceThreadPool.h 207 Index block_count = divup(n, block_size); in parallelFor()
212 (divup<int>(block_count, numThreads()) * numThreads()); in parallelFor()
218 Index coarser_block_size = divup(n, prev_block_count - 1); in parallelFor()
228 const Index coarser_block_count = divup(n, coarser_block_size); in parallelFor()
233 (divup<int>(coarser_block_count, numThreads()) * numThreads()); in parallelFor()
257 Index mid = first + divup((last - first) / 2, block_size) * block_size; in parallelFor()
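parallelFor runs divup in both directions: divup(n, block_size) turns a block size into a block count, and divup(n, prev_block_count - 1) coarsens the block size so fewer blocks still cover n. Line 257 adds a third idiom, rounding the midpoint of a range up to a block boundary before the recursive split. A sketch of that midpoint computation:

    using Index = long;

    Index divup(Index x, Index y) { return (x + y - 1) / y; }

    // Midpoint of [first, last) aligned up to a block_size boundary, so both
    // halves of the recursive split contain only whole blocks.
    Index split_point(Index first, Index last, Index block_size) {
      return first + divup((last - first) / 2, block_size) * block_size;
    }

    // Example: split_point(0, 100, 8) == divup(50, 8) * 8 == 56.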
TensorContractionThreadPool.h 244 Index nm0 = divup(m, bm);
245 Index nn0 = divup(n, bn);
246 Index nk = divup(k, bk);
264 Index nm = divup(nm0, gm);
265 Index nn = divup(nn0, gn);
358 divup<size_t>(bm_ * bk_ * sizeof(LhsScalar), align) * align;
360 divup<size_t>(bn_ * bk_ * sizeof(RhsScalar), align) * align;
659 Index nm0 = divup(m, bm);
665 while (gm1 <= nm0 && nm1 == divup(nm0, gm1)) gm1++;
671 nm1 = divup(nm0, gm1);
[all …]
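Besides counting blocks (nm0 = divup(m, bm) and friends), the contraction thread pool rounds scratch-buffer sizes up to an alignment boundary: divup(bytes, align) * align is the smallest aligned size that holds bytes. A sketch of that round-up idiom:

    #include <cstddef>

    // Smallest multiple of align that is >= bytes, matching the
    // divup<size_t>(..., align) * align pattern at lines 358 and 360.
    size_t aligned_size(size_t bytes, size_t align) {
      return (bytes + align - 1) / align * align;
    }

    // Example: aligned_size(100, 64) == 128.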
TensorReductionCuda.h 293 const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
320 const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
386 const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread);
457 const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread * 2);
458 const Index num_input_blocks = divup<Index>(input_col_blocks * num_preserved_coeffs, 2);
561 const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
569 const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
607 const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
615 const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
680 const Index max_iter = num_preserved_coeffs * divup<Index>(num_coeffs_to_reduce, NumPerThread);
[all …]
TensorMeta.h 30 T divup(const X x, const Y y) { in divup() function
36 T divup(const T x, const T y) { in divup() function
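TensorMeta.h is where every hit above bottoms out: divup(x, y) is ceiling division for non-negative x and positive y, with one overload deducing a separate result type T and one taking both arguments as T. A standalone sketch of the single-type form; the real definitions also carry Eigen's device and inline qualifiers:

    // Ceiling division: how many size-y pieces are needed to cover x.
    template <typename T>
    T divup(const T x, const T y) {
      return static_cast<T>((x + y - 1) / y);
    }

    // divup(10, 4) == 3, divup(8, 4) == 2, divup(1, 4) == 1.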
TensorExecutor.h 258 …const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, divup<int>(size, block_size…
/external/tensorflow/tensorflow/core/kernels/rnn/
lstm_ops_gpu.cu.cc 249 Eigen::divup(batch_size * (cell_size + input_size), block_dim); in LSTMBlockCellFpropWithCUDA()
266 dim3 grid_dim_2d(Eigen::divup(batch_size, static_cast<int>(block_dim_2d.x)), in LSTMBlockCellFpropWithCUDA()
267 Eigen::divup(cell_size, static_cast<int>(block_dim_2d.y))); in LSTMBlockCellFpropWithCUDA()
384 dim3 grid_dim_2d(Eigen::divup(batch_size, static_cast<int>(block_dim_2d.x)), in LSTMBlockCellBpropWithCUDA()
385 Eigen::divup(cell_size, static_cast<int>(block_dim_2d.y))); in LSTMBlockCellBpropWithCUDA()
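The LSTM kernels close the list with the canonical 2-D use: one divup per grid dimension, so the batch and cell dimensions are each covered by enough blocks. A host-side sketch of the grid computation; the block dimensions in the example are assumptions, not the file's values:

    struct Dim2 { int x, y; };

    int divup(int a, int b) { return (a + b - 1) / b; }

    // Grid sizing in the style of LSTMBlockCellFpropWithCUDA.
    Dim2 grid_for(int batch_size, int cell_size, Dim2 block) {
      return {divup(batch_size, block.x), divup(cell_size, block.y)};
    }

    // Example: grid_for(100, 256, {16, 16}) == {7, 16}.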