/external/tensorflow/tensorflow/core/kernels/
eigen_pooling.h
     96  post_reduce_dims[idxRows] = Eigen::divup(
     99  post_reduce_dims[idxCols] = Eigen::divup(
    103  post_reduce_dims[idxRows] = Eigen::divup(
    105  post_reduce_dims[idxCols] = Eigen::divup(
    209  post_reduce_dims[idxPlanes] = Eigen::divup(  in CuboidMaxPooling()
    212  post_reduce_dims[idxRows] = Eigen::divup(  in CuboidMaxPooling()
    215  post_reduce_dims[idxCols] = Eigen::divup(  in CuboidMaxPooling()
    219  post_reduce_dims[idxPlanes] = Eigen::divup(  in CuboidMaxPooling()
    221  post_reduce_dims[idxRows] = Eigen::divup(  in CuboidMaxPooling()
    223  post_reduce_dims[idxCols] = Eigen::divup(  in CuboidMaxPooling()
    [all …]
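All of the pooling hits above compute an output (post-reduce) extent by ceiling-dividing the usable input extent by the stride; the VALID and SAME padding branches differ only in whether the window size is subtracted first. A minimal self-contained sketch of that arithmetic, assuming the usual VALID/SAME semantics (pooled_output_dim and Padding are illustrative names, not identifiers from eigen_pooling.h):

    // Self-contained sketch; pooled_output_dim and the Padding enum are
    // illustrative names, not identifiers from eigen_pooling.h.
    #include <cstdint>
    #include <iostream>

    enum class Padding { kValid, kSame };

    // Ceiling division, the same arithmetic Eigen::divup performs.
    int64_t divup(int64_t x, int64_t y) { return (x + y - 1) / y; }

    // Output extent of a pooling window along one dimension.
    int64_t pooled_output_dim(int64_t input, int64_t window, int64_t stride,
                              Padding padding) {
      if (padding == Padding::kValid) {
        // VALID: windows must fit entirely inside the input.
        return divup(input - window + 1, stride);
      }
      // SAME: the input is padded so every stride position produces an output.
      return divup(input, stride);
    }

    int main() {
      // 10 input rows, 3x3 window, stride 2: VALID gives 4 rows, SAME gives 5.
      std::cout << pooled_output_dim(10, 3, 2, Padding::kValid) << "\n";  // 4
      std::cout << pooled_output_dim(10, 3, 2, Padding::kSame) << "\n";   // 5
      return 0;
    }
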
eigen_volume_patch.h
     99  m_outputPlanes = Eigen::divup(  in CustomTensorEvaluator()
    104  m_outputRows = Eigen::divup(  in CustomTensorEvaluator()
    109  m_outputCols = Eigen::divup(  in CustomTensorEvaluator()
    121  m_outputPlanes = Eigen::divup(  in CustomTensorEvaluator()
    123  m_outputRows = Eigen::divup(m_input_rows_eff - m_patch_rows_eff + 1,  in CustomTensorEvaluator()
    125  m_outputCols = Eigen::divup(m_input_cols_eff - m_patch_cols_eff + 1,  in CustomTensorEvaluator()
    132  m_outputPlanes = Eigen::divup(m_input_planes_eff, m_plane_strides);  in CustomTensorEvaluator()
    133  m_outputRows = Eigen::divup(m_input_rows_eff, m_row_strides);  in CustomTensorEvaluator()
    134  m_outputCols = Eigen::divup(m_input_cols_eff, m_col_strides);  in CustomTensorEvaluator()
redux_functor.h
     54  Eigen::divup(min_block_workload, parallel_cell_size);  in operator()
     56  std::min(max_parallelism, Eigen::divup(total_workload, min_block_size));  in operator()
     63  const int64 outer_block_size = Eigen::divup(outer_dim, num_blocks);  in operator()
eigen_contraction_kernel.h
    212  m, Eigen::divup(static_cast<StorageIndex>(mc_ * kScaleM), kUnrollM) *
    215  n, Eigen::divup(static_cast<StorageIndex>(nc_ * kScaleN), kUnrollN) *
    220  (std::max)(StorageIndex(1), Eigen::divup(k, kc_));
    224  Eigen::divup(k / target_k_slices, packet_size) * packet_size;
relu_op_gpu.cu.cc
    105  int32 half2_count = Eigen::divup(count, 2);  in operator ()()
    134  int32 vect_count = Eigen::divup(count, 4);  in operator ()()
eigen_cuboid_convolution.h
   1699  out_planes = Eigen::divup(inputPlanes - kernelPlanes + 1,
   1701  out_height = Eigen::divup(inputRows - kernelRows + 1,
   1703  out_width = Eigen::divup(inputCols - kernelCols + 1,
   1708  Eigen::divup(inputPlanes, static_cast<TensorIndex>(stridePlanes));
   1710  Eigen::divup(inputRows, static_cast<TensorIndex>(strideRows));
   1711  out_width = Eigen::divup(inputCols, static_cast<TensorIndex>(strideCols));
softmax_op_gpu.cu.cc
    165  const int numBlocks = Eigen::divup(rows * cols, numThreads);  in Compute()
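The GPU call sites above size a one-dimensional launch the same way: the block count is the element count divided by the threads per block, rounded up so the last, partially filled block still covers the tail (the kernel is then expected to bounds-check its global index). A hedged sketch of that computation, with launch_config as a hypothetical stand-in for the real launch code:

    // Illustrative launch-size computation; launch_config is a hypothetical
    // helper, not a function from softmax_op_gpu.cu.cc.
    #include <cstdint>
    #include <iostream>

    int64_t divup(int64_t x, int64_t y) { return (x + y - 1) / y; }

    // One thread per element: round the block count up so the final,
    // partially filled block still covers the remainder of the input.
    void launch_config(int64_t element_count, int64_t threads_per_block) {
      const int64_t num_blocks = divup(element_count, threads_per_block);
      std::cout << num_blocks << " blocks x " << threads_per_block
                << " threads covers " << num_blocks * threads_per_block
                << " >= " << element_count << " elements\n";
    }

    int main() {
      // 8 blocks; plain integer division would give 7 and miss 104 elements.
      launch_config(1000, 128);
      return 0;
    }
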
segment_reduction_ops_gpu.cu.cc
    160  Eigen::divup(input_outer_dim_size, Index(OuterDimTileSize));  in operator ()()
reduction_gpu_kernels.cu.h
    520  const int num_blocks = std::min(32, Eigen::divup(in_size, num_threads));
    621  dim3 block_dim(32, std::min(Eigen::divup(extent_x, rows_per_warp), 32), 1);
    623  Eigen::divup(static_cast<unsigned int>(extent_x),
    710  int num_blocks = Eigen::divup(extent_y, threads_per_block);
parameterized_truncated_normal_op.cc
    383  int32 adjusted_batches = Eigen::divup(size, adjusted_samples);  in Compute()
eigen_backward_spatial_convolutions_test.cc
    750  const int output_rows = divup(input_rows - patch_rows + 1, stride);  in test_batched_strided_spatial_convolution_backward_input_valid()
    751  const int output_cols = divup(input_cols - patch_cols + 1, stride);  in test_batched_strided_spatial_convolution_backward_input_valid()
    826  const int output_rows = divup(input_rows - patch_rows + 1, stride);  in test_batched_strided_spatial_convolution_backward_input_valid_row_major()
    827  const int output_cols = divup(input_cols - patch_cols + 1, stride);  in test_batched_strided_spatial_convolution_backward_input_valid_row_major()
cudnn_rnn_ops.cc
    375  Eigen::divup(byte_size, static_cast<int64>(sizeof(T)));  in AllocateBytes()
    420  Eigen::divup(byte_size, static_cast<int64>(sizeof(T)));  in AllocateBytes()
/external/eigen/unsupported/Eigen/CXX11/src/Tensor/
TensorContractionThreadPool.h
    244  Index nm0 = divup(m, bm);
    245  Index nn0 = divup(n, bn);
    246  Index nk = divup(k, bk);
    264  Index nm = divup(nm0, gm);
    265  Index nn = divup(nn0, gn);
    358  divup<size_t>(bm_ * bk_ * sizeof(LhsScalar), align) * align;
    360  divup<size_t>(bn_ * bk_ * sizeof(RhsScalar), align) * align;
    659  Index nm0 = divup(m, bm);
    665  while (gm1 <= nm0 && nm1 == divup(nm0, gm1)) gm1++;
    671  nm1 = divup(nm0, gm1);
    [all …]
TensorDeviceThreadPool.h
    207  Index block_count = divup(n, block_size);  in parallelFor()
    212  (divup<int>(block_count, numThreads()) * numThreads());  in parallelFor()
    218  Index coarser_block_size = divup(n, prev_block_count - 1);  in parallelFor()
    228  const Index coarser_block_count = divup(n, coarser_block_size);  in parallelFor()
    233  (divup<int>(coarser_block_count, numThreads()) * numThreads());  in parallelFor()
    257  Index mid = first + divup((last - first) / 2, block_size) * block_size;  in parallelFor()
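In TensorDeviceThreadPool.h, parallelFor starts from divup(n, block_size) to decide how many blocks cover n units of work, then searches nearby block sizes for one that spreads more evenly over the pool's threads. A minimal single-threaded sketch of the covering step only (parallel_for_blocks is an illustrative name, not the Eigen API):

    // Minimal sketch of splitting n units of work into fixed-size blocks,
    // in the spirit of parallelFor; parallel_for_blocks is an illustrative
    // name, not the Eigen API.
    #include <algorithm>
    #include <cstdint>
    #include <functional>
    #include <iostream>

    int64_t divup(int64_t x, int64_t y) { return (x + y - 1) / y; }

    // Invokes f on consecutive half-open ranges [first, last) of size at
    // most block_size; the last block absorbs the remainder.
    void parallel_for_blocks(int64_t n, int64_t block_size,
                             const std::function<void(int64_t, int64_t)>& f) {
      const int64_t block_count = divup(n, block_size);  // rounded up, as in parallelFor
      for (int64_t b = 0; b < block_count; ++b) {
        const int64_t first = b * block_size;
        const int64_t last = std::min(first + block_size, n);
        f(first, last);  // in the real code each block is scheduled on the thread pool
      }
    }

    int main() {
      parallel_for_blocks(10, 4, [](int64_t first, int64_t last) {
        std::cout << "[" << first << ", " << last << ")\n";  // [0,4) [4,8) [8,10)
      });
      return 0;
    }
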
TensorReductionCuda.h
    293  const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
    320  const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
    386  const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread);
    457  const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread * 2);
    458  const Index num_input_blocks = divup<Index>(input_col_blocks * num_preserved_coeffs, 2);
    561  const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
    569  const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
    607  const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
    615  const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
    680  const Index max_iter = num_preserved_coeffs * divup<Index>(num_coeffs_to_reduce, NumPerThread);
    [all …]
TensorMeta.h
     30  T divup(const X x, const Y y) {  in divup() function
     36  T divup(const T x, const T y) {  in divup() function
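The two TensorMeta.h hits are the definition of divup itself: integer ceiling division, with one overload that lets the caller pick the result type and one where both operands and the result share it. A standalone restatement of the usual (x + y - 1) / y formulation, without Eigen's device/inline qualifiers:

    // Sketch of the ceiling-division helper the listing above points at; the
    // real definitions live in TensorMeta.h, this is a standalone restatement.
    #include <cstdint>
    #include <iostream>

    // Heterogeneous-argument overload: the result type T is chosen explicitly.
    template <typename T, typename X, typename Y>
    T divup(const X x, const Y y) {
      return static_cast<T>((x + y - 1) / y);  // rounds the quotient up for positive x, y
    }

    // Homogeneous overload: operands and result share type T.
    template <typename T>
    T divup(const T x, const T y) {
      return static_cast<T>((x + y - 1) / y);
    }

    int main() {
      std::cout << divup<int>(7, 2) << "\n";                // 4
      std::cout << divup(int64_t{10}, int64_t{5}) << "\n";  // 2; exact division is unchanged
      return 0;
    }
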
TensorExecutor.h
    258  …const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, divup<int>(size, block_size…
/external/tensorflow/tensorflow/contrib/rnn/kernels/
lstm_ops_gpu.cu.cc
    244  Eigen::divup(batch_size * (cell_size + input_size), block_dim);  in LSTMBlockCellFpropWithCUDA()
    261  dim3 grid_dim_2d(Eigen::divup(batch_size, static_cast<int>(block_dim_2d.x)),  in LSTMBlockCellFpropWithCUDA()
    262  Eigen::divup(cell_size, static_cast<int>(block_dim_2d.y)));  in LSTMBlockCellFpropWithCUDA()
    377  dim3 grid_dim_2d(Eigen::divup(batch_size, static_cast<int>(block_dim_2d.x)),  in LSTMBlockCellBpropWithCUDA()
    378  Eigen::divup(cell_size, static_cast<int>(block_dim_2d.y)));  in LSTMBlockCellBpropWithCUDA()
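The LSTM kernels size a two-dimensional grid by rounding each axis up independently, so the blocks tile the whole batch_size x cell_size plane even when neither extent is a multiple of the block shape. A plain C++ sketch (Dim3 and grid_for are illustrative stand-ins for CUDA's dim3 and the kernels' launch code, and the 32x8 block shape is an assumed example):

    // Plain C++ sketch of sizing a 2-D launch grid; Dim3 here is a stand-in
    // struct for CUDA's dim3, not the real type.
    #include <cstdint>
    #include <iostream>

    int64_t divup(int64_t x, int64_t y) { return (x + y - 1) / y; }

    struct Dim3 { int64_t x, y, z; };

    // Each axis is rounded up independently, so the grid of blocks covers a
    // batch_size x cell_size tile even when neither extent is a multiple of
    // the block shape.
    Dim3 grid_for(int64_t batch_size, int64_t cell_size, Dim3 block) {
      return Dim3{divup(batch_size, block.x), divup(cell_size, block.y), 1};
    }

    int main() {
      const Dim3 block{32, 8, 1};  // assumed 32x8 thread block
      const Dim3 grid = grid_for(100, 50, block);
      std::cout << grid.x << " x " << grid.y << " blocks\n";  // 4 x 7
      return 0;
    }
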