/external/llvm-project/polly/test/ScopInfo/ |
D | invariant_load_complex_condition.ll | 26 ; CHECK-NEXT: [block_y, block_x] -> { Stmt_entry_split[] }; 28 ; CHECK-NEXT: [block_y, block_x] -> { Stmt_entry_split[] -> [] }; 30 …block_x] -> { Stmt_entry_split[] -> MemRef4[o0] : (-3 <= block_y < 0 and block_x <= -4 and -8 + bl… 32 ; CHECK-NEXT: [block_y, block_x] -> { Stmt_entry_split[] -> MemRef0[] }; 38 define void @dct_luma(i32 %block_x, i32 %block_y) #0 { 43 %div = sdiv i32 %block_x, 4
|
/external/tensorflow/tensorflow/lite/delegates/gpu/common/tasks/ |
D | fully_connected.h | 71 for (int block_x = 0; 4 * block_x < padded_src_channels; block_x++) { in RearrangeFCWeightsToIOO4I4() local 74 int x = 4 * block_x + x_in_block; in RearrangeFCWeightsToIOO4I4() 77 int dst_index = block_x * padded_dst_channels * 4 + block_y * 16 + in RearrangeFCWeightsToIOO4I4()
|
/external/tensorflow/tensorflow/lite/delegates/gpu/common/tasks/special/ |
D | fc_fc_add.h | 49 for (int block_x = 0; 4 * block_x < padded_src_channels; block_x++) { in RearrangeFCWeightsToIOO4I4() local 52 int x = 4 * block_x + x_in_block; in RearrangeFCWeightsToIOO4I4() 53 int dst_index = block_x * padded_dst_channels * 4 + block_y * 16 + in RearrangeFCWeightsToIOO4I4()
|
/external/llvm-project/mlir/test/Dialect/GPU/ |
D | outlining.mlir | 28 threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, 31 "some_op"(%bx, %block_x) : (index, index) -> () 70 threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, 101 threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, 125 threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, 153 threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, 180 threads(%tx, %ty, %tz) in (%block_x = %c1, %block_y = %c1, 206 threads(%tx, %ty, %tz) in (%block_x = %c1, %block_y = %c1, 225 threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst,
|
D | multiple-all-reduce.mlir | 11 threads(%tx, %ty, %tz) in (%block_x = %c1, %block_y = %c1, %block_z = %c1) {
|
D | ops.mlir | 9 threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) { 20 threads(%tx, %ty, %tz) in (%block_x = %thrd, %block_y = %thrd, %block_z = %thrd) {
|
/external/pdfium/third_party/libopenjpeg20/ |
D | sparse_array.c | 127 OPJ_UINT32 x, block_x; in opj_sparse_array_int32_read_or_write() local 134 block_x = x0 / block_width; in opj_sparse_array_int32_read_or_write() 135 for (x = x0; x < x1; block_x ++, x += x_incr) { in opj_sparse_array_int32_read_or_write() 142 src_block = sa->data_blocks[block_y * sa->block_count_hor + block_x]; in opj_sparse_array_int32_read_or_write() 242 sa->data_blocks[block_y * sa->block_count_hor + block_x] = src_block; in opj_sparse_array_int32_read_or_write()
|
/external/llvm-project/mlir/test/mlir-cuda-runner/ |
D | two-modules.mlir | 13 threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) { 19 threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
|
D | shuffle.mlir | 13 threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) { 16 %width = index_cast %block_x : index to i32
|
D | all-reduce-op.mlir | 16 threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %sy, %block_z = %sz) { 19 %t2 = muli %t1, %block_x : index
|
D | gpu-to-cubin.mlir | 8 threads(%tx, %ty, %tz) in (%block_x = %cst2, %block_y = %cst, %block_z = %cst) {
|
D | all-reduce-region.mlir | 13 threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
|
D | all-reduce-max.mlir | 48 threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
|
D | all-reduce-or.mlir | 48 threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
|
D | all-reduce-min.mlir | 48 threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
|
D | all-reduce-xor.mlir | 48 threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
|
D | all-reduce-and.mlir | 48 threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
|
D | multiple-all-reduce.mlir | 51 threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
|
/external/llvm-project/mlir/test/mlir-rocm-runner/ |
D | two-modules.mlir | 14 threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %c1, %block_z = %c1) { 20 threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %c1, %block_z = %c1) {
|
D | vector-transferops.mlir | 6 threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, %block_z = %cst) { 28 threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, %block_z = %cst) {
|
D | gpu-to-hsaco.mlir | 8 threads(%tx, %ty, %tz) in (%block_x = %block_dim, %block_y = %c1, %block_z = %c1) {
|
D | vecadd.mlir | 8 threads(%tx, %ty, %tz) in (%block_x = %block_dim, %block_y = %c1, %block_z = %c1) {
|
/external/tensorflow/tensorflow/core/profiler/internal/gpu/ |
D | cupti_collector.h | 85 uint32 block_x; member 105 kernel_info.grid_z, " block:", kernel_info.block_x, ",", in ToXStat()
|
/external/tensorflow/tensorflow/core/profiler/utils/ |
D | kernel_stats_utils_test.cc | 75 kernel_info.block_x = 32; in TEST()
|
/external/llvm/test/CodeGen/ARM/ |
D | 2007-01-19-InfiniteLoop.ll | 12 define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) {
|