/external/tensorflow/tensorflow/compiler/xla/service/ |
D | hlo_cost_analysis_test.cc | 156 EXPECT_EQ(analysis.bytes_accessed(), in TEST_F() 190 EXPECT_EQ(analysis.bytes_accessed(), in TEST_F() 226 EXPECT_EQ(analysis.bytes_accessed(), in TEST_F() 256 EXPECT_EQ(analysis.bytes_accessed(), in TEST_F() 280 EXPECT_EQ(analysis.bytes_accessed(), 80); in TEST_F() 312 EXPECT_EQ(analysis.bytes_accessed(), in TEST_F() 392 EXPECT_EQ(analysis.bytes_accessed(), in TEST_F() 420 EXPECT_EQ(analysis.bytes_accessed(), sizeof(float) * (10 * 20 + 1 + 10)); in TEST_F() 444 EXPECT_EQ(analysis.bytes_accessed(), sizeof(float) * (10 * 20 + 1 + 2 * 4)); in TEST_F() 479 EXPECT_EQ(analysis.bytes_accessed(), sizeof(float) * (10 * 20 * 2 + 2 * 3)); in TEST_F() [all …]
|
D | human_readable_profile_builder.cc | 51 if (op.cycles > 0 && op.bytes_accessed >= 0) { in ToString() 53 HumanReadableNumBytes(op.bytes_accessed / CyclesToSeconds(op.cycles)), in ToString() 55 double bpc = static_cast<double>(op.bytes_accessed) / op.cycles; in ToString() 56 if (op.bytes_accessed > op.cycles) { in ToString() 119 total_bytes += std::max(op.bytes_accessed, int64{0}); in ToString() 211 entry.metric = static_cast<double>(op.bytes_accessed) / (1 << 20); in ToString()
|
D | human_readable_profile_builder.h | 50 int64 transcendental_count, int64 bytes_accessed, in AddOp() argument 54 bytes_accessed, optimal_seconds}); in AddOp() 68 int64 bytes_accessed; // -1 if unknown member
|
D | hlo_cost_analysis.cc | 57 float bytes_accessed = GetShapeSize(hlo->shape()); in Preprocess() local 61 bytes_accessed += GetShapeSize(operand->shape()); in Preprocess() 64 current_properties_[kBytesAccessedKey] = bytes_accessed; in Preprocess() 711 float bytes_accessed = GetShapeSize(hlo->shape()); in HandleTriangularSolve() local 713 bytes_accessed += GetShapeSize(hlo->operand(0)->shape()) / 2.0f; in HandleTriangularSolve() 715 bytes_accessed += GetShapeSize(hlo->operand(1)->shape()); in HandleTriangularSolve() 717 current_properties_[kBytesAccessedKey] = bytes_accessed; in HandleTriangularSolve() 730 float bytes_accessed = GetShapeSize(hlo->operand(0)->shape()) / 2.0f; in HandleCholesky() local 732 bytes_accessed += GetShapeSize(hlo->operand(0)->shape()) / 2.0f; in HandleCholesky() 734 current_properties_[kBytesAccessedKey] = bytes_accessed; in HandleCholesky() [all …]
|
D | hlo_profile_printer.cc | 62 instruction_info.bytes_accessed(), in PrintHloProfile()
|
D | hlo_cost_analysis.h | 150 float bytes_accessed() const; 161 int64 bytes_accessed(const HloInstruction& hlo) const;
|
D | hlo_profile_printer_data.proto | 34 float bytes_accessed = 6; field
|
D | hlo_execution_profile.cc | 106 instruction_info->set_bytes_accessed(cost_analysis.bytes_accessed(*hlo)); in CreateHloProfilePrinterData()
|
D | memory_space_assignment.cc | 261 float bytes_accessed = cost_analysis_.bytes_accessed(instruction); in GetInstructionElapsedDueToMemory() local 263 bytes_accessed / in GetInstructionElapsedDueToMemory() 272 bytes_accessed -= operand_bytes_accessed; in GetInstructionElapsedDueToMemory() 275 bytes_accessed / in GetInstructionElapsedDueToMemory() 285 bytes_accessed -= output_bytes_accessed; in GetInstructionElapsedDueToMemory() 288 bytes_accessed / in GetInstructionElapsedDueToMemory()
|
/external/tensorflow/tensorflow/core/profiler/convert/ |
D | op_metrics_db_combiner.cc | 71 dst->set_bytes_accessed(src.bytes_accessed() + dst->bytes_accessed()); in CombineOpMetrics() 100 src_memory_accessed.bytes_accessed() + in CombineMemoryAccessedBreakdown() 101 dst_memory_accessed->bytes_accessed()); in CombineMemoryAccessedBreakdown()
|
D | op_metrics_to_record.h | 91 SafeDivide(metrics.bytes_accessed(), PicosToNanos(metrics.time_ps()))); in SetRooflineMetrics() 93 SafeDivide(metrics.flops(), metrics.bytes_accessed())); in SetRooflineMetrics() 94 record->set_bound_by((metrics.bytes_accessed() != 0) in SetRooflineMetrics()
|
D | xplane_to_op_metrics_db.cc | 248 /*children_time_ps=*/0, costs.flops, costs.bytes_accessed); in ConvertDeviceTraceXPlaneToOpMetricsDb()
|
/external/tensorflow/tensorflow/core/profiler/utils/ |
D | op_utils.cc | 70 uint64 time_ps, uint64 children_time_ps, int64 flops, int64 bytes_accessed, in EnterOp() argument 86 op_metrics->set_bytes_accessed(op_metrics->bytes_accessed() + in EnterOp() 87 bytes_accessed * occurrences); in EnterOp()
|
D | op_metrics_db_utils.cc | 61 tf_op_metrics->set_bytes_accessed(tf_op_metrics->bytes_accessed() + in UpdateTfOpMetricsWithDeviceOpMetrics() 62 device_op_metrics.bytes_accessed()); in UpdateTfOpMetricsWithDeviceOpMetrics()
|
D | cost_utils.h | 44 uint64 bytes_accessed = 0LL; member
|
D | op_utils.h | 73 uint64 children_time_ps, int64 flops, int64 bytes_accessed,
|
/external/tensorflow/tensorflow/compiler/xla/service/cpu/ |
D | parallel_task_assignment.cc | 68 const int64 bytes_accessed = in GetParallelTaskCount() local 69 std::max(int64{1}, cost_analysis_->bytes_accessed(*instruction)); in GetParallelTaskCount() 72 static_cast<float>(bytes_accessed); in GetParallelTaskCount() 94 10 * cost_analysis_->bytes_accessed(*instruction); in GetParallelTaskCount()
|
/external/tensorflow/tensorflow/core/profiler/protobuf/ |
D | op_metrics.proto | 54 uint64 bytes_accessed = 5; field 65 uint64 bytes_accessed = 3; field
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | gpu_compiler.cc | 890 cost_analysis.bytes_accessed()); in RunBackend()
|