/external/tensorflow/tensorflow/compiler/xla/service/ |
D | hlo_cost_analysis_test.cc | 157 EXPECT_EQ(analysis.bytes_accessed(), in TEST_F() 191 EXPECT_EQ(analysis.bytes_accessed(), in TEST_F() 227 EXPECT_EQ(analysis.bytes_accessed(), in TEST_F() 257 EXPECT_EQ(analysis.bytes_accessed(), in TEST_F() 281 EXPECT_EQ(analysis.bytes_accessed(), 80); in TEST_F() 313 EXPECT_EQ(analysis.bytes_accessed(), in TEST_F() 356 EXPECT_EQ(analysis.bytes_accessed(), in TEST_F() 436 EXPECT_EQ(analysis.bytes_accessed(), in TEST_F() 464 EXPECT_EQ(analysis.bytes_accessed(), sizeof(float) * (10 * 20 + 1 + 10)); in TEST_F() 488 EXPECT_EQ(analysis.bytes_accessed(), sizeof(float) * (10 * 20 + 1 + 2 * 4)); in TEST_F() [all …]
|
D | human_readable_profile_builder.cc | 52 if (op.cycles > 0 && op.bytes_accessed >= 0) { in ToString() 54 HumanReadableNumBytes(op.bytes_accessed / CyclesToSeconds(op.cycles)), in ToString() 56 double bpc = static_cast<double>(op.bytes_accessed) / op.cycles; in ToString() 57 if (op.bytes_accessed > op.cycles) { in ToString() 120 total_bytes += std::max(op.bytes_accessed, int64_t{0}); in ToString() 212 entry.metric = static_cast<double>(op.bytes_accessed) / (1 << 20); in ToString()
|
D | human_readable_profile_builder.h | 50 int64_t transcendental_count, int64_t bytes_accessed, in AddOp() argument 54 transcendental_count, bytes_accessed, in AddOp() 69 int64_t bytes_accessed; // -1 if unknown member
|
D | hlo_cost_analysis.cc | 57 float bytes_accessed = GetShapeSize(hlo->shape()); in Preprocess() local 61 bytes_accessed += GetShapeSize(operand->shape()); in Preprocess() 64 current_properties_[kBytesAccessedKey] = bytes_accessed; in Preprocess() 733 float bytes_accessed = GetShapeSize(hlo->shape()); in HandleTriangularSolve() local 735 bytes_accessed += GetShapeSize(hlo->operand(0)->shape()) / 2.0f; in HandleTriangularSolve() 737 bytes_accessed += GetShapeSize(hlo->operand(1)->shape()); in HandleTriangularSolve() 739 current_properties_[kBytesAccessedKey] = bytes_accessed; in HandleTriangularSolve() 752 float bytes_accessed = GetShapeSize(hlo->operand(0)->shape()) / 2.0f; in HandleCholesky() local 754 bytes_accessed += GetShapeSize(hlo->operand(0)->shape()) / 2.0f; in HandleCholesky() 756 current_properties_[kBytesAccessedKey] = bytes_accessed; in HandleCholesky() [all …]
|
D | hlo_profile_printer_data.proto | 34 reserved 6; // bytes_accessed used to erroneously be a float 35 int64 bytes_accessed = 9; field
|
D | hlo_profile_printer.cc | 63 instruction_info.bytes_accessed(), in PrintHloProfile()
|
D | hlo_cost_analysis.h | 182 float bytes_accessed() const; 193 int64_t bytes_accessed(const HloInstruction& hlo) const;
|
D | hlo_execution_profile.cc | 106 instruction_info->set_bytes_accessed(cost_analysis.bytes_accessed(*hlo)); in CreateHloProfilePrinterData()
|
D | memory_space_assignment.cc | 386 float total_bytes_accessed = cost_analysis_.bytes_accessed(instruction); in GetInstructionElapsedDueToMemory() 411 float total_bytes_accessed = cost_analysis_.bytes_accessed(instruction); in GetInstructionElapsedDueToMemory()
|
/external/tensorflow/tensorflow/core/profiler/convert/ |
D | op_metrics_db_combiner.cc | 75 dst->set_bytes_accessed(src.bytes_accessed() + dst->bytes_accessed()); in CombineOpMetrics() 108 src_memory_accessed.bytes_accessed() + in CombineMemoryAccessedBreakdown() 109 dst_memory_accessed->bytes_accessed()); in CombineMemoryAccessedBreakdown()
|
D | op_metrics_to_record.h | 42 return SafeDivide(metrics.bytes_accessed(), PicoToNano(metrics.time_ps())); in GigaBytesPerSecondPerCore() 109 SafeDivide(metrics.flops(), metrics.bytes_accessed())); in SetRooflineMetrics() 110 record->set_bound_by((metrics.bytes_accessed() != 0) in SetRooflineMetrics()
|
D | xplane_to_op_metrics_db_test.cc | 44 uint64 bytes_accessed, int64_t occurences, in AddTensorFlowTpuOpEvent() argument
|
D | op_profile_builder.cc | 160 metrics->set_raw_bytes_accessed(op_metrics.bytes_accessed()); in PopulateOpMetricsNode()
|
D | xplane_to_op_metrics_db.cc | 373 /*children_time_ps=*/0, costs.flops, costs.bytes_accessed); in ConvertDeviceTraceXPlaneToOpMetricsDb()
|
/external/tensorflow/tensorflow/core/profiler/utils/ |
D | op_utils.cc | 80 int64_t bytes_accessed, in EnterOp() argument 97 op_metrics->set_bytes_accessed(op_metrics->bytes_accessed() + in EnterOp() 98 bytes_accessed * occurrences); in EnterOp()
|
D | op_metrics_db_utils.cc | 62 tf_op_metrics->set_bytes_accessed(tf_op_metrics->bytes_accessed() + in UpdateTfOpMetricsWithDeviceOpMetrics() 63 device_op_metrics.bytes_accessed()); in UpdateTfOpMetricsWithDeviceOpMetrics()
|
D | cost_utils.h | 44 uint64 bytes_accessed = 0LL; member
|
D | op_utils.h | 77 uint64 children_time_ps, int64_t flops, int64_t bytes_accessed,
|
/external/tensorflow/tensorflow/compiler/xla/service/cpu/ |
D | parallel_task_assignment.cc | 70 const int64_t bytes_accessed = in GetParallelTaskCount() local 71 std::max(int64_t{1}, cost_analysis_->bytes_accessed(*instruction)); in GetParallelTaskCount() 74 static_cast<float>(bytes_accessed); in GetParallelTaskCount() 96 10 * cost_analysis_->bytes_accessed(*instruction); in GetParallelTaskCount()
|
/external/tensorflow/tensorflow/core/profiler/protobuf/ |
D | op_metrics.proto | 54 uint64 bytes_accessed = 5; field 65 uint64 bytes_accessed = 3; field
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | gpu_compiler.cc | 1350 cost_analysis.bytes_accessed()); in RunBackend()
|