/external/tensorflow/tensorflow/compiler/xla/service/ |
D | dfs_hlo_visitor.h |
    74  virtual Status HandleElementwiseUnary(HloInstructionPtr hlo);
    75  virtual Status HandleElementwiseBinary(HloInstructionPtr hlo);
    77  virtual Status HandleClamp(HloInstructionPtr hlo) = 0;
    78  virtual Status HandleSelect(HloInstructionPtr hlo) = 0;
    79  virtual Status HandleMaximum(HloInstructionPtr hlo) {
    80    return HandleElementwiseBinary(hlo);
    82  virtual Status HandleMinimum(HloInstructionPtr hlo) {
    83    return HandleElementwiseBinary(hlo);
    85  virtual Status HandleConcatenate(HloInstructionPtr hlo) = 0;
    86  virtual Status HandleConvert(HloInstructionPtr hlo) {
    [all …]
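The matches show the visitor's key design point: opcode-specific handlers such as HandleMaximum default to a shared HandleElementwiseBinary, so a subclass overrides only the opcodes it treats specially. A minimal standalone sketch of that dispatch pattern (simplified stand-in types, not the real XLA interface):

    #include <cstdio>

    // Stand-ins for illustration only; the real types live in the XLA headers.
    struct HloInstruction { const char* name; };
    using Status = int;  // 0 == OK

    class VisitorSketch {
     public:
      virtual ~VisitorSketch() = default;
      // Shared fallback for all elementwise binary opcodes.
      virtual Status HandleElementwiseBinary(HloInstruction* hlo) {
        std::printf("elementwise binary: %s\n", hlo->name);
        return 0;
      }
      // Per-opcode hooks forward to the fallback by default.
      virtual Status HandleMaximum(HloInstruction* hlo) {
        return HandleElementwiseBinary(hlo);
      }
      virtual Status HandleMinimum(HloInstruction* hlo) {
        return HandleElementwiseBinary(hlo);
      }
    };

    int main() {
      VisitorSketch v;
      HloInstruction max{"maximum"};
      return v.HandleMaximum(&max);  // lands in HandleElementwiseBinary
    }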
|
D | bfloat16_conversion_folding.cc |
    35  Status DefaultAction(HloInstruction* hlo) override;
    47  Status TryFoldBF16Conversions(HloInstruction* hlo);
    52  Status FoldOutputConversions(HloInstruction* hlo);
    57  Status FoldOperandConversion(HloInstruction* hlo, int64 operand_index);
    65      HloInstruction* hlo) {
    66    std::vector<HloInstruction*> materialized_users = hlo->users();
    67    hlo->mutable_shape()->set_element_type(BF16);
    70    TF_RETURN_IF_ERROR(user->ReplaceAllUsesWith(hlo));
    77      HloInstruction* hlo, int64 operand_index) {
    79    auto operand = hlo->mutable_operand(operand_index);
    [all …]
|
D | bfloat16_normalization.cc |
    35  Status DefaultAction(HloInstruction* hlo) override;
    50  Status HandleInstruction(HloInstruction* hlo);
    53  Status InsertConvertAfterOutput(HloInstruction* hlo, PrimitiveType to,
    58  Status ChangeOutputTypeThenInsertConvertBack(HloInstruction* hlo,
    63  Status InsertConvertBeforeOperand(HloInstruction* hlo, int64 operand_idx,
    70      HloInstruction* hlo,
    79      HloInstruction* hlo, PrimitiveType to, HloComputation* computation) {
    80    bool is_root = computation->root_instruction() == hlo;
    81    std::vector<HloInstruction*> materialized_users = hlo->users();
    84        HloInstruction::CreateConvert(hlo->shape(), hlo));
    [all …]
|
D | hlo_element_type_converter.cc |
    38  HloInstruction* ToElementType(HloInstruction* hlo, PrimitiveType type) {
    39    if (hlo->shape().element_type() != type) {
    40      Shape shape = ShapeUtil::ChangeElementType(hlo->shape(), type);
    41      hlo = hlo->parent()->AddInstruction(
    42          HloInstruction::CreateConvert(shape, hlo));
    44    CHECK_EQ(hlo->shape().element_type(), type);
    45    return hlo;
    48  bool HasOperandType(HloInstruction* hlo, PrimitiveType type) {
    49    for (HloInstruction* operand : hlo->operands()) {
    85  HloInstruction* ConvertTupleElements(HloInstruction* hlo,
    [all …]
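ToElementType's contract is "return a value of the requested element type, materializing a convert only if needed". A self-contained sketch of that idiom with hypothetical stand-in types (the real code goes through ShapeUtil and HloComputation::AddInstruction):

    #include <cassert>
    #include <memory>
    #include <vector>

    enum PrimitiveType { F32, BF16 };           // stand-in for xla::PrimitiveType
    struct Instr { PrimitiveType type; Instr* operand; };

    std::vector<std::unique_ptr<Instr>> graph;  // owns any inserted converts

    // Insert a convert node only when the element type differs; otherwise
    // return the instruction unchanged (mirrors the type check at line 39).
    Instr* ToElementType(Instr* instr, PrimitiveType type) {
      if (instr->type != type) {
        graph.push_back(std::make_unique<Instr>(Instr{type, instr}));
        instr = graph.back().get();
      }
      assert(instr->type == type);  // the CHECK_EQ in the original
      return instr;
    }

    int main() {
      Instr x{F32, nullptr};
      Instr* y = ToElementType(&x, BF16);  // inserts a convert
      Instr* z = ToElementType(y, BF16);   // already BF16: no-op
      return (y != &x && z == y) ? 0 : 1;
    }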
|
D | implicit_broadcast_remover.cc |
    50  Status HandleElementwiseBinary(HloInstruction* hlo) override {
    51    return ReplaceImplicitBroadcastOperands(hlo);
    54  Status HandleClamp(HloInstruction* hlo) override {
    56    return ReplaceImplicitBroadcastOperands(hlo);
    67  Status ReplaceImplicitBroadcastOperands(HloInstruction* hlo) {
    68    auto fadd = [hlo](std::unique_ptr<HloInstruction> x) {
    69      return hlo->parent()->AddInstruction(std::move(x));
    73    for (int i = 0; i < hlo->operand_count(); ++i) {
    74      HloInstruction* operand = hlo->mutable_operand(i);
    75      if (!ShapeUtil::SameDimensions(hlo->shape(), operand->shape())) {
    [all …]
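The pass walks each elementwise op and, wherever an operand's dimensions differ from the result's (an implicit broadcast), splices in an explicit broadcast node. A sketch of that check-and-replace loop over hypothetical minimal types:

    #include <memory>
    #include <vector>

    struct Node {                             // hypothetical minimal IR node
      std::vector<int> dims;
      std::vector<Node*> operands;
    };

    std::vector<std::unique_ptr<Node>> pool;  // owns inserted broadcasts

    // For each operand whose dimensions differ from the result's, replace it
    // with an explicit broadcast to the result shape (cf. the SameDimensions
    // test at line 75 of the original).
    bool MakeBroadcastsExplicit(Node* hlo) {
      bool changed = false;
      for (size_t i = 0; i < hlo->operands.size(); ++i) {
        Node* operand = hlo->operands[i];
        if (operand->dims != hlo->dims) {
          pool.push_back(std::make_unique<Node>(Node{hlo->dims, {operand}}));
          hlo->operands[i] = pool.back().get();
          changed = true;
        }
      }
      return changed;
    }

    int main() {
      Node scalar{{}, {}};
      Node vec{{8}, {}};
      Node add{{8}, {&vec, &scalar}};
      return MakeBroadcastsExplicit(&add) ? 0 : 1;  // scalar gets broadcast
    }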
|
D | hlo_execution_profile.cc |
    87    for (const HloInstruction* hlo : computation->instructions()) {
    90      instruction_info->set_long_name(hlo->ToString());
    92          hlo->ToString(HloPrintOptions().set_compact_operands(true)));
    93      instruction_info->set_category(hlo->ToCategory());
    94      instruction_info->set_flop_count(cost_analysis.flop_count(*hlo));
    96          cost_analysis.transcendental_count(*hlo));
    97      instruction_info->set_bytes_accessed(cost_analysis.bytes_accessed(*hlo));
    99          cost_analysis.optimal_seconds(*hlo));
   101          hlo_profile_index_map.GetProfileIndexFor(*hlo));
   117  void HloExecutionProfile::SetCyclesTakenBy(const HloInstruction* hlo,
    [all …]
|
D | bfloat16_support.cc |
    22  bool BFloat16Support::SupportsBF16Operand(const HloInstruction& hlo,
    24    switch (hlo.opcode()) {
    34      return hlo.operand(0)->shape().element_type() == BF16;
    41  bool BFloat16Support::SupportsBF16Output(const HloInstruction& hlo) const {
    42    switch (hlo.opcode()) {
    51      return hlo.shape().element_type() == BF16;
    58  bool BFloat16Support::SupportsMixedPrecisions(const HloInstruction& hlo) const {
    59    switch (hlo.opcode()) {
    76      const HloInstruction& hlo, int64 operand_index) {
    77    switch (hlo.opcode()) {
    [all …]
|
D | elemental_ir_emitter.cc |
   997      const HloInstruction* hlo, llvm::Value* x) const {
   998    if (hlo->operand(0)->shape().element_type() != F32) {
  1001    return EmitReducePrecisionFloat(x, /*exponent_bits=*/hlo->exponent_bits(),
  1002                                    /*mantissa_bits=*/hlo->mantissa_bits(),
  1086      const llvm_ir::IrArray::Index& target_index, const HloInstruction& hlo,
  1088    CHECK(hlo.IsElementwise())
  1089        << "HLO " << hlo.ToString() << " is not elementwise.";
  1091    const Shape& operand_shape = hlo.operand(operand_no)->shape();
  1099    if (ShapeUtil::CompatibleIgnoringElementType(operand_shape, hlo.shape())) {
  1105    CHECK_EQ(ShapeUtil::Rank(operand_shape), ShapeUtil::Rank(hlo.shape()));
    [all …]
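EmitReducePrecision accepts only F32 input and hands the operation's exponent_bits/mantissa_bits to a bit-twiddling routine. The mantissa half of reduce-precision can be sketched in portable C++; round-to-nearest-even is an assumption here, and the exponent-range clamping the real emitter also performs is omitted:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Keep only `mantissa_bits` of an f32's 23-bit mantissa, rounding the
    // dropped bits to nearest, ties to even. Sketch only: no exponent
    // narrowing, and NaN payloads are not treated specially.
    float ReducePrecision(float x, int mantissa_bits) {
      int dropped = 23 - mantissa_bits;
      if (dropped <= 0) return x;
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      uint32_t mask = (1u << dropped) - 1;
      uint32_t halfway = 1u << (dropped - 1);
      uint32_t rem = bits & mask;
      bits &= ~mask;  // truncate the dropped mantissa bits
      if (rem > halfway || (rem == halfway && (bits & (1u << dropped)))) {
        bits += 1u << dropped;  // round up; a carry into the exponent is correct
      }
      float y;
      std::memcpy(&y, &bits, sizeof(y));
      return y;
    }

    int main() {
      // With 7 mantissa bits this mimics rounding f32 toward bfloat16.
      std::printf("%.9g\n", ReducePrecision(1.2345678f, 7));
    }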
|
D | hlo_evaluator.h |
   125  Status DefaultAction(HloInstruction* hlo) override {
   126    return hlo->Visit(typed_visitors_.at(hlo->shape().element_type()).get());
   129  Status Preprocess(HloInstruction* hlo) override;
   131  Status Postprocess(HloInstruction* hlo) override;
   161  const Literal& GetEvaluatedLiteralFor(const HloInstruction* hlo) {
   162    if (hlo->IsConstant()) {
   163      return hlo->literal();
   165    auto it = evaluated_.find(hlo);
   167        << "could not find evaluated value for: " << hlo->ToString();
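GetEvaluatedLiteralFor is a memo-table lookup with a fast path for constants, which evaluate to themselves. The same shape in a standalone sketch (hypothetical Literal/Instr types; the original CHECKs where this asserts):

    #include <cassert>
    #include <unordered_map>

    struct Literal { double value; };                 // hypothetical
    struct Instr { bool is_constant; Literal literal; };

    class EvaluatorSketch {
     public:
      // Constants evaluate to themselves; everything else must already have
      // been computed and stored in the memo table by a previous visit.
      const Literal& GetEvaluatedLiteralFor(const Instr* hlo) const {
        if (hlo->is_constant) return hlo->literal;
        auto it = evaluated_.find(hlo);
        assert(it != evaluated_.end() && "no evaluated value for instruction");
        return it->second;
      }
      std::unordered_map<const Instr*, Literal> evaluated_;
    };

    int main() {
      EvaluatorSketch ev;
      Instr c{true, {3.0}};
      Instr add{false, {}};
      ev.evaluated_[&add] = Literal{7.0};  // as if a visitor stored it
      return ev.GetEvaluatedLiteralFor(&c).value == 3.0 &&
                     ev.GetEvaluatedLiteralFor(&add).value == 7.0
                 ? 0
                 : 1;
    }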
|
D | BUILD |
    28      srcs = ["hlo.proto"],
    51      ":hlo",
    61      ":hlo",
    75      ":hlo",
    94      ":hlo",
   108      ":hlo",
   126      ":hlo",
   157      ":hlo",
   169      ":hlo",
   187      ":hlo",
    [all …]
|
D | bfloat16_normalization_test.cc |
    35  bool SupportsBF16Operand(const HloInstruction& hlo,
    37    if (hlo.opcode() == HloOpcode::kAdd ||
    38        hlo.opcode() == HloOpcode::kSubtract ||
    39        hlo.opcode() == HloOpcode::kReduce ||
    40        hlo.opcode() == HloOpcode::kTuple ||
    41        hlo.opcode() == HloOpcode::kGetTupleElement) {
    47  bool SupportsBF16Output(const HloInstruction& hlo) const override {
    48    if (hlo.opcode() == HloOpcode::kAdd || hlo.opcode() == HloOpcode::kReduce ||
    49        hlo.opcode() == HloOpcode::kSubtract ||
    50        hlo.opcode() == HloOpcode::kTuple ||
    [all …]
|
D | bfloat16_conversion_folding_test.cc |
    35  bool SupportsBF16Operand(const HloInstruction& hlo,
    37    if (hlo.opcode() == HloOpcode::kAdd ||
    38        hlo.opcode() == HloOpcode::kSubtract ||
    39        hlo.opcode() == HloOpcode::kTuple ||
    40        hlo.opcode() == HloOpcode::kGetTupleElement) {
    46  bool SupportsBF16Output(const HloInstruction& hlo) const override {
    47    if (hlo.opcode() == HloOpcode::kAdd ||
    48        hlo.opcode() == HloOpcode::kSubtract ||
    49        hlo.opcode() == HloOpcode::kTuple ||
    50        hlo.opcode() == HloOpcode::kGetTupleElement) {
    [all …]
|
D | hlo_cost_analysis.cc |
    41  Status HloCostAnalysis::Preprocess(const HloInstruction* hlo) {
    51    float bytes_accessed = shape_size_(hlo->shape());
    52    for (const HloInstruction* operand : hlo->operands()) {
    60  Status HloCostAnalysis::Postprocess(const HloInstruction* hlo) {
    76    TF_RET_CHECK(hlo_properties_.emplace(hlo, current_properties_).second);
   113      const HloInstruction& hlo, const string& key,
   115    auto it = hlo_to_properties.find(&hlo);
   123  Status HloCostAnalysis::HandleElementwiseUnary(const HloInstruction* hlo) {
   124    return HandleElementwiseOp(hlo);
   127  Status HloCostAnalysis::HandleElementwiseBinary(const HloInstruction* hlo) {
    [all …]
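Preprocess seeds every instruction's default cost: bytes accessed is the output size plus the size of each operand, i.e. "write the result once, read every input once", which opcode-specific handlers may later overwrite. A hedged sketch with plain byte counts standing in for shape_size_:

    #include <cstdint>
    #include <numeric>
    #include <vector>

    // Default bytes-accessed model: one write of the output plus one read of
    // each operand.
    int64_t DefaultBytesAccessed(int64_t output_bytes,
                                 const std::vector<int64_t>& operand_bytes) {
      return std::accumulate(operand_bytes.begin(), operand_bytes.end(),
                             output_bytes);
    }

    int main() {
      // f32[128] = add(f32[128], f32[128]): 512 written + 2 * 512 read.
      return DefaultBytesAccessed(512, {512, 512}) == 1536 ? 0 : 1;
    }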
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | stream_assignment.cc |
    27  bool StreamAssignment::HasStreamAssigned(const HloInstruction& hlo) const {
    28    return hlo_to_stream_number_.count(&hlo);
    31  int StreamAssignment::StreamNumberForHlo(const HloInstruction& hlo) const {
    32    return FindOrDie(hlo_to_stream_number_, &hlo);
    35  void StreamAssignment::AssignStreamToHlo(const HloInstruction* hlo,
    41    InsertOrDie(&hlo_to_stream_number_, hlo, stream_no);
    42    VLOG(2) << "Assign stream #" << stream_no << " to " << hlo->ToString();
    59      const HloInstruction& hlo, const StreamAssignment& stream_assignment,
    62    if (hlo.opcode() == HloOpcode::kParameter ||
    63        hlo.opcode() == HloOpcode::kConstant) {
    [all …]
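StreamAssignment is a thin map from instruction to stream number, with FindOrDie/InsertOrDie enforcing that lookups hit and that no instruction is assigned twice. A standalone sketch using asserts in place of those helpers:

    #include <cassert>
    #include <unordered_map>

    struct Instr {};  // hypothetical stand-in for HloInstruction

    class StreamAssignmentSketch {
     public:
      bool HasStreamAssigned(const Instr& hlo) const {
        return hlo_to_stream_number_.count(&hlo) > 0;
      }
      int StreamNumberForHlo(const Instr& hlo) const {
        auto it = hlo_to_stream_number_.find(&hlo);
        assert(it != hlo_to_stream_number_.end());  // FindOrDie
        return it->second;
      }
      void AssignStreamToHlo(const Instr* hlo, int stream_no) {
        bool inserted = hlo_to_stream_number_.emplace(hlo, stream_no).second;
        assert(inserted);  // InsertOrDie: one stream per instruction
        (void)inserted;
      }
     private:
      std::unordered_map<const Instr*, int> hlo_to_stream_number_;
    };

    int main() {
      StreamAssignmentSketch sa;
      Instr conv;
      sa.AssignStreamToHlo(&conv, 2);
      return sa.HasStreamAssigned(conv) && sa.StreamNumberForHlo(conv) == 2 ? 0 : 1;
    }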
|
D | hlo_schedule.cc |
    84    for (const HloInstruction* hlo : thunk_launch_order) {
    85      predecessor_map->SetReachable(hlo, hlo);
    86      if (stream_assignment.HasStreamAssigned(*hlo)) {
    90        immediate_preds.insert(immediate_preds.end(), hlo->operands().begin(),
    91                               hlo->operands().end());
    93                               hlo->control_predecessors().begin(),
    94                               hlo->control_predecessors().end());
    98        const int stream_no = stream_assignment.StreamNumberForHlo(*hlo);
   102        predecessor_map->SetReachabilityToUnion(immediate_preds, hlo);
   103        last_instruction_per_stream[stream_no] = hlo;
    [all …]
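The loop's effect: each instruction is reachable from itself, from its operands and control predecessors, and, when it has a stream assigned, from the previous instruction launched on that same stream. A sketch of that bookkeeping over hypothetical minimal types (the real code uses a dedicated reachability map rather than std::set, and control predecessors, handled identically, are omitted here):

    #include <map>
    #include <set>
    #include <vector>

    struct Instr { std::vector<const Instr*> operands; };

    // Walk the launch order; an instruction's predecessors are its operands
    // plus the last instruction previously launched on its stream, and its
    // reachability set is the union of theirs.
    void BuildReachability(
        const std::vector<const Instr*>& launch_order,
        const std::map<const Instr*, int>& stream_of,
        std::map<const Instr*, std::set<const Instr*>>* reach) {
      std::map<int, const Instr*> last_on_stream;
      for (const Instr* hlo : launch_order) {
        std::vector<const Instr*> preds(hlo->operands);
        auto s = stream_of.find(hlo);
        if (s != stream_of.end()) {
          if (const Instr* prev = last_on_stream[s->second]) preds.push_back(prev);
          last_on_stream[s->second] = hlo;
        }
        std::set<const Instr*>& r = (*reach)[hlo];
        r.insert(hlo);  // SetReachable(hlo, hlo)
        for (const Instr* p : preds) {
          const std::set<const Instr*>& pr = (*reach)[p];
          r.insert(pr.begin(), pr.end());  // SetReachabilityToUnion
        }
      }
    }

    int main() {
      Instr a, b{{&a}};
      std::map<const Instr*, int> streams{{&a, 0}, {&b, 0}};
      std::map<const Instr*, std::set<const Instr*>> reach;
      BuildReachability({&a, &b}, streams, &reach);
      return reach[&b].count(&a) ? 0 : 1;
    }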
|
D | instruction_fusion.cc |
    27  bool IsFusile(const HloInstruction& hlo) {
    28    return (hlo.IsElementwise() && hlo.operand_count() > 0) ||
    29           hlo.opcode() == HloOpcode::kBroadcast ||
    30           hlo.opcode() == HloOpcode::kConcatenate ||
    31           hlo.opcode() == HloOpcode::kDynamicSlice ||
    32           hlo.opcode() == HloOpcode::kDynamicUpdateSlice ||
    33           hlo.opcode() == HloOpcode::kFusion ||
    34           hlo.opcode() == HloOpcode::kGetTupleElement ||
    35           hlo.opcode() == HloOpcode::kPad ||
    36           hlo.opcode() == HloOpcode::kReduce ||
    [all …]
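IsFusile is a pure whitelist: an instruction is a fusion candidate if it is elementwise with at least one operand, or if its opcode is on a fixed list. The same shape of predicate as a standalone sketch (opcode list abbreviated, stand-in types):

    enum class Opcode { kBroadcast, kConcatenate, kDot, kReduce, kOther };

    struct Instr {  // hypothetical minimal instruction
      Opcode opcode;
      bool elementwise;
      int operand_count;
    };

    // Whitelist predicate: elementwise ops with operands always qualify;
    // otherwise only the listed opcodes do.
    bool IsFusileSketch(const Instr& hlo) {
      if (hlo.elementwise && hlo.operand_count > 0) return true;
      switch (hlo.opcode) {
        case Opcode::kBroadcast:
        case Opcode::kConcatenate:
        case Opcode::kReduce:
          return true;
        default:
          return false;
      }
    }

    int main() {
      Instr add{Opcode::kOther, /*elementwise=*/true, /*operand_count=*/2};
      Instr dot{Opcode::kDot, /*elementwise=*/false, /*operand_count=*/2};
      return IsFusileSketch(add) && !IsFusileSketch(dot) ? 0 : 1;
    }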
|
D | cudnn_batchnorm_thunk.cc |
    84      const BufferAllocation::Slice& output, const HloInstruction* hlo)
    85      : Thunk(Thunk::Kind::kCudnnBatchNormForwardInference, hlo),
    94    CHECK_EQ(hlo->opcode(), HloOpcode::kCustomCall);
    95    CHECK_EQ(hlo->custom_call_target(),
    98        LayoutUtil::LayoutsInShapesEqual(hlo->shape(), hlo->operand(0)->shape()));
    99    CHECK_EQ(hlo->shape().element_type(), F32) << "Not yet implemented";
   140      const BufferAllocation::Slice& output_tuple, const HloInstruction* hlo)
   141      : Thunk(Thunk::Kind::kCudnnBatchNormForwardTraining, hlo),
   151    CHECK_EQ(hlo->opcode(), HloOpcode::kCustomCall);
   152    CHECK_EQ(hlo->custom_call_target(), kCudnnBatchNormForwardTrainingCallTarget);
    [all …]
|
D | gpu_copy_insertion.cc |
    38      HloInstruction* hlo) {
    39    HloInstruction*& copy = hlo_to_copy_map_[hlo];
    41      TF_ASSIGN_OR_RETURN(copy, hlo->parent()->DeepCopyInstruction(hlo));
    56    for (HloInstruction* hlo :
    60        HloInstruction* operand = hlo->mutable_operand(n);
    68        TF_RETURN_IF_ERROR(hlo->ReplaceOperandWith(n, copy));
    74      if (IsCustomCallToDnnBatchNorm(*hlo)) {
    78        for (int64 i = 0; i < hlo->operand_count() - 2; ++i) {
    81      } else if (IsCustomCallToDnnConvolution(*hlo)) {
    84        for (int64 i = 0; i < hlo->operand_count() - 2; ++i) {
    [all …]
|
D | ir_emission_utils.cc |
    62  bool ImplementedAsGemm(const HloInstruction& hlo) {
    64    if (hlo.parent() != hlo.GetModule()->entry_computation()) {
    69    if (hlo.opcode() == HloOpcode::kDot) {
    70      const Shape& lhs_shape = hlo.operand(0)->shape();
    71      const Shape& rhs_shape = hlo.operand(1)->shape();
    75      if (AreValidGemmShapes(lhs_shape, rhs_shape, hlo.shape())) {
    84    if (hlo.opcode() == HloOpcode::kFusion &&
    85        hlo.fusion_kind() == HloInstruction::FusionKind::kTransposeDot &&
    86        hlo.fused_expression_root()->opcode() == HloOpcode::kDot) {
   100  bool IsCustomCallToDnnBatchNorm(const HloInstruction& hlo) {
    [all …]
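ImplementedAsGemm fires for a kDot in the entry computation whose shapes pass AreValidGemmShapes. The core of that shape test — both operands and the result are rank-2 matrices with a compatible contraction dimension — can be sketched as below; this is an assumption about the essential checks, and the real function also constrains element types:

    #include <vector>

    using Dims = std::vector<long long>;  // hypothetical stand-in for xla::Shape

    // A dot can go to a GEMM library call when lhs is [m, k], rhs is [k, n],
    // and the output is [m, n].
    bool GemmShapesOk(const Dims& lhs, const Dims& rhs, const Dims& out) {
      return lhs.size() == 2 && rhs.size() == 2 && out.size() == 2 &&
             lhs[1] == rhs[0] && out[0] == lhs[0] && out[1] == rhs[1];
    }

    int main() {
      return GemmShapesOk({64, 128}, {128, 32}, {64, 32}) ? 0 : 1;
    }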
|
D | hlo_to_ir_bindings.cc |
   139  llvm::Value* HloToIrBindings::GetTypedIrValue(const HloInstruction& hlo,
   143        ShapeUtil::GetSubshape(hlo.shape(), shape_index), module_);
   154    ir_value->setName(llvm_ir::AsStringRef(llvm_ir::IrName(&hlo, "raw")));
   155    typed_ir_value->setName(llvm_ir::AsStringRef(llvm_ir::IrName(&hlo, "typed")));
   159  void HloToIrBindings::BindHloToIrValue(const HloInstruction& hlo,
   162    VLOG(2) << "Binding " << hlo.ToString();
   164    const Shape& hlo_shape = hlo.shape();
   165    llvm::Value* typed_ir_value = GetTypedIrValue(hlo, shape_index, ir_value);
   167    if (!BoundToIrValue(hlo)) {
   169      InsertOrDie(&base_ptrs_, &hlo, ShapeTree<llvm::Value*>(hlo_shape, nullptr));
    [all …]
|
D | hlo_to_ir_bindings.h |
    53  void BindHloToIrValue(const HloInstruction& hlo, llvm::Value* ir_value,
    64  bool BoundToIrValue(const HloInstruction& hlo) const {
    65    return base_ptrs_.count(&hlo);
    73  llvm::Value* GetBasePointer(const HloInstruction& hlo,
    75    auto it = base_ptrs_.find(&hlo);
    76    CHECK(it != base_ptrs_.end()) << hlo.ToString();
    87  llvm_ir::IrArray GetIrArray(const HloInstruction& hlo,
    99  llvm::Value* GetTypedIrValue(const HloInstruction& hlo,
|
/external/tensorflow/tensorflow/compiler/xla/service/cpu/ |
D | cpu_instruction_fusion.cc |
    29  bool CanBeLoopFused(const HloInstruction& hlo) {
    32    return hlo.IsElementwise() ||  //
    33           hlo.opcode() == HloOpcode::kBitcast ||
    34           hlo.opcode() == HloOpcode::kBroadcast ||
    35           hlo.opcode() == HloOpcode::kConcatenate ||
    36           hlo.opcode() == HloOpcode::kDynamicSlice ||
    37           hlo.opcode() == HloOpcode::kDynamicUpdateSlice ||
    38           hlo.opcode() == HloOpcode::kPad ||
    39           hlo.opcode() == HloOpcode::kReshape ||
    40           hlo.opcode() == HloOpcode::kReverse ||
    [all …]
|
D | conv_canonicalization.cc |
    33    for (HloInstruction* hlo :
    35      if (hlo->opcode() == HloOpcode::kConvolution &&
    36          !PotentiallyImplementedAsEigenConvolution(*hlo)) {
    38            hlo->convolution_dimension_numbers();
    56        HloInstruction* input = hlo->mutable_operand(0);
    77        HloInstruction* kernel = hlo->mutable_operand(1);
   104        new_conv_dims[0] = hlo->shape().dimensions(output_batch_dim);
   108            hlo->shape().dimensions(dnums.output_spatial_dimensions(i));
   111        new_conv_dims[num_dims - 1] = hlo->shape().dimensions(output_feature_dim);
   113            ShapeUtil::MakeShape(hlo->shape().element_type(), new_conv_dims);
    [all …]
|
D | elemental_ir_emitter.cc |
   109      const HloInstruction* hlo,
   111    if (hlo->opcode() == HloOpcode::kMap) {
   112      return [this, hlo, &operand_to_generator](
   115        for (int i = 0; i < hlo->operand_count(); i++) {
   117              operand_to_generator.at(hlo->operand(i))(
   118                  ElementwiseSourceIndex(index, *hlo, 0)));
   121        return ir_emitter_->EmitScalarCall(hlo->shape().element_type(),
   122                                           hlo->to_apply(), operands,
   123                                           llvm_ir::IrName(hlo));
   126    return ElementalIrEmitter::MakeElementGenerator(hlo, operand_to_generator);
|
/external/tensorflow/tensorflow/compiler/xla/service/llvm_ir/ |
D | alias_analysis.cc |
    34  void AliasAnalysis::AddAliasingInformationToIrArray(const HloInstruction& hlo,
    37    if (hlo.opcode() == HloOpcode::kParameter) {
    43        assignment_.GetAllSlices(&hlo, /*index=*/{});
    68                                       assignment_, hlo);
    80    if (hlo.opcode() == HloOpcode::kParameter) {
    85               &hlo) != parameter_instructions.end()) {
   126      const BufferAssignment& assignment, const HloInstruction& hlo) {
   152    for (HloInstruction* user : hlo.users()) {
   159    add_buffers_to_worklist(&hlo);
   160    for (HloInstruction* operand : hlo.operands()) {
|