/external/tensorflow/tensorflow/compiler/mlir/tensorflow/transforms/ |
D | gpu_fusion.cc | 70 Value side_input; in matchAndRewrite() local 80 side_input = add_op.y(); in matchAndRewrite() 86 side_input = add_op.x(); in matchAndRewrite() 97 if (side_input) state.operands.push_back(side_input); in matchAndRewrite()
|
/external/tensorflow/tensorflow/core/kernels/ |
D | fused_batch_norm_op.cu.cc | 140 const T* __restrict__ side_input, float epsilon, in run() 161 shifted_v += U(side_input[index]); in run() 207 const IT* side_input = reinterpret_cast<const IT*>(_side_input); in run() local 244 reinterpret_cast<const half2*>(side_input)[index]); in run() 276 shifted_v = __hadd(shifted_v, side_input[index]); in run() 301 const T* side_input, float epsilon, T* out) { in FusedBatchNormInferenceMetaKernel() argument 313 scale, offset, mean, var, side_input, in FusedBatchNormInferenceMetaKernel() 325 typename TTypes<T, 4>::ConstTensor side_input, U epsilon, in operator ()() 355 estimated_mean.data(), estimated_variance.data(), side_input.data(), \ in operator ()() 358 const bool no_side_input = side_input.dimensions().TotalSize() == 0; in operator ()() [all …]
|
D | fused_batch_norm_ex_op_test.cc | 159 Output side_input = ops::Const(root.WithOpName("side_input"), in RunFusedBatchNorm() local 171 ops::Add(root.WithOpName("with_side_input"), fwd.y, side_input); in RunFusedBatchNorm() 251 Output side_input = ops::Const(root.WithOpName("side_input"), in RunFusedBatchNormEx() local 262 side_inputs.push_back({side_input.name(), 0, t_dtype}); in RunFusedBatchNormEx() 364 Tensor side_input(t_dtype, input_shape); in VerifyTensorsNear() local 365 side_input.flat<T>().setRandom(); in VerifyTensorsNear() 366 side_input.flat<T>() += side_input.flat<T>().constant(static_cast<T>(5.0)); in VerifyTensorsNear() 381 is_training ? empty : var, side_input, &fbn_forward, in VerifyTensorsNear() 386 run_default(y_backprop, side_input, scale, offset, in VerifyTensorsNear() 391 is_training ? empty : var, side_input, &fbn_ex_forward, in VerifyTensorsNear()
|
D | fused_batch_norm_op.cc | 97 const Tensor* side_input, U epsilon, U exponential_avg_factor, in operator ()() 103 OP_REQUIRES(context, side_input == nullptr, in operator ()() 235 const Tensor* side_input, U epsilon, U exponential_avg_factor, in operator ()() 241 OP_REQUIRES(context, side_input == nullptr, in operator ()() 762 const Tensor& estimated_variance, const Tensor* side_input, in operator ()() 829 const bool has_side_input = side_input != nullptr; in operator ()() 841 side_input->tensor<T, 4>(), epsilon, activation_mode, in operator ()() 912 side_input != nullptr in operator ()() 913 ? StreamExecutorUtil::AsDeviceMemory<U>(*side_input) in operator ()() 1166 typename TTypes<T, 4>::ConstTensor side_input, U epsilon, \ [all …]
|
D | fused_batch_norm_op.h | 56 typename TTypes<T, 4>::ConstTensor side_input, U epsilon,
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | cudnn_fused_conv_rewriter.cc | 41 HloInstruction* side_input; member 77 HloInstruction* side_input = nullptr; in FindConvWithRelu() local 100 auto side_input_pattern = Op(&side_input); in FindConvWithRelu() 129 } else if (side_input == nullptr && Match(addend, side_input_pattern)) { in FindConvWithRelu() 130 CHECK(side_input); in FindConvWithRelu() 180 side_input, in FindConvWithRelu() 205 if (match.side_input) { in TryRewriteToCudnnForwardRelu() 235 if (match.side_input) { in TryRewriteToCudnnForwardRelu() 236 args.push_back(match.side_input); in TryRewriteToCudnnForwardRelu()
|
D | gpu_conv_runner.cc | 111 se::DeviceMemory<OutputType> side_input(params.fusion->side_input_buf); in RunGpuConvForwardActivation() local 113 if (side_input.is_null()) { in RunGpuConvForwardActivation() 125 side_input = output_buf; in RunGpuConvForwardActivation() 131 filter_buf, params.config.conv_desc, side_input, in RunGpuConvForwardActivation()
|
/external/tensorflow/tensorflow/core/grappler/optimizers/ |
D | remapper.cc | 105 int side_input = kMissingIndex; member 947 matched->side_input = add_regular_fanin_1.node_index(); in IsCpuCompatibleDataType() 955 matched->side_input = add_regular_fanin_0.node_index(); in IsCpuCompatibleDataType() 1385 << (matched.side_input != kMissingIndex in IsCpuCompatibleDataType() 1386 ? graph->node(matched.side_input).name() in IsCpuCompatibleDataType() 1411 if (matched.side_input != kMissingIndex) { in IsCpuCompatibleDataType() 1413 const NodeDef& side_input = graph->node(matched.side_input); in IsCpuCompatibleDataType() local 1414 fused_op.add_input(side_input.name()); // 5: side_input in IsCpuCompatibleDataType() 1437 if (matched.side_input != kMissingIndex) { in IsCpuCompatibleDataType()
|
D | mkl_remapper_test.cc | 343 auto side_input = in TEST_F() local 347 ops::Cast(s.WithOpName("side_input_cast"), side_input, DT_FLOAT); in TEST_F()
|
D | remapper_test.cc | 255 auto side_input = Placeholder(s.WithOpName("side_input"), DT_FLOAT, in TEST_F() local 258 ops::Cast(s.WithOpName("side_input_cast"), side_input, DT_HALF); in TEST_F()
|
/external/tensorflow/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/ |
D | lhlo_gpu_ops.td | 147 // side_input * side_input_scale + 155 Arg<LHLO_Buffer, "", [MemRead]>:$side_input,
|
/external/tensorflow/tensorflow/compiler/mlir/hlo/tests/ |
D | lhlo_gpu_ops.mlir | 155 …17x9x9xf16>, %filter : memref<3x3x17x32xf16>, %bias : memref<32xf16>, %side_input: memref<32xf16>… 157 …"lmhlo_gpu.conv_forward_fused_with_side_input"(%input, %filter, %bias, %side_input, %output, %scra…
|
/external/tensorflow/tensorflow/stream_executor/cuda/ |
D | cuda_dnn.h | 223 const DeviceMemory<float>& side_input, const dnn::BatchDescriptor& x_desc, 237 const DeviceMemory<float>& side_input, const dnn::BatchDescriptor& x_desc, 594 const DeviceMemory<U>& side_input, const dnn::BatchDescriptor& x_desc,
|
D | cuda_dnn.cc | 3452 const DeviceMemory<float>& side_input, const dnn::BatchDescriptor& x_desc, in DoBatchNormalizationForward() argument 3463 offset, estimated_mean, estimated_variance, side_input, x_desc, in DoBatchNormalizationForward() 3475 const DeviceMemory<float>& side_input, const dnn::BatchDescriptor& x_desc, in DoBatchNormalizationForward() argument 3486 estimated_mean, estimated_variance, side_input, x_desc, in DoBatchNormalizationForward() 3500 const DeviceMemory<U>& side_input, const dnn::BatchDescriptor& x_desc, in DoBatchNormalizationForwardImpl() argument 3524 if (side_input.is_null()) { in DoBatchNormalizationForwardImpl() 3560 !side_input.is_null()) { in DoBatchNormalizationForwardImpl() 3602 /*zData=*/side_input.opaque(), in DoBatchNormalizationForwardImpl()
|
/external/tensorflow/tensorflow/stream_executor/rocm/ |
D | rocm_dnn.h | 261 const DeviceMemory<float>& side_input, const dnn::BatchDescriptor& x_desc, 275 const DeviceMemory<float>& side_input, const dnn::BatchDescriptor& x_desc, 710 const DeviceMemory<U>& side_input, const dnn::BatchDescriptor& x_desc,
|
D | rocm_dnn.cc | 3526 const DeviceMemory<float>& side_input, const dnn::BatchDescriptor& x_desc, in DoBatchNormalizationForward() argument 3536 estimated_mean, estimated_variance, side_input, x_desc, scale_offset_desc, in DoBatchNormalizationForward() 3546 const DeviceMemory<float>& side_input, const dnn::BatchDescriptor& x_desc, in DoBatchNormalizationForward() argument 3556 estimated_mean, estimated_variance, side_input, x_desc, scale_offset_desc, in DoBatchNormalizationForward() 3568 const DeviceMemory<U>& side_input, const dnn::BatchDescriptor& x_desc, in DoBatchNormalizationForwardImpl() argument
|
/external/tensorflow/tensorflow/core/grappler/costs/ |
D | op_level_cost_estimator_test.cc | 280 auto side_input = op_context.op_info.add_inputs(); in DescribeFusedConv2DBiasActivation() local 283 DescribeTensor4D(batch, ox, oy, oz, side_input); in DescribeFusedConv2DBiasActivation() 285 DescribeTensor4D(batch, oz, ox, oy, side_input); in DescribeFusedConv2DBiasActivation() 290 DescribeTensor5D(batch, oz / kVecWidth, ox, oy, kVecWidth, side_input); in DescribeFusedConv2DBiasActivation()
|
D | op_level_cost_estimator.cc | 1745 auto& side_input = op_context.op_info.inputs(3); in PredictFusedConv2DBiasActivation() local 1771 if (side_input.shape().dim_size() > 0) { in PredictFusedConv2DBiasActivation() 1772 component_ops.push_back(FusedChildContext(op_context, "Mul", side_input, in PredictFusedConv2DBiasActivation() 1773 {side_input, side_input_scale})); in PredictFusedConv2DBiasActivation()
|
/external/tensorflow/tensorflow/stream_executor/ |
D | dnn.h | 1016 const DeviceMemory<float>& side_input, const dnn::BatchDescriptor& x_desc, in DoBatchNormalizationForward() argument 1035 const DeviceMemory<float>& side_input, const dnn::BatchDescriptor& x_desc, in DoBatchNormalizationForward() argument
|
D | stream.h | 251 const DeviceMemory<float> &side_input, const dnn::BatchDescriptor &x_desc, 276 const DeviceMemory<float> &side_input, const dnn::BatchDescriptor &x_desc,
|
D | stream.cc | 347 const DeviceMemory<float> &side_input, const dnn::BatchDescriptor &x_desc, in ThenBatchNormalizationForward() argument 360 this, x, scale, offset, estimated_mean, estimated_variance, side_input, in ThenBatchNormalizationForward() 398 const DeviceMemory<float> &side_input, const dnn::BatchDescriptor &x_desc, in ThenBatchNormalizationForward() argument 411 this, x, scale, offset, estimated_mean, estimated_variance, side_input, in ThenBatchNormalizationForward()
|
/external/tensorflow/tensorflow/compiler/tf2tensorrt/convert/ |
D | convert_nodes.cc | 3573 TRT_ShapedWeights side_input = inputs.at(3).weights(); in ConvertFusedConv2DBiasActivation() local 3574 if (side_input.count() != 0) { in ConvertFusedConv2DBiasActivation()
|
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/ir/ |
D | tf_generated_ops.td | 18304 Variadic<TensorOf<[TF_Bfloat16, TF_Float16, TF_Float32]>>:$side_input,
|