/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_PASSES_H_
#define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_PASSES_H_

#include <memory>

#include "mlir/IR/MLIRContext.h"  // from @llvm-project
#include "mlir/IR/PatternMatch.h"  // from @llvm-project
#include "mlir/Pass/Pass.h"  // from @llvm-project

namespace mlir {

// Creates a pass that breaks up an island with multiple ops into multiple
// islands, each with a single op.
std::unique_ptr<OperationPass<ModuleOp>> CreateBreakUpIslandsPass();

// Creates a pass that converts MLIR functions consisting of MLIR ops into the
// tf_executor dialect, wrapping the ops in a single island.
std::unique_ptr<OperationPass<FuncOp>>
CreateFunctionalToExecutorDialectConversionPass();

// Creates a pass that lifts inner ops of tf_executor.island ops in
// tf_executor.graph into the same block as the tf_executor.graph.
std::unique_ptr<OperationPass<FuncOp>>
CreateExecutorDialectToFunctionalConversionPass();

namespace TF {
// Creates a pass that drops the `shape_invariant` attribute from
// While/WhileRegion ops.
std::unique_ptr<OperationPass<FuncOp>> CreateDropWhileShapeInvariantPass();

// Transforms functional control flow operations in the TensorFlow dialect to
// MLIR Control Flow Graph (CFG) form.
std::unique_ptr<OperationPass<FuncOp>> CreateTFFunctionalControlFlowToCFG();

// Transforms functional control flow operations in the TensorFlow dialect to
// their region-based counterparts.
std::unique_ptr<OperationPass<ModuleOp>>
CreateTFFunctionalControlFlowToRegions();

// Transforms region-based control flow operations in the TensorFlow dialect to
// their functional counterparts.
std::unique_ptr<OperationPass<ModuleOp>>
CreateTFRegionControlFlowToFunctional();

// Materializes the MlirPassthroughOp by replacing it with the MLIR module
// attached as an attribute.
std::unique_ptr<OperationPass<FuncOp>> CreateMaterializePassthroughOpPass();

// Performs shape inference on the TensorFlow dialect using the global registry.
std::unique_ptr<OperationPass<ModuleOp>> CreateTFShapeInferencePass();

// Guarantees that all FuncOps have a single use.
std::unique_ptr<OperationPass<ModuleOp>> CreateGuaranteeAllFuncsOneUsePass();

// Optional pass that unrolls BatchMatMul and uses only MatMul.
std::unique_ptr<OperationPass<FuncOp>> CreateUnrollBatchMatMulPassPass();

// Optional pass that maps TF BatchMatMul to TF Einsum.
std::unique_ptr<OperationPass<FuncOp>> CreateBatchMatMulToEinsumPass();

// Optimizes the TensorFlow graph.
std::unique_ptr<OperationPass<FuncOp>> CreateTFOptimizePass();

// Creates a pass to rewrite RecvTPUEmbeddingActivationsOp and
// SendTPUEmbeddingGradients ops to internal variants.
std::unique_ptr<OperationPass<FuncOp>> CreateRewriteTPUEmbeddingOpsPass();

// Performs specific fusion for GPU targets.
std::unique_ptr<OperationPass<FuncOp>> CreateGpuOpFusionPass();

// Creates a pass that converts ops that copy tensors between devices, e.g.
// tf.Identity.
std::unique_ptr<OperationPass<mlir::FuncOp>>
CreateTensorDeviceCopyConversionPass();

// Returns a pass that folds tf.BroadcastTo nodes with subsequent nodes if they
// have built-in broadcasting support.
std::unique_ptr<OperationPass<FuncOp>> CreateBroadcastFoldPass();

struct LayoutOptimizationPipelineOptions
    : public PassPipelineOptions<LayoutOptimizationPipelineOptions> {
  Option<std::string> force_data_format{
      *this, "force-data-format",
      llvm::cl::desc("Force data format for all layout sensitive ops")};
  Option<bool> skip_fold_transpose_in_ops{
      *this, "skip-fold-transpose-in-ops",
      llvm::cl::desc("Skip folding transpose operands in Ops which can support "
                     "different layouts.")};
};

// Layout optimization assigns an optimal data layout for layout-sensitive
// operations and cancels all redundant transposes.
void CreateLayoutOptimizationPipeline(
    OpPassManager& pm,  // NOLINT - MLIR contract is pass by mutable reference.
    const LayoutOptimizationPipelineOptions& options);
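
// Example usage of CreateLayoutOptimizationPipeline (a minimal sketch; `pm`
// is assumed to be an OpPassManager created elsewhere, and "NHWC" is only an
// illustrative data format):
//
//   LayoutOptimizationPipelineOptions layout_options;
//   layout_options.force_data_format = "NHWC";
//   CreateLayoutOptimizationPipeline(pm, layout_options);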

struct StandardPipelineOptions
    : public PassPipelineOptions<StandardPipelineOptions> {
  Option<bool> enable_inliner{*this, "enable-inliner",
                              llvm::cl::desc("Enable inliner."),
                              llvm::cl::init(false)};
  Option<bool> form_clusters{*this, "form-clusters",
                             llvm::cl::desc("Enable Cluster Formation pass."),
                             llvm::cl::init(false)};
};

// Populates the pass manager with the passes involved in transforming or
// optimizing an MLIR graph without any target specialization.
// NOLINTNEXTLINE - MLIR contract is pass by mutable reference.
void CreateTFStandardPipeline(OpPassManager& pm,
                              const StandardPipelineOptions& options);
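
// Example usage of CreateTFStandardPipeline (a minimal sketch; `module` and
// `context` are assumed to be a valid ModuleOp and MLIRContext created
// elsewhere):
//
//   mlir::PassManager pm(&context);
//   mlir::TF::StandardPipelineOptions options;
//   options.enable_inliner = true;
//   mlir::TF::CreateTFStandardPipeline(pm, options);
//   if (mlir::failed(pm.run(module))) {
//     // Handle pipeline failure.
//   }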

// Propagates device attributes of resources from callers to callees.
std::unique_ptr<OperationPass<ModuleOp>> CreateResourceDeviceInferencePass();

// Creates a pass that promotes resource reads/writes in the main function to
// inputs and outputs of the main function, assuming that resource operations
// have already been decomposed and function calls have already been inlined.
// The pass also annotates the input arguments for resources with the indices
// of their aliasing output arguments.
std::unique_ptr<OperationPass<ModuleOp>> CreatePromoteResourcesToArgsPass();

// Creates a pass that promotes tf.VarHandleOp to resource arguments for all
// functions.
std::unique_ptr<OperationPass<ModuleOp>> CreatePromoteVarHandlesToArgsPass();

// Creates a pass that converts read-only reference variables to the
// corresponding resource variables.
std::unique_ptr<OperationPass<FuncOp>>
CreateConvertReadonlyReferenceVariablesToResourceVariablesPass();

// Creates a simple device assignment pass on the TF dialect for the CoreRT
// use case.
std::unique_ptr<OperationPass<FuncOp>> CreateSimpleTFDeviceAssignmentPass(
    llvm::StringRef default_device);
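
// Example usage of CreateSimpleTFDeviceAssignmentPass (a minimal sketch; `pm`
// is assumed to be an OpPassManager created elsewhere, and the device string
// is only illustrative):
//
//   pm.addNestedPass<FuncOp>(CreateSimpleTFDeviceAssignmentPass(
//       "/job:localhost/replica:0/task:0/device:CPU:0"));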

// Performs resource lifting on the function body to hoist resource variable
// accesses outside all control flow statements.
LogicalResult ResourceLiftingForFunctionalControlFlow(FuncOp function);

// Converts stack ops into operations on local variables, which can later be
// removed by resource lifting. Requires known maximum sizes of stacks and
// known element shapes of push ops.
std::unique_ptr<OperationPass<ModuleOp>> CreateStackOpsDecompositionPass();

// Converts tensor list operations into operations on buffers and sizes. Needs
// static shapes and known max element count.
std::unique_ptr<OperationPass<ModuleOp>> CreateTensorListOpsDecompositionPass();

// Converts tensor array ops into operations on local variables, which can later
// be removed by resource lifting. Requires known sizes and known element shapes
// (either defined in TensorArrayV3 or implied in the first write).
std::unique_ptr<OperationPass<ModuleOp>>
CreateTensorArrayOpsDecompositionPass();

// Creates a pass that legalizes HLO to the TF dialect.
std::unique_ptr<OperationPass<FuncOp>> CreateLegalizeHloToTfPass();

// Adds the HLO to TF rewrite patterns to the specified pattern list.
void PopulateLegalizeHloToTfPatterns(OwningRewritePatternList* patterns,
                                     MLIRContext* context);
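
// Example usage of PopulateLegalizeHloToTfPatterns (a minimal sketch; `func`
// is assumed to be a FuncOp obtained elsewhere, and exact pattern-driver
// signatures vary across MLIR versions):
//
//   OwningRewritePatternList patterns;
//   PopulateLegalizeHloToTfPatterns(&patterns, func.getContext());
//   (void)applyPatternsAndFoldGreedily(func, std::move(patterns));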

// Matches sequences of ops to TensorFlow fused kernels. This pass should not
// be generally used beyond exporting to runtimes that support these ops. In
// the future these fusions may be codegen'd automatically.
std::unique_ptr<OperationPass<FuncOp>> CreateFusedKernelMatcherPass();

// Fuses operations defining the `ContractionFusableInterface` interface into
// the contraction operations (MatMul, Conv2D, etc.). This is a more general
// version of `CreateFusedKernelMatcherPass` that relies on codegen to compose
// contraction fusions together.
std::unique_ptr<OperationPass<FuncOp>> CreateContractionFusionPass();

// Creates a function pass to select the device index and fold tf.DeviceIndex
// ops.
std::unique_ptr<OperationPass<FuncOp>> CreateDeviceIndexSelectorPass();

// Creates a function pass to replace InitializeTableFromTextFileV2Ops with
// LookupTableImportV2Op ops.
std::unique_ptr<OperationPass<FuncOp>> CreateInitTextFileToImportPass();

// Creates a function pass to cluster TensorFlow ops by host. The program
// generated by this pass will have one function per host, where all operations
// in the same function are placed on the same host. Each result of the per-host
// function will have a "tf.device" attribute which specifies the device
// assignment of the result.
std::unique_ptr<OperationPass<mlir::ModuleOp>> CreateClusterTFOpsByHostPass();

// Creates a pass to insert tf_device.send and tf_device.receive ops to make
// sure any argument of any op is on the same host as the op itself.
std::unique_ptr<OperationPass<mlir::ModuleOp>> CreateCrossHostTransferPass();

// Creates a pass that adds the device attribute to every tf.Const op based on
// the device attribute of the operations that read its result. If the result of
// a tf.Const op is read by operations placed on multiple devices, then the pass
// will replicate the tf.Const op once for each device.
std::unique_ptr<OperationPass<ModuleOp>> CreateConstantOpDeviceAssignmentPass();

}  // namespace TF

namespace tf_executor {
// Creates a pass to merge IslandOps from the TFExecutor dialect.
std::unique_ptr<OperationPass<FuncOp>> CreateTFExecutorIslandCoarseningPass();

// Creates a pass to merge IslandOps for operations marked for execution on
// TPU. This is for V1 backward compatibility.
std::unique_ptr<OperationPass<ModuleOp>>
CreateTFExecutorTPUV1IslandCoarseningPass();

// Creates a pass to outline TPU clusters from a single IslandOp into a nested
// module suitable for being processed as if it were a V2 module.
// This is for V1 backward compatibility.
std::unique_ptr<OperationPass<ModuleOp>>
CreateTFExecutorTPUV1IslandOutliningPass();

// Creates a pass to inline calls to the nested TPU module; this reverses the
// effect of the `TFExecutorTPUV1IslandOutlining` pass above.
// This is for V1 backward compatibility.
std::unique_ptr<OperationPass<ModuleOp>>
CreateTFExecutorTPUV1IslandInliningPass();

// Creates a pass to prune dead nodes from tf_executor.graph.
std::unique_ptr<OperationPass<FuncOp>> CreateTFExecutorGraphPruningPass(
    llvm::ArrayRef<std::string> ops_to_preserve = {});
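
// Example usage of CreateTFExecutorGraphPruningPass (a minimal sketch; `pm`
// is assumed to be an OpPassManager created elsewhere, and the preserved op
// name is only illustrative):
//
//   const std::string ops_to_preserve[] = {"tf.NoOp"};
//   pm.addNestedPass<FuncOp>(
//       CreateTFExecutorGraphPruningPass(ops_to_preserve));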
}  // namespace tf_executor

namespace TFDevice {
// Creates a pass that forms clusters from instructions that are assigned to
// the same device.
std::unique_ptr<OperationPass<FuncOp>> CreateClusterFormationPass();

// Sinks `tf.Const` operations into the ClusterOp regions that use them. This
// is performed in order to limit the number of values implicitly captured in
// these regions before outlining.
std::unique_ptr<OperationPass<FuncOp>> CreateClusterConstantSinkingPass();

// Creates a pass that outlines regions of tf_device.launch operations.
std::unique_ptr<OperationPass<ModuleOp>> CreateClusterOutliningPass();

// Creates a pass that clusters ops into tf_device::ClusterOp regions
// according to a policy specified by the pass options.
std::unique_ptr<FunctionPass> CreateClusterOpsByPolicyPass();

// A pass that decomposes composite resource operations into primitive ones like
// ReadVariableOp, AssignVariableOp and other computations to facilitate
// transformations like resource op lifting.
std::unique_ptr<OperationPass<FuncOp>> CreateDecomposeResourceOpsPass();

// Creates a pass that lifts operations on external resource variables from
// device computation nested in `tf_device::LaunchOp` out so that resource
// variable load operations are all before device computation while resource
// variable store operations are all after device computation. After this pass,
// device computation no longer interacts with external resource variables.
std::unique_ptr<OperationPass<ModuleOp>> CreateResourceOpLiftingPass();

// Lifts resource operations from tf_device.launch_func ops nested in `op`
// outside. Returns a failure if there are remaining resource-type values that
// cannot be lifted.
LogicalResult LiftResourceOps(Operation* op);

// Creates a pass that hoists invariant operations in a `tf_device.replicate`.
std::unique_ptr<OperationPass<FuncOp>> CreateReplicateInvariantOpHoistingPass();

// Creates a pass that forms replica `tf_executor.island` ops from a single
// `tf_device.replicate` island.
std::unique_ptr<OperationPass<FuncOp>> CreateReplicateToIslandPass();

// Creates a pass that creates `tf_executor.island` ops from a single
// `tf_device.parallel_execute` island.
std::unique_ptr<OperationPass<FuncOp>> CreateParallelExecuteToIslandsPass();

// Creates a pass that annotates whether a LaunchFuncOp's parameters have the
// same data across replicas.
std::unique_ptr<OperationPass<ModuleOp>>
CreateAnnotateParameterReplicationPass();

// Creates a pass that marks unsupported ops in a device cluster for outside
// compilation.
std::unique_ptr<OperationPass<ModuleOp>>
CreateMarkOpsForOutsideCompilationPass();

// Creates a pass that merges control flow with similar predicates.
std::unique_ptr<OperationPass<ModuleOp>> CreateMergeControlFlowPass();

// Creates a pass that hoists a `tf_device.launch` body and assigns a `device`
// attribute to each TensorFlow dialect op in the body based on the `device`
// attribute on the `tf_device.launch`.
std::unique_ptr<OperationPass<FuncOp>> CreateLaunchToDeviceAttributePass();

// Creates a pass that hoists a `tf_device.replicate` body and replicates each
// TensorFlow dialect op in the body based on its `device` attribute and the
// `devices` attribute on the `tf_device.replicate`.
std::unique_ptr<OperationPass<mlir::ModuleOp>> CreateTFDeviceReplicationPass();
}  // namespace TFDevice

namespace TFTPU {
// Creates a pass that forms clusters from operations with the same
// `_tpu_replicate` attribute.
std::unique_ptr<OperationPass<ModuleOp>> CreateTPUClusterFormationPass();

// Creates a pass that cleans up the `_tpu_replicate` attribute on operations
// that are inside a cluster.
std::unique_ptr<OperationPass<ModuleOp>>
CreateTPUClusterCleanupAttributesPass();

// Creates a pass that removes Identity/IdentityN ops from a cluster.
std::unique_ptr<OperationPass<ModuleOp>> CreateTPUIdentityPruningPass();

// Creates a pass that allows TPU program inputs to have layouts determined at
// run time.
std::unique_ptr<OperationPass<ModuleOp>> CreateTPUDynamicLayoutPass();

// Creates a pass that remaps and assigns the padding map from a
// `tf_device.launch_func` `padding_map` attribute to its encapsulated function.
std::unique_ptr<OperationPass<ModuleOp>> CreateTPUDynamicPaddingMapperPass();

// Creates a pass that adds `tf.ReadVariableOp` to a TPU cluster for resources
// the cluster only writes to.
std::unique_ptr<OperationPass<ModuleOp>> CreateTPUResourceReadForWritePass();

// Creates a pass that reorders partitioned resource reads and replicated
// inputs.
std::unique_ptr<OperationPass<FuncOp>>
CreateTPUReorderReplicateAndPartitionedInputsPass();

// Creates a pass that partitions unpartitioned resource reads/writes to
// partitioned resource variables.
std::unique_ptr<OperationPass<FuncOp>>
CreateTPUResourceReadsWritesPartitioningPass();

// Creates a pass that rewrites `tf_device.launch_func` on TPUs into TPU runtime
// ops.
std::unique_ptr<OperationPass<ModuleOp>> CreateTPURewritePass();

// Creates a pass that identifies XlaSharding ops in a launch op for TPU
// computation.
std::unique_ptr<OperationPass<ModuleOp>> CreateTPUShardingIdentificationPass();

// Creates a pass that moves `tf.AssignVariableOp` into a
// `tf_device.parallel_execute` region if the `tf.AssignVariableOp` is the
// only consumer of a `tf_device.parallel_execute` result.
std::unique_ptr<OperationPass<FuncOp>>
CreateTPUParallelExecuteSinkResourceWritePass();

// Creates a pass that merges device variable reads/updates into the surrounded
// TPUExecute node. This allows the execute node to perform in-place variable
// updates.
std::unique_ptr<OperationPass<FuncOp>> CreateTPUMergeVariablesWithExecutePass();

// Creates a pass that wraps ReadVariableOp/AssignVariableOp ops that consume a
// packed tensor so they have the same device placement as the underlying TPU
// device.
std::unique_ptr<OperationPass<FuncOp>> CreateTPUColocateCompositeResourceOps();

// Creates a pass that adds ops which perform formatting on variables at
// run-time according to the compilation result.
std::unique_ptr<OperationPass<ModuleOp>> CreateTPUVariableReformattingPass();

// Creates a pass that wraps ops with the same `_xla_outside_compilation`
// attribute value in a tf_device.launch op with host device assignment.
std::unique_ptr<OperationPass<ModuleOp>>
CreateOutsideCompiledToHostLaunchPass();

// Creates a pass that extracts outside compilation (CPU ops inside a TPU
// cluster) at the head/tail of the TPU cluster to run before/after the TPU
// computation.
std::unique_ptr<OperationPass<ModuleOp>>
CreateTPUExtractHeadTailOutsideCompilationPass();

// Creates a pass that expands the outside compilation cluster at the head/tail
// of a TPU computation by adding the outside compilation attribute to
// identity/cast ops that are only used for host computation.
std::unique_ptr<OperationPass<FuncOp>> CreateTPUHostComputationExpansionPass();

// Creates a pass that updates inputs to TPU embedding layer enqueue ops so that
// correct ops are invoked during training and evaluation.
std::unique_ptr<OperationPass<FuncOp>>
CreateTPUUpdateEmbeddingEnqueueOpInputsPass();

// Creates a pass that extracts outside compilation (CPU ops inside a TPU
// cluster) ops to a separate parallel_execute region to run on CPU.
std::unique_ptr<OperationPass<ModuleOp>>
CreateTPUExtractOutsideCompilationPass();

// Creates a pass that propagates TPU devices to users.
std::unique_ptr<OperationPass<FuncOp>> CreateTPUDevicePropagationPass();

// Populates the supplied pass manager with the passes required to run the
// bridge.
void CreateTPUBridgePipeline(OpPassManager& pm);

// Populates the supplied pass manager with the passes required to run the
// bridge in V1 mode.
void CreateTPUBridgePipelineV1(OpPassManager& pm);
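
// Example of registering CreateTPUBridgePipeline as a named pass pipeline
// (a minimal sketch; the "tf-tpu-bridge" name is only illustrative and not
// necessarily the name used by existing tooling):
//
//   static mlir::PassPipelineRegistration<> tpu_bridge_pipeline(
//       "tf-tpu-bridge", "Runs the TPU bridge passes",
//       [](mlir::OpPassManager& pm) { CreateTPUBridgePipeline(pm); });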

// Creates a pass that replicates the tf._TPUCompileMlir op on each host that
// needs the compiled program. It helps avoid transferring the compiled binary
// between hosts.
std::unique_ptr<OperationPass<mlir::ModuleOp>>
CreateTPUCompileOpReplicationPass();

}  // namespace TFTPU

#define GEN_PASS_REGISTRATION
#include "tensorflow/compiler/mlir/tensorflow/transforms/tf_passes.h.inc"

}  // namespace mlir

#endif  // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_PASSES_H_