• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_PASSES_H_
17 #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_PASSES_H_
18 
19 #include <memory>
20 
21 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
22 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
23 #include "mlir/Pass/Pass.h"  // from @llvm-project
24 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h"
25 
26 namespace mlir {
27 
28 // Creates a pass that breaks up an island with multiple ops into multiple
29 // islands, each with a single op.
30 std::unique_ptr<OperationPass<ModuleOp>> CreateBreakUpIslandsPass();
31 
32 // Creates a pass that converts mlir functions consisting of mlir ops into a
33 // tf_executor dialect as a single island.
34 std::unique_ptr<OperationPass<FuncOp>>
35 CreateFunctionalToExecutorDialectConversionPass();
36 
37 // Creates a pass that lifts inner ops of tf_executor.island ops in
38 // tf_executor.graph into the same block as the tf_executor.graph.
39 std::unique_ptr<OperationPass<FuncOp>>
40 CreateExecutorDialectToFunctionalConversionPass();
41 
42 namespace TF {
43 // Creates a pass that drops `shape_invariant` attribute from While/WhileRegion
44 // ops.
45 std::unique_ptr<OperationPass<FuncOp>> CreateDropWhileShapeInvariantPass();
46 
47 // Creates a pass that drops `shape_invariant` attribute from While/WhileRegion
48 // ops within device cluster.
49 std::unique_ptr<OperationPass<FuncOp>>
50 CreateDropWhileShapeInvariantInDeviceClusterPass();
51 
52 // Creates a pass that moves writes to replicate invariant resource variables
53 // outside tf_device.replicate op.
54 std::unique_ptr<OperationPass<FuncOp>>
55 CreateHoistReplicateInvariantResourceWritesPass();
56 
57 // Transforms functional control flow operations in the TensorFlow dialect to
58 // MLIR Control Flow Graph (CFG) form.
59 std::unique_ptr<OperationPass<FuncOp>> CreateTFFunctionalControlFlowToCFG();
60 
61 // Transforms functional control flow operations in the TensorFlow dialect to
62 // their region based counterparts.
63 std::unique_ptr<OperationPass<ModuleOp>>
64 CreateTFFunctionalControlFlowToRegions();
65 
66 // Transforms region based control flow operations in the TensorFlow dialect to
67 // their functional counterparts.
68 std::unique_ptr<OperationPass<ModuleOp>>
69 CreateTFRegionControlFlowToFunctional();
70 
71 // Materialize the MlirPassthroughOp by replacing it with the MLIR module
72 // attached as an attribute.
73 std::unique_ptr<OperationPass<FuncOp>> CreateMaterializePassthroughOpPass();
74 
75 // Performs Shape Inference on the TensorFlow dialect using the global registry.
76 std::unique_ptr<OperationPass<ModuleOp>> CreateTFShapeInferencePass();
77 
78 // Performs checks that the whole module does not contain dynamic shapes.
79 std::unique_ptr<OperationPass<FuncOp>> CreateTFEnsureStaticShapesPass();
80 
81 // Guarantee that all FuncOp's have a single use.
82 std::unique_ptr<OperationPass<ModuleOp>> CreateGuaranteeAllFuncsOneUsePass();
83 
84 // Optional pass which will unroll BatchMatMul and use only MatMul
85 std::unique_ptr<OperationPass<FuncOp>> CreateUnrollBatchMatMulPassPass();
86 
87 // Optional pass which will map TF BatchMatMul to TF Einsum
88 std::unique_ptr<OperationPass<FuncOp>> CreateBatchMatMulToEinsumPass();
89 
90 // Optimizes Tensorflow graph.
91 std::unique_ptr<OperationPass<FuncOp>> CreateTFOptimizePass();
92 
93 // Creates pass to rewrite RecvTPUEmbeddingActivationsOp and
94 // SendTPUEmbeddingGradients ops to internal variants.
95 std::unique_ptr<OperationPass<FuncOp>> CreateRewriteTPUEmbeddingOpsPass();
96 
97 // Performs specific fusion for GPU targets.
98 std::unique_ptr<OperationPass<FuncOp>> CreateGpuOpFusionPass();
99 
100 // Creates a pass that converts ops that copy tensors between devices, e.g.
101 // tf.Identity.
102 std::unique_ptr<OperationPass<mlir::FuncOp>>
103 CreateTensorDeviceCopyConversionPass();
104 
105 // Returns a pass that folds tf.BroadcastTo nodes with subsequent nodes if they
106 // have built in broadcasting support.
107 std::unique_ptr<OperationPass<FuncOp>> CreateBroadcastFoldPass();
108 
// Command-line style options for the layout optimization pipeline. An instance
// of this struct is taken by CreateLayoutOptimizationPipeline() (declared
// right after this struct).
109 struct LayoutOptimizationPipelineOptions
110     : public PassPipelineOptions<LayoutOptimizationPipelineOptions> {
  // `force-data-format`: string option that forces one data format onto all
  // layout sensitive ops. No explicit initial value is given here.
  // NOTE(review): accepted values are not visible in this header (presumably
  // data format names such as NHWC/NCHW) - confirm against the pipeline
  // implementation.
111   Option<std::string> force_data_format{
112       *this, "force-data-format",
113       llvm::cl::desc("Force data format for all layout sensitive ops")};
  // `skip-fold-transpose-in-ops`: boolean option that skips folding transpose
  // operands into ops that can support different layouts. No explicit
  // llvm::cl::init is given here.
114   Option<bool> skip_fold_transpose_in_ops{
115       *this, "skip-fold-transpose-in-ops",
116       llvm::cl::desc("Skip folding transpose operands in Ops which can support "
117                      "different layouts.")};
118 };
119 
120 // Layout optimization assigns optimal data layout for layout sensitive
121 // operations, and cancels all redundant transposes.
122 void CreateLayoutOptimizationPipeline(
123     OpPassManager& pm,  // NOLINT - MLIR contract is pass by mutable reference.
124     const LayoutOptimizationPipelineOptions& options);
125 
// Command-line style options for the TF standard pipeline. An instance of this
// struct is taken by CreateTFStandardPipeline() (declared right after this
// struct). Both options are explicitly initialized to false.
126 struct StandardPipelineOptions
127     : public PassPipelineOptions<StandardPipelineOptions> {
  // `enable-inliner`: enables the inliner; initialized to false.
128   Option<bool> enable_inliner{*this, "enable-inliner",
129                               llvm::cl::desc("Enable inliner."),
130                               llvm::cl::init(false)};
  // `form-clusters`: enables the Cluster Formation pass; initialized to false.
131   Option<bool> form_clusters{*this, "form-clusters",
132                              llvm::cl::desc("Enable Cluster Formation pass."),
133                              llvm::cl::init(false)};
134 };
135 
136 // Populates the pass manager with the passes involved in transforming or
137 // optimizing an MLIR graph without any target specialization.
138 // NOLINTNEXTLINE - MLIR contract is pass by mutable reference.
139 void CreateTFStandardPipeline(OpPassManager& pm,
140                               const StandardPipelineOptions& options);
141 
142 // Propagates device attributes of resources from callers to callees.
143 std::unique_ptr<OperationPass<ModuleOp>> CreateResourceDeviceInferencePass();
144 
145 // Creates a pass that promotes resource reads/writes in the main function to
146 // inputs and outputs of the main function, assuming that resource operations
147 // have already been decomposed and function calls have already been inlined.
148 // The pass also annotates the input arguments for resources with the indices
149 // of their aliasing output arguments.
150 std::unique_ptr<OperationPass<ModuleOp>> CreatePromoteResourcesToArgsPass();
151 
152 // Creates a pass that promotes tf.VarHandleOp to resource arguments for all
153 // functions.
154 std::unique_ptr<OperationPass<ModuleOp>> CreatePromoteVarHandlesToArgsPass();
155 
156 // Creates a pass that converts readonly reference variables to the
157 // corresponding resource variables.
158 std::unique_ptr<OperationPass<FuncOp>>
159 CreateConvertReadonlyReferenceVariablesToResourceVariablesPass();
160 
161 // Creates a simple device assignment pass on TF dialect for CoreRT use case.
162 std::unique_ptr<OperationPass<FuncOp>> CreateSimpleTFDeviceAssignmentPass(
163     llvm::StringRef default_device);
164 
165 // Performs resource lifting on the function body to hoist resource variable
166 // accesses outside all control flow statements.
167 LogicalResult ResourceLiftingForFunctionalControlFlow(FuncOp function);
168 
169 // Converts stack ops into operations on local variables, which can later be
170 // removed by resource lifting. Requires known maximum sizes of stacks and
171 // known element shapes of push ops.
172 std::unique_ptr<OperationPass<ModuleOp>> CreateStackOpsDecompositionPass();
173 
174 // Converts tensor list operations into operations on buffers and sizes. Needs
175 // static shapes and known max element count.
176 std::unique_ptr<OperationPass<ModuleOp>> CreateTensorListOpsDecompositionPass();
177 
178 // Converts tensor array ops into operations on local variables, which can later
179 // be removed by resource lifting. Requires known sizes and known element shapes
180 // (either defined in TensorArrayV3 or implied in the first write).
181 std::unique_ptr<OperationPass<ModuleOp>>
182 CreateTensorArrayOpsDecompositionPass();
183 
184 // Creates a pass that legalizes HLO to the TF dialect.
185 std::unique_ptr<OperationPass<FuncOp>> CreateLegalizeHloToTfPass();
186 
187 // Adds the HLO to TF rewrite patterns to the specified pattern list.
188 void PopulateLegalizeHloToTfPatterns(OwningRewritePatternList* patterns,
189                                      MLIRContext* context);
190 
191 // Matches sequence of ops to TensorFlow fused kernels. This pass should not be
192 // generally used beyond exporting to runtimes that supports these ops. In the
193 // future these fusions may be codegen'd automatically.
194 std::unique_ptr<OperationPass<FuncOp>> CreateFusedKernelMatcherPass();
195 
196 // Creates function pass to select device index/fold tf.DeviceIndex.
197 std::unique_ptr<OperationPass<FuncOp>> CreateDeviceIndexSelectorPass();
198 
199 // Creates function pass to replace InitializeTableFromTextFileV2Ops with
200 // LookupTableImportV2Op ops.
201 std::unique_ptr<OperationPass<FuncOp>> CreateInitTextFileToImportPass(
202     std::string saved_model_dir = "");
203 
204 // Creates function pass to cluster TensorFlow ops by host. The program
205 // generated by this pass will have one function per host where all operations
206 // in the same function are placed on the same host. Each result of the per-host
207 // function will have a "tf.device" attribute which specifies the device
208 // assignment of the result.
209 std::unique_ptr<OperationPass<mlir::ModuleOp>> CreateClusterTFOpsByHostPass();
210 
211 // Creates a pass to insert tf_device.send and tf_device.receive ops to make
212 // sure any argument of any op is on the same host of the op itself.
213 std::unique_ptr<OperationPass<mlir::ModuleOp>> CreateCrossHostTransferPass();
214 
215 // Creates a pass that adds the device attribute to every tf.Const op based on
216 // the device attribute of the operations that read its result. If the result of
217 // a tf.Const op is read by operations placed on multiple devices, then the pass
218 // will replicate the tf.Const op once for each device.
219 std::unique_ptr<OperationPass<ModuleOp>> CreateConstantOpDeviceAssignmentPass();
220 
221 // Populates the supplied passmanager with the passes required to export
222 // to TensorFlow Graph.
223 void AddGraphExportLoweringPasses(OpPassManager& pm);
224 
225 // Returns pass that verifies whether all functions in module are of single
226 // tf_executor.graph and each tf_executor.island in tf_executor.graph only has a
227 // single op.
228 std::unique_ptr<OperationPass<ModuleOp>> CreateVerifySuitableForExportPass();
229 
230 // Returns pass that prepares TPU computation to be legal for export to
231 // TensorFlow.
232 std::unique_ptr<OperationPass<FuncOp>>
233 CreatePrepareTpuComputationForTfExportPass();
234 
235 // Rewrites ops that require quantized inputs or outputs to ops that allow
236 // non-quantized inputs and outputs.
237 std::unique_ptr<OperationPass<FuncOp>> CreateLowerQuantizedPass();
238 }  // namespace TF
239 
240 namespace tf_executor {
241 // Creates a pass to merge IslandOps from TFExecutor dialect.
242 std::unique_ptr<OperationPass<FuncOp>> CreateTFExecutorIslandCoarseningPass();
243 
244 // Creates a pass to merge IslandOps for operation marked for execution on TPU.
245 // This is a V1 backward compatibility.
246 std::unique_ptr<OperationPass<ModuleOp>>
247 CreateTFExecutorTPUV1IslandCoarseningPass();
248 
249 // Creates a pass to outline TPU clusters from a single IslandOp into a nested
250 // module suitable for being processed as-if it was a V2 module.
251 // This is a V1 backward compatibility.
252 std::unique_ptr<OperationPass<ModuleOp>>
253 CreateTFExecutorTPUV1IslandOutliningPass();
254 
255 // Creates a pass to inline calls to the nested TPU module, this reverses the
256 // effect of the `TFExecutorTPUV1IslandOutlining` pass above.
257 // This is a V1 backward compatibility.
258 std::unique_ptr<OperationPass<ModuleOp>>
259 CreateTFExecutorTPUV1IslandInliningPass();
260 
261 // Creates a pass to prune tf_executor.graph from dead nodes.
262 std::unique_ptr<OperationPass<FuncOp>> CreateTFExecutorGraphPruningPass(
263     llvm::ArrayRef<std::string> ops_to_preserve = {});
264 }  // namespace tf_executor
265 
266 namespace TFDevice {
267 // Creates a pass that forms clusters from instructions that are assigned to
268 // same device.
269 std::unique_ptr<OperationPass<FuncOp>> CreateClusterFormationPass();
270 
271 // Sinks `tf.Const` operations in the ClusterOp region using them. This is
272 // performed in order to limit the number of values implicitly captured in this
273 // region before outlining.
274 std::unique_ptr<OperationPass<FuncOp>> CreateClusterConstantSinkingPass(
275     llvm::function_ref<bool(tf_device::ClusterOp, ElementsAttr)> filter = {});
276 
277 // Creates a pass that outlines regions of tf_device.cluster operations.
278 std::unique_ptr<OperationPass<ModuleOp>> CreateClusterOutliningPass();
279 
280 // Creates a pass that outlines regions of tf_device.launch operations.
281 std::unique_ptr<OperationPass<ModuleOp>> CreateLaunchOutliningPass();
282 
283 // Creates a pass that converts tf_device::LaunchFuncOp into
284 // TF::PartitionedCallOp.
285 std::unique_ptr<OperationPass<ModuleOp>> CreateConvertLaunchFuncToTFCallPass();
286 
287 // A pass that decomposes composite resource operations into primitive ones like
288 // ReadVariableOp, AssignVariableOp and other computations to facilitate
289 // transformations like resource op lifting.
290 std::unique_ptr<OperationPass<FuncOp>> CreateDecomposeResourceOpsPass();
291 
292 // A pass that decomposes composite resource operations in device cluster
293 // (tf_device.cluster op) into primitive ones like ReadVariableOp,
294 // AssignVariableOp and other computations to facilitate transformations like
295 // resource op lifting.
296 std::unique_ptr<OperationPass<ModuleOp>>
297 CreateDecomposeResourceOpsInClusterPass();
298 
299 // Creates a pass that marks TPU cluster input-output pairs reading and writing
300 // to same resource variable as aliases.
301 std::unique_ptr<OperationPass<ModuleOp>> CreateMarkInputOutputAliasesPass();
302 
303 // Creates a pass that lifts operations on external resource variables from
304 // device computation nested in `tf_device::LaunchOp` out so that resource
305 // variable load operations are all before device computation while resource
306 // variable store operations are all after device computation. After this pass,
307 // device computation no longer interacts with external resource variables.
308 std::unique_ptr<OperationPass<ModuleOp>> CreateResourceOpLiftingPass();
309 
310 // Creates a pass that lifts operations from the main function.
311 std::unique_ptr<OperationPass<ModuleOp>>
312 CreateResourceOpLiftingForMainFunctionPass();
313 
314 // Lifts resource operations from tf_device.launch_func ops nested in `op`
315 // outside. Returns a failure if there are remaining resource-type values that
316 // can not be lifted.
317 LogicalResult LiftResourceOps(Operation* op);
318 
319 // Creates a pass that hoists invariant operations in a `tf_device.replicate`.
320 std::unique_ptr<OperationPass<FuncOp>> CreateReplicateInvariantOpHoistingPass();
321 
322 // Creates a pass that forms replica `tf_executor.island` from a single
323 // `tf_device.replicate` island.
324 std::unique_ptr<OperationPass<FuncOp>> CreateReplicateToIslandPass();
325 
326 // Creates a pass that creates `tf_executor.island` from a single
327 // `tf_device.parallel_execute` island.
328 std::unique_ptr<OperationPass<FuncOp>> CreateParallelExecuteToIslandsPass();
329 
330 // Creates a pass that annotates whether a LaunchFuncOp's parameters have the
331 // same data across replicas.
332 std::unique_ptr<OperationPass<ModuleOp>>
333 CreateAnnotateParameterReplicationPass();
334 
335 // Creates a pass that marks unsupported ops in device cluster for outside
336 // compilation.
337 std::unique_ptr<OperationPass<ModuleOp>>
338 CreateMarkOpsForOutsideCompilationPass();
339 
340 // Creates a pass that merges control flow with similar predicates.
341 std::unique_ptr<OperationPass<ModuleOp>> CreateMergeControlFlowPass();
342 
343 // Creates a pass that wraps each TensorFlow dialect op with `device` attribute
344 // in a `tf_device.launch` op with the same `device` attribute.
345 std::unique_ptr<OperationPass<FuncOp>> CreateDeviceAttributeToLaunchPass();
346 
347 // Creates a pass that hoists a `tf_device.launch` body and assigns a `device`
348 // attribute to each TensorFlow dialect op in the body based on the `device`
349 // attribute on the `tf_device.launch`.
350 std::unique_ptr<OperationPass<FuncOp>> CreateLaunchToDeviceAttributePass();
351 
352 // Creates a pass that hoists a `tf_device.replicate` body and replicates each
353 // TensorFlow dialect op in the body based on its `device` attribute and the
354 // `devices` attribute on the `tf_device.replicate`.
355 std::unique_ptr<OperationPass<mlir::ModuleOp>> CreateTFDeviceReplicationPass();
356 
357 // Creates a pass that extracts ops in tf_device.launch op with host device
358 // assignment and adds an `_xla_outside_compilation` attribute value.
359 std::unique_ptr<OperationPass<ModuleOp>>
360 CreateHostLaunchToOutsideCompiledPass();
361 
362 }  // namespace TFDevice
363 
364 namespace TFTPU {
365 // Creates a pass that forms clusters from operations of the same
366 // `_tpu_replicate` attribute.
367 std::unique_ptr<OperationPass<ModuleOp>> CreateTPUClusterFormationPass();
368 
369 // Creates a pass that cleans up `_tpu_replicate` attribute on operations
370 // that are inside a cluster.
371 std::unique_ptr<OperationPass<ModuleOp>>
372 CreateTPUClusterCleanupAttributesPass();
373 
374 // Creates a pass that removes Identity/IdentityN ops from a cluster.
375 std::unique_ptr<OperationPass<ModuleOp>> CreateTPUIdentityPruningPass();
376 
377 // Creates a pass that allows TPU program inputs to have layouts determined at
378 // run time.
379 std::unique_ptr<OperationPass<ModuleOp>> CreateTPUDynamicLayoutPass();
380 
381 // Creates a pass that adds `tf.ReadVariableOp` to a TPU cluster for resources
382 // the cluster only writes to.
383 std::unique_ptr<OperationPass<ModuleOp>> CreateTPUResourceReadForWritePass();
384 
385 // Creates a pass that reorders partitioned resource reads and replicated
386 // inputs.
387 std::unique_ptr<OperationPass<FuncOp>>
388 CreateTPUReorderReplicateAndPartitionedInputsPass();
389 
390 // Creates a pass that partitions unpartitioned resource read/write to
391 // partitioned resource variables.
392 std::unique_ptr<OperationPass<FuncOp>>
393 CreateTPUResourceReadsWritesPartitioningPass();
394 
395 // Creates a pass that rewrites `tf_device.launch_func` on TPUs into TPU runtime
396 // ops.
397 std::unique_ptr<OperationPass<ModuleOp>> CreateTPURewritePass();
398 
399 // Creates a pass that identifies XLASharding ops in launch op for TPU
400 // computation.
401 std::unique_ptr<OperationPass<ModuleOp>> CreateTPUShardingIdentificationPass();
402 
403 // Creates a pass that moves `tf.AssignVariableOp` into a
404 // `tf_device.parallel_execute` region if the `tf.AssignVariableOp` is the
405 // only consumer of a `tf_device.parallel_execute` result.
406 std::unique_ptr<OperationPass<FuncOp>>
407 CreateTPUParallelExecuteSinkResourceWritePass();
408 
409 // Creates a pass that merges device variable reads/updates into the surrounded
410 // TPUExecute node. This allows the execute node to perform in-place variable
411 // updates.
412 std::unique_ptr<OperationPass<ModuleOp>>
413 CreateTPUMergeVariablesWithExecutePass();
414 
415 // Creates a pass that wraps ReadVariableOp/AssignVariable op that consumes a
416 // packed tensor to have same device placement as underlying TPU device.
417 std::unique_ptr<OperationPass<FuncOp>> CreateTPUColocateCompositeResourceOps();
418 
419 // Creates a pass that adds ops which perform formatting on variables at
420 // run-time according to compilation result.
421 std::unique_ptr<OperationPass<ModuleOp>> CreateTPUVariableReformattingPass();
422 
423 // Creates a pass that wraps ops with the same `_xla_outside_compilation`
424 // attribute value in a tf_device.launch op with host device assignment.
425 std::unique_ptr<OperationPass<ModuleOp>>
426 CreateOutsideCompiledToHostLaunchPass();
427 
428 // Creates a pass that extracts outside compilation (CPU ops inside TPU cluster)
429 // at head/tail of TPU cluster to run before/after TPU computation.
430 std::unique_ptr<OperationPass<ModuleOp>>
431 CreateTPUExtractHeadTailOutsideCompilationPass();
432 
433 // Creates a pass that expands outside compilation cluster at the head/tail of
434 // TPU computation by adding outside compilation attribute to identity/cast ops
435 // that are only used for host computation.
436 std::unique_ptr<OperationPass<FuncOp>> CreateTPUHostComputationExpansionPass();
437 
438 // Creates a pass that updates inputs to TPU embedding layer enqueue ops so that
439 // correct ops are invoked during training and evaluation.
440 std::unique_ptr<OperationPass<FuncOp>>
441 CreateTPUUpdateEmbeddingEnqueueOpInputsPass();
442 
443 // Creates a pass that extracts outside compilation (CPU ops inside TPU cluster)
444 // ops to a separate parallel_execute region to run on CPU.
445 std::unique_ptr<OperationPass<ModuleOp>>
446 CreateTPUExtractOutsideCompilationPass();
447 
448 // Creates a pass that propagates TPU devices to users.
449 std::unique_ptr<OperationPass<FuncOp>> CreateTPUDevicePropagationPass();
450 
451 // Populates the supplied passmanager with the passes required to run the
452 // bridge.
453 void CreateTPUBridgePipeline(OpPassManager& pm);
454 
455 // Populates the supplied passmanager with the passes required to run the
456 // bridge in V1 mode.
457 void CreateTPUBridgePipelineV1(OpPassManager& pm);
458 
459 // Creates a pass that replicates the tf._TPUCompileMlir op on each host that
460 // needs the compiled program. It helps avoid transferring the compiled binary
461 // between hosts.
462 std::unique_ptr<OperationPass<mlir::ModuleOp>>
463 CreateTPUCompileOpReplicationPass();
464 
465 // Creates a pass that applies space to depth transform
466 // for the first or frontier convolutions that consume host inputs on TPU.
467 std::unique_ptr<OperationPass<ModuleOp>> CreateTPUSpaceToDepthPass();
468 
469 }  // namespace TFTPU
470 
471 #define GEN_PASS_REGISTRATION
472 #include "tensorflow/compiler/mlir/tensorflow/transforms/tf_passes.h.inc"
473 
474 namespace TFDevice {
475 #define GEN_PASS_REGISTRATION
476 #include "tensorflow/compiler/mlir/tensorflow/transforms/tf_device_passes.h.inc"
477 }  // namespace TFDevice
478 
479 }  // namespace mlir
480 
481 #endif  // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_PASSES_H_
482