//===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities to generate mappings for parallel loops to
// GPU devices.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/ParallelLoopMapper.h"

#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/Pass/Pass.h"

using namespace mlir;
using namespace mlir::gpu;
using namespace mlir::scf;

#include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc"
#include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc"

namespace mlir {
namespace gpu {

StringRef getMappingAttrName() { return "mapping"; }

ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
                                                     AffineMap map,
                                                     AffineMap bound) {
  MLIRContext *context = map.getContext();
  OpBuilder builder(context);
  return ParallelLoopDimMapping::get(
      builder.getI64IntegerAttr(static_cast<int64_t>(processor)),
      AffineMapAttr::get(map), AffineMapAttr::get(bound), context);
}

LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
                             ArrayRef<ParallelLoopDimMapping> mapping) {
  // Verify that each processor is mapped to at most once.
  llvm::DenseSet<gpu::Processor> specifiedMappings;
  for (auto dimAttr : mapping) {
    gpu::Processor processor = getProcessor(dimAttr);
    if (processor != gpu::Processor::Sequential &&
        specifiedMappings.count(processor))
      return ploopOp.emitError(
          "invalid mapping multiple loops to same processor");
    specifiedMappings.insert(processor);
  }
  ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
  ploopOp.setAttr(getMappingAttrName(),
                  ArrayAttr::get(mappingAsAttrs, ploopOp.getContext()));
  return success();
}

} // namespace gpu
} // namespace mlir

namespace {

enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };

static constexpr int kNumHardwareIds = 3;

} // namespace

/// Bounded increment on MappingLevel. Increments to the next
/// level unless Sequential was already reached.
MappingLevel &operator++(MappingLevel &mappingLevel) {
  if (mappingLevel < Sequential) {
    mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
  }
  return mappingLevel;
}

/// Computes the hardware id to use for a given mapping level. Assigns the x,
/// y and z hardware ids to the first 3 dimensions and uses Sequential after
/// that.
/// TODO: Make the innermost distributed loop map to x, the next innermost to
/// y, and the next innermost to z.
static gpu::Processor getHardwareIdForMapping(MappingLevel level,
                                              int dimension) {

  if (dimension >= kNumHardwareIds || level == Sequential)
    return Processor::Sequential;

  switch (level) {
  case MapGrid:
    switch (dimension) {
    case 0:
      return Processor::BlockX;
    case 1:
      return Processor::BlockY;
    case 2:
      return Processor::BlockZ;
    default:
      return Processor::Sequential;
    }
    break;
  case MapBlock:
    switch (dimension) {
    case 0:
      return Processor::ThreadX;
    case 1:
      return Processor::ThreadY;
    case 2:
      return Processor::ThreadZ;
    default:
      return Processor::Sequential;
    }
  default:;
  }
  return Processor::Sequential;
}

/// Add mapping information to the given parallel loop. Do not add
/// mapping information if the loop already has it. Also, don't
/// start a mapping at a nested loop.
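///
/// Sketch of the intended effect (derived from getHardwareIdForMapping above;
/// the IR below is abbreviated, not verbatim): given
///   scf.parallel (%i, %j) = ... {
///     scf.parallel (%k) = ... { ... }
///   }
/// the outer dimensions are mapped to BlockX/BlockY, the immediately nested
/// dimension to ThreadX, and any deeper nesting to Processor::Sequential.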
static void mapParallelOp(ParallelOp parallelOp,
                          MappingLevel mappingLevel = MapGrid) {
  // Do not try to add a mapping to already mapped loops or nested loops.
  if (parallelOp.getAttr(getMappingAttrName()) ||
      ((mappingLevel == MapGrid) &&
       parallelOp->getParentOfType<ParallelOp>()))
    return;

  MLIRContext *ctx = parallelOp.getContext();
  Builder b(ctx);
  SmallVector<ParallelLoopDimMapping, 4> attrs;
  attrs.reserve(parallelOp.getNumLoops());
  for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) {
    attrs.push_back(getParallelLoopDimMappingAttr(
        getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
        b.getDimIdentityMap()));
  }
  setMappingAttr(parallelOp, attrs);
  ++mappingLevel;
  // Parallel loop operations are immediately nested, so do not use
  // walk but just iterate over the operations.
  for (Operation &op : *parallelOp.getBody()) {
    if (ParallelOp nested = dyn_cast<ParallelOp>(op))
      mapParallelOp(nested, mappingLevel);
  }
}

void mlir::greedilyMapParallelSCFToGPU(Region &region) {
  region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
}
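
// Example usage (a minimal sketch, not part of this file): a function pass
// that applies the greedy mapping to each function body. The pass name
// `GreedyMappingExamplePass` is hypothetical; it assumes the PassWrapper /
// FunctionPass infrastructure included via Pass.h above.
//
// namespace {
// struct GreedyMappingExamplePass
//     : public PassWrapper<GreedyMappingExamplePass, FunctionPass> {
//   void runOnFunction() override {
//     // Greedily annotate every top-level scf.parallel in the function.
//     greedilyMapParallelSCFToGPU(getFunction().getBody());
//   }
// };
// } // namespace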