//===- LinalgStructuredOps.td - Linalg dialect library ops -*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is the operation definition file for structured operations on buffers
// that correspond to underlying library calls (e.g. BLAS).
//
//===----------------------------------------------------------------------===//

#ifndef LINALG_STRUCTURED_OPS
#define LINALG_STRUCTURED_OPS

include "mlir/Dialect/Linalg/IR/LinalgBase.td"
include "mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td"
include "mlir/Interfaces/CopyOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"

// The Linalg `NInputs` trait provides the API for ops that are known
// to have a specified number of inputs, all passed as operands.
// See Linalg/LinalgTraits.h for implementation details and usage.
class NInputs<int n> :
  NativeOpTrait<"linalg::NInputs<" # !cast<string>(n) # ">::Impl"> {}

// The Linalg `ZeroInitTensors` trait provides the API for ops that are known
// to not have input tensor operands.
// See Linalg/LinalgTraits.h for implementation details and usage.
def ZeroInitTensors : NativeOpTrait<"linalg::ZeroInitTensors"> {}

// The Linalg `NOutputs` trait provides the API for ops that are known
// to have a specified number of outputs, all passed as operands.
// See Linalg/LinalgTraits.h for implementation details and usage.
class NOutputs<int n> :
  NativeOpTrait<"linalg::NOutputs<" # !cast<string>(n) # ">::Impl"> {}

def StructuredOpTraits : NativeOpTrait<"linalg::StructuredOpTraits">;
def NamedStructuredOpTrait : NativeOpTrait<"linalg::NamedStructuredOpTrait">;

// Base Tablegen class for Linalg ops.
// Linalg ops that correspond to library calls operate on ShapedType as their
// first operands. These may be optionally followed by non-view operands
// depending on the specific Linalg op.
class LinalgStructuredBase_Op<string mnemonic, list<OpTrait> props>
  : Op<Linalg_Dialect, mnemonic, !listconcat(props, [
       LinalgStructuredInterface])> {}

class LinalgStructured_Op<string mnemonic, list<OpTrait> props>
  : LinalgStructuredBase_Op<mnemonic,
       !listconcat(props, [
         StructuredOpTraits,
         DeclareOpInterfaceMethods<MemoryEffectsOpInterface>])> {
  code libraryCallName = [{
    std::string getLibraryCallName() {
      return generateLibraryCallName(getOperation());
    }
  }];
  let assemblyFormat = "`(` operands `)` attr-dict `:` type(operands)";
}

//===----------------------------------------------------------------------===//
// Named Linalg ops, implemented as special configurations of generic ops.
//===----------------------------------------------------------------------===//
// At the moment these are not declarative and require a bunch of C++ code.
// In the future, these should be migrated to a declarative specification.
def CopyOp : LinalgStructured_Op<"copy", [
    CopyOpInterface,
    NInputs<1>,
    ZeroInitTensors,
    NOutputs<1>
  ]> {
  let description = [{
    Copies the data in the input view into the output view.

    Usage:

    ```mlir
    linalg.copy(%arg0, %arg1) : memref<?xf32, stride_specification>,
                                memref<?xf32, stride_specification>
    ```

    One possible lowering to loop form is:

    ```mlir
    %0 = linalg.dim %arg0, 0 : index
    scf.for %i0 = %c0 to %0 step %c1 {
      %1 = load %arg0[%i0] : memref<?xf32, stride_specification>
      store %1, %arg1[%i0] : memref<?xf32, stride_specification>
    }
    ```

    Optionally, `inputPermutation` and `outputPermutation` attributes can be
    specified to reorder the dimensions of the input and output views.

    Usage:

    ```mlir
    linalg.copy(%arg0, %arg1) {inputPermutation : (i, j, k) -> (i, k, j),
                               outputPermutation : (i, j, k) -> (k, j, i)} :
      memref<?x?x?xf32, stride_specification>,
      memref<?x?x?xf32, stride_specification>
    ```

    One possible lowering to loop form is:

    ```mlir
    %0 = linalg.dim %arg0, 0
    %1 = linalg.dim %arg0, 1
    %2 = linalg.dim %arg0, 2
    scf.for %i0 = %c0 to %0 step %c1 {
      scf.for %i1 = %c0 to %1 step %c1 {
        scf.for %i2 = %c0 to %2 step %c1 {
          %3 = load %arg0[%i0, %i2, %i1] :
            memref<?x?x?xf32, stride_specification>
          store %3, %arg1[%i2, %i1, %i0] :
            memref<?x?x?xf32, stride_specification>
        }
      }
    }
    ```

    The views are expected to be compatible for correctness but this is not
    enforced at the moment.
  }];

  let arguments = (ins
    AnyStridedMemRef:$input,
    AnyStridedMemRef:$output,
    OptionalAttr<AffineMapAttr>:$inputPermutation,
    OptionalAttr<AffineMapAttr>:$outputPermutation);

  // TODO: this should go away once the usage of OptionalAttr triggers emission
  // of builders with default arguments left unspecified.
  let builders = [OpBuilderDAG<(ins "Value":$input, "Value":$output),
    [{
      return build(
        $_builder, $_state, input, output, AffineMapAttr(), AffineMapAttr());
    }]>];

  let extraClassDeclaration = libraryCallName # [{
    // Rank-polymorphic.
    // I(ivs) -> O(ivs) with parallel iterators.
    ArrayAttr iterator_types() {
      unsigned nPar = getInputShapedType(0).getRank();
      return Builder(getContext()).getStrArrayAttr(
        SmallVector<StringRef, 8>(nPar, getParallelIteratorTypeName()));
    }

    // I(input_perm(ivs)) -> O(output_perm(ivs))
    ArrayAttr indexing_maps() {
      MLIRContext *context = getContext();
      auto maybeInputMap = inputPermutation();
      auto maybeOutputMap = outputPermutation();
      unsigned inputRank = getInputShapedType(0).getRank();
      unsigned outputRank = getOutputShapedType(0).getRank();
      return Builder(getContext()).getAffineMapArrayAttr({
          extractOrIdentityMap(maybeInputMap, inputRank, context),
          extractOrIdentityMap(maybeOutputMap, outputRank, context)});
    }

    Value getSource() { return input(); }
    Value getTarget() { return output(); }

    static std::function<void(Block &)> getRegionBuilder() {
      return nullptr;
    }
  }];
  let verifier = [{ return ::verify(*this); }];

  let hasFolder = 1;
  let hasCanonicalizer = 1;
}

def FillOp : LinalgStructured_Op<"fill", [
    NInputs<0>,
    ZeroInitTensors,
    NOutputs<1>]> {

  let arguments = (ins AnyStridedMemRef:$output,
                   AnyTypeOf<[AnyFloat, AnySignlessInteger, AnyVector]>:$value);
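
  let description = [{
    Fills the output view with the given scalar or vector value. A minimal
    usage sketch, following the `(operands) : types` assembly format defined
    above (operand names are illustrative; the value operand must match the
    element type of the output view):

    ```mlir
    %cst = constant 0.0 : f32
    linalg.fill(%out, %cst) : memref<?x?xf32, stride_specification>, f32
    ```
  }];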

  let extraClassDeclaration = libraryCallName # [{
    // Rank-polymorphic.
    // filling_value -> O(ivs) with parallel iterators.
    ArrayAttr iterator_types() {
      unsigned nPar = getOutputShapedType(0).getRank();
      return Builder(getContext()).getStrArrayAttr(
        SmallVector<StringRef, 8>(nPar, getParallelIteratorTypeName()));
    }

    ArrayAttr indexing_maps() {
      MLIRContext *context = getContext();
      // filling_value -> O(ivs)
      return Builder(getContext()).getAffineMapArrayAttr({
          extractOrIdentityMap(llvm::None, getNumParallelLoops(), context)});
    }

    static std::function<void(Block &)> getRegionBuilder() {
      return nullptr;
    }
  }];

  let verifier = [{ return ::verify(*this); }];

  let hasFolder = 1;
  let hasCanonicalizer = 1;
}

/// A base class for pooling operations such as conv. The arguments must
/// contain optional arguments `strides`, `dilations` and `padding` with the
/// following types:
///   OptionalAttr<I64ArrayAttr>:$strides
///   OptionalAttr<I64ArrayAttr>:$dilations
///   OptionalAttr<I64ElementsAttr>:$padding
/// `strides` denotes the step of each window along the corresponding
/// dimension.
class PoolingBase_Op<string mnemonic, list<OpTrait> props>
  : LinalgStructured_Op<mnemonic, props> {
  let description = [{
    Performs an N-D pooling operation similar to the description in the TF
    documentation:
    https://www.tensorflow.org/api_docs/python/tf/nn/pool

    Unlike that description, this operation does not operate on the batch and
    channel dimensions. It only takes tensors of rank `N`.

    ```
      output[x[0], ..., x[N-1]] =
        REDUCE_{z[0], ..., z[N-1]}
          input[
                x[0] * strides[0] - pad_before[0] + dilation_rate[0]*z[0],
                ...
                x[N-1]*strides[N-1] - pad_before[N-1] + dilation_rate[N-1]*z[N-1]
                ],
    ```

    The optional attributes are:
      - strides: an i64 array specifying the stride (i.e. step) for window
        loops.
      - dilations: an i64 array specifying the filter upsampling/input
        downsampling rate.
      - padding: an i64 array of pairs (low, high) specifying the number of
        elements to pad along a dimension.

    If the strides or dilations attributes are missing, the default value is
    one for each of the input dimensions. Similarly, padding values are zero
    for both low and high in each of the dimensions, if not specified. The
    sketch below illustrates all three attributes.
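
    For instance, a hypothetical 2-D max pooling with all three attributes set
    explicitly could be written as follows (operand names and shapes are
    illustrative only; omitted attributes take the defaults described above):

    ```mlir
    linalg.pooling_max(%input, %windowDims, %output)
      { strides = [2, 2], dilations = [1, 1],
        padding = dense<[[1, 1], [0, 1]]> : tensor<2x2xi64> } :
      memref<?x?xf32>, memref<2x2xi32>, memref<?x?xf32>
    ```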
  }];

  code commonUtils = libraryCallName # [{
    int64_t getStride(unsigned i) {
      assert(i < getNumWindowLoops());
      if (!strides().hasValue()) return 1;
      return strides()->getValue()[i]
          .cast<IntegerAttr>().getValue().getSExtValue();
    }

    int64_t getDilation(unsigned i) {
      assert(i < getNumWindowLoops());
      if (!dilations().hasValue()) return 1;
      return dilations()->getValue()[i]
          .cast<IntegerAttr>().getValue().getSExtValue();
    }

    int64_t getLowPad(unsigned i) {
      assert(i < getNumWindowLoops());
      if (!padding().hasValue()) return 0;
      return padding().getValue().getValue<int64_t>({i, 0});
    }

    int64_t getHighPad(unsigned i) {
      assert(i < getNumWindowLoops());
      if (!padding().hasValue()) return 0;
      return padding().getValue().getValue<int64_t>({i, 1});
    }

    static std::function<void(Block &)> getRegionBuilder() {
      return nullptr;
    }
  }];
}

def ConvOp : PoolingBase_Op<"conv", [
    NInputs<2>,
    // Despite having reductions, this manually defined ConvOp may only take
    // memref operands and can never have init tensors.
    ZeroInitTensors,
    NOutputs<1>]> {

  let description = [{
    Generic n-D convolution as described in the TF documentation:
    https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/nn/convolution

    ```
      output[b, x[0], ..., x[N-1], k] =
      sum_{z[0], ..., z[N-1], q}
          filter[z[0], ..., z[N-1], q, k] *
          padded_input[b,
                       x[0] * strides[0] + dilation_rate[0] * z[0],
                       ...,
                       x[N-1] * strides[N-1] + dilation_rate[N-1] * z[N-1],
                       q]
    ```
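
    A concrete 2-D sketch, with illustrative operand names and shapes (a 3x3
    spatial filter, 4 input feature channels, 16 output feature channels),
    could look like:

    ```mlir
    linalg.conv(%filter, %input, %output) {strides = [2, 2]} :
      memref<3x3x4x16xf32>, memref<?x?x?x4xf32>, memref<?x?x?x16xf32>
    ```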
  }];

  // Following the TF source of truth above, strides, dilations and padding are
  // integer attributes of the same rank as the number of window dimensions.
  // The padding attribute specifies the amount of zero padding to be applied
  // to the base area, which is an n-D array of (low, high) padding. Each pair
  // has the low padding as the first element and the high padding as the
  // second element. Using padding is equivalent to inserting those same zero
  // values into the input before doing the convolution.
  let arguments = (ins AnyStridedMemRef:$filter, AnyStridedMemRef:$input,
                   AnyStridedMemRef:$output,
                   OptionalAttr<I64ArrayAttr>:$strides,
                   OptionalAttr<I64ArrayAttr>:$dilations,
                   OptionalAttr<I64ElementsAttr>:$padding);

  let extraClassDeclaration = commonUtils # [{
    // TODO: extend to support more than 1 dimension and potentially grouping
    // too.
    unsigned getNumBatchDimensions() { return 1; }

    unsigned getNumInputFeatureDimensions() { return 1; }

    unsigned getNumOutputFeatureDimensions() { return 1; }

    unsigned getNumSpatialDimensions() {
      return getOutputShapedType(0).getRank() - getNumBatchDimensions() -
             getNumOutputFeatureDimensions();
    }

    ArrayAttr iterator_types() {
      // Outer parallel loops are always the number of output dimensions; i.e.
      // [b, xs, q] in the TF notation above.
      unsigned nPar = getOutputShapedType(0).getRank();
      unsigned nRed = getNumInputFeatureDimensions();
      // Window loops are a special kind of reduction that is never tiled or
      // parallelized across; i.e. [zs] in the TF notation above whose number
      // matches `xs` (i.e. 1 window loop per "image" dimension).
      // This may evolve in the future.
      unsigned nWin =
          nPar - getNumBatchDimensions() - getNumInputFeatureDimensions();
      SmallVector<StringRef, 8> iters(nPar, getParallelIteratorTypeName());
      iters.reserve(nPar + nRed + nWin);
      iters.append(nRed, getReductionIteratorTypeName());
      iters.append(nWin, getWindowIteratorTypeName());
      return Builder(getContext()).getStrArrayAttr(iters);
    }

    //   F(z0, ..., zN-1, q, k) *
    //     I(b, x0 + z0 - pad_low_0, ..., xN-1 + zN-1 - pad_low_N-1, q)
    //   -> O(b, x0, ..., xN-1, k)
    // for N equal to `nWindow`. If there is no padding attribute, it is
    // ignored.
    ArrayAttr indexing_maps() {
      MLIRContext *context = getContext();
      auto nWin = getNumWindowLoops();
      assert(nWin > 0 && "expected at least one window dimension");
      unsigned idx = 0;
      // In the following, AffineDimExprs are indexed in loop order:
      //   [ b, xs, k,    q,                     zs]
      //     parallels    non-window reductions  windows
      //
      // Parallel dims are exactly the dimensions indexing `output`:
      //   output[b, x[0], ..., x[N-1], k]; i.e.
      //   * batch dimensions (bs with #bs = 1 for now)
      //   * "image" dimensions (xs with #xs = #zs = output_rank - #bs - #ks)
      //   * output filter dimensions (ks with #ks = 1 for now)
      auto bs = makeAffineDimExprs(getNumBatchDimensions(), idx, context);
      auto xs = makeAffineDimExprs(nWin, idx, context);
      auto ks = makeAffineDimExprs(
          getNumOutputFeatureDimensions(), idx, context);
      // Non-window reduction dim: sum_{z[0], ..., z[N-1], q}
      auto qs = makeAffineDimExprs(
          getNumInputFeatureDimensions(), idx, context);
      // Window reduction dims: sum_{z[0], ..., z[N-1], q}
      auto zs = makeAffineDimExprs(nWin, idx, context);
      // Construct the weightedSum expression.
      auto ws = weightedPoolingInputIndex(*this, xs, zs);
      return Builder(getContext()).getAffineMapArrayAttr({
          // filter[z[0], ..., z[N-1], q, k]
          AffineMap::get(idx, 0, concat(concat(zs, qs), ks), context),
          // input[b,
          //       x[0]*s[0] + d[0]*z[0] - pad_low[0],
          //       ...
          //       x[N-1]*s[N-1] + d[N-1]*z[N-1] - pad_low[N-1],
          //       q]
          AffineMap::get(idx, 0, concat(concat(bs, ws), qs), context),
          // output[b, x[0], ..., x[N-1], k]
          AffineMap::get(idx, 0, concat(concat(bs, xs), ks), context)});
    }
  }];

  let verifier = [{ return ::verify(*this); }];

  let hasFolder = 1;
  let hasCanonicalizer = 1;
}

class SingleInputPoolingBase_Op<string mnemonic>
  : PoolingBase_Op<mnemonic, [
      NInputs<2>,
      // Despite having reductions, these manually defined pooling ops may
      // only take memref operands and can never have init tensors.
      ZeroInitTensors,
      NOutputs<1>]> {
  let description = [{
    A base class for single-input pooling functions.

    TODO: Figure out a better way to handle window dimensions, i.e., eliminate
    the fake memref.
    The window dimensions are specified by the argument `windowDims`. The i-th
    dimension in the shape of `windowDims` denotes the size of the window along
    dimension i. For example, if the window size is 2x3, then a memref<2x3>
    should be passed to the operation as `windowDims`, as in the sketch below.
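
    A sketch of a sum pooling over a 2x3 window with stride 2 (names and
    shapes are illustrative; the memref<2x3xi32> operand only conveys the
    window shape and its element values are never read):

    ```mlir
    linalg.pooling_sum(%input, %fakeWindowDims, %output) {strides = [2, 2]} :
      memref<?x?xf32>, memref<2x3xi32>, memref<?x?xf32>
    ```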
  }];

  let arguments = (ins AnyStridedMemRef:$input,
                   AnyStridedMemRef:$windowDims,
                   AnyStridedMemRef:$output,
                   OptionalAttr<I64ArrayAttr>:$strides,
                   OptionalAttr<I64ArrayAttr>:$dilations,
                   OptionalAttr<I64ElementsAttr>:$padding);

  let extraClassDeclaration = commonUtils # [{
    ArrayAttr iterator_types() {
      // Outer parallel loops are always the number of output dimensions.
      unsigned nPar = getOutputShapedType(0).getRank();
      // The window loops have the same number of loops as the output
      // dimensions.
      unsigned nWin = nPar;
      SmallVector<StringRef, 8> iters(nPar, getParallelIteratorTypeName());
      iters.reserve(nPar + nWin);
      iters.append(nWin, getWindowIteratorTypeName());
      return Builder(getContext()).getStrArrayAttr(iters);
    }

    ArrayAttr indexing_maps() {
      MLIRContext *context = getContext();
      auto nPar = getNumParallelLoops();
      auto nWin = getNumWindowLoops();
      assert(nWin > 0 && "expected at least one window dimension");
      unsigned idx = 0;
      auto outputDims = makeAffineDimExprs(nPar, idx, context);
      auto windowDims = makeAffineDimExprs(nWin, idx, context);
      // Construct the weightedSum expression.
      auto inputDims =
          weightedPoolingInputIndex(*this, outputDims, windowDims);
      return Builder(getContext()).getAffineMapArrayAttr({
          // input
          AffineMap::get(idx, 0, inputDims, context),
          // windowDims
          AffineMap::get(idx, 0, windowDims, context),
          // output
          AffineMap::get(idx, 0, outputDims, context)});
    }
  }];

  let verifier = [{ return ::verify(*this); }];

  let hasFolder = 1;
  let hasCanonicalizer = 1;
}

def PoolingMaxOp: SingleInputPoolingBase_Op<"pooling_max"> {
  let description = [{
    Takes max as the pooling operation, i.e., it samples the maximum value in
    the window.
  }];
}

def PoolingMinOp: SingleInputPoolingBase_Op<"pooling_min"> {
  let description = [{
    Takes min as the pooling operation, i.e., it samples the minimum value in
    the window.
  }];
}

def PoolingSumOp: SingleInputPoolingBase_Op<"pooling_sum"> {
  let description = [{
    Takes add as the pooling operation, i.e., it accumulates the values in the
    window.
  }];
}

//===----------------------------------------------------------------------===//
// Generic Linalg ops.
//===----------------------------------------------------------------------===//
def LinalgOperand: AnyTypeOf<[AnyRankedTensor, AnyStridedMemRef]>;

class LinalgOperandOfRank<int rank>: Type<
  And<[
    LinalgOperand.predicate,
    CPred<"$_self.cast<ShapedType>().getRank() == " # rank>]
  >>;

class GenericOpBase<string mnemonic> : LinalgStructuredBase_Op<mnemonic, [
    AttrSizedOperandSegments,
    DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
    NamedStructuredOpTrait,
    SingleBlockImplicitTerminator<"YieldOp">]> {
  let arguments = (ins Variadic<AnyShaped>:$inputs,
                   Variadic<AnyMemRef>:$output_buffers,
                   Variadic<AnyRankedTensor>:$init_tensors,
                   AffineMapArrayAttr:$indexing_maps,
                   ArrayAttr:$iterator_types,
                   OptionalAttr<StrAttr>:$doc,
                   OptionalAttr<StrAttr>:$library_call,
                   // ArrayAttr of StrArrayAttr:
                   OptionalAttr<ArrayAttr>:$sparse);
  let results = (outs Variadic<AnyRankedTensor>:$result_tensors);
  let regions = (region AnyRegion:$region);
  let extraClassDeclaration = [{
    SmallVector<StringRef, 8> linalgTraitAttrNames() {
      return SmallVector<StringRef, 8>{
        getDocAttrName(),
        getIndexingMapsAttrName(), getLibraryCallAttrName(),
        getIteratorTypesAttrName(),
      };
    }
    std::string getLibraryCallName() {
      return library_call().hasValue() ?
          library_call()->str() : "op_has_no_registered_library_name";
    }

    static std::function<void(Block &)> getRegionBuilder() {
      return nullptr;
    }
  }];
  let printer = [{ return ::print(p, *this); }];
  let parser = [{ return ::parseGenericOp(parser, result); }];
}

/// Index-free GenericOp.
def GenericOp : GenericOpBase<"generic"> {
  let description = [{
    Generic Linalg op form where the key properties of the computation are
    specified as attributes. In pretty form, a `linalg.generic` op is written
    as:

    ```mlir
    linalg.generic #trait_attribute
        ins(%A, %B : memref<?x?xf32, stride_specification>,
                     memref<?x?xf32, stride_specification>)
        outs(%C : memref<?x?xf32, stride_specification>)
        attrs = {other-optional-attributes}
        {region}
    ```

    where `#trait_attribute` is an alias of a dictionary attribute containing:
      - doc [optional]: a documentation string
      - indexing_maps: a list of AffineMapAttr, one AffineMapAttr for each
        input and output view. Such an AffineMapAttr specifies the mapping
        between the loops and the indexing within each view.
      - library_call [optional]: a StringAttr containing the name of an
        external library function that the linalg.generic operation maps to.
        The external library is assumed to be dynamically linked and no strong
        compile-time guarantees are provided. In the absence of such a library
        call, linalg.generic will always lower to loops.
      - iterator_types: an ArrayAttr specifying the type of the enclosing
        loops. Each element of the list represents an iterator of one of the
        following types:
          parallel, reduction, window
      - sparse: an optional list with per-dimension sparsity annotations
        (either "D" for dense or "S" for sparse) for each input and output
        view; see the sketch after this list.
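
    A hypothetical trait carrying such sparsity annotations, following the
    trait notation of the matmul example below, could look like this (marking
    the input as sparse along its only dimension and the output as dense):

    ```mlir
    #sparse_trait = {
      indexing_maps = [
        (i) -> (i),
        (i) -> (i)
      ],
      iterator_types = ["parallel"],
      sparse = [["S"], ["D"]]
    }
    ```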

    Example:
    Defining a #matmul_trait attribute in MLIR can be done as follows:
    ```mlir
    #matmul_accesses = [
      (m, n, k) -> (m, k),
      (m, n, k) -> (k, n),
      (m, n, k) -> (m, n)
    ]
    #matmul_trait = {
      doc = "C(m, n) += A(m, k) * B(k, n)",
      indexing_maps = #matmul_accesses,
      library_call = "linalg_matmul",
      iterator_types = ["parallel", "parallel", "reduction"]
    }
    ```

    And can be reused in multiple places as:
    ```mlir
    linalg.generic #matmul_trait
      ins(%A, %B : memref<?x?xf32, stride_specification>,
                   memref<?x?xf32, stride_specification>)
      outs(%C : memref<?x?xf32, stride_specification>)
      {other-optional-attributes} {
      ^bb0(%a: f32, %b: f32, %c: f32) :
        %d = mulf %a, %b: f32
        %e = addf %c, %d: f32
        linalg.yield %e : f32
    }
    ```

    This may lower to either:
    ```mlir
    call @linalg_matmul(%A, %B, %C) :
      (memref<?x?xf32, stride_specification>,
       memref<?x?xf32, stride_specification>,
       memref<?x?xf32, stride_specification>)
      -> ()
    ```

    or IR resembling:
    ```mlir
    scf.for %m = %c0 to %M step %c1 {
      scf.for %n = %c0 to %N step %c1 {
        scf.for %k = %c0 to %K step %c1 {
          %a = load %A[%m, %k] : memref<?x?xf32, stride_specification>
          %b = load %B[%k, %n] : memref<?x?xf32, stride_specification>
          %c = load %C[%m, %n] : memref<?x?xf32, stride_specification>
          %d = mulf %a, %b: f32
          %e = addf %c, %d: f32
          store %e, %C[%m, %n] : memref<?x?xf32, stride_specification>
        }
      }
    }
    ```

    To allow progressive lowering from the value world (a.k.a. tensor values)
    to the buffer world (a.k.a. memref values), a `linalg.generic` op allows
    mixing tensor and buffer operands and tensor results.

    ```mlir
    %C = linalg.generic #trait_attribute
        ins(%A, %B : tensor<?x?xf32>, memref<?x?xf32, stride_specification>)
        init(%C : tensor<?x?xf32>)
        {other-optional-attributes}
        {region}
      -> (tensor<?x?xf32>)
    ```

    The `init` operand and the conventions around mixing tensors and buffers
    are described in more detail in the "Tensors and Buffers: Conventions and
    Limitations" section in the [Linalg Document](../docs/Linalg.md).

    Tensor values must be legalized by a buffer allocation pass before most
    transformations can be applied. Such legalizations move tensor return
    values into output buffer operands and update the region arguments
    accordingly.
  }];

  let builders = [
    OpBuilderDAG<(ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs,
      "ValueRange":$outputBuffers, "ValueRange":$initTensors,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      "StringRef":$doc, "StringRef":$libraryCall,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange)>",
           "nullptr">)>,
    OpBuilderDAG<(ins "ValueRange":$inputs, "ValueRange":$outputBuffers,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      "StringRef":$doc, "StringRef":$libraryCall,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange)>",
           "nullptr">)>,
    OpBuilderDAG<(ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs,
      "ValueRange":$outputBuffers, "ValueRange":$initTensors,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange)>",
           "nullptr">)>,
    OpBuilderDAG<(ins "ValueRange":$inputs, "ValueRange":$outputBuffers,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange)>",
           "nullptr">)>
  ];
  let verifier = [{ return ::verify(*this); }];

  let hasFolder = 1;
  let hasCanonicalizer = 1;
}

/// GenericOp with Indexing (i.e. multi-for style in which the region is passed
/// the enclosing loop induction variables).
def IndexedGenericOp : GenericOpBase<"indexed_generic"> {
  let description = [{
    Indexed Generic Linalg op form where the key properties of the computation
    are specified as attributes. In pretty form, a `linalg.indexed_generic` op
    is written as:

    ```mlir
    linalg.indexed_generic #trait_attribute
        ins(%A, %B : memref<?x?xf32, stride_specification>,
                     memref<?x?xf32, stride_specification>)
        outs(%C : memref<?x?xf32, stride_specification>)
        attrs = {other-optional-attributes}
        {region}
    ```

    where `#trait_attribute` is an alias of a dictionary attribute containing:
      - doc [optional]: a documentation string
      - indexing_maps: a list of AffineMapAttr, one AffineMapAttr for each
        input and output view. Such an AffineMapAttr specifies the mapping
        between the loops and the indexing within each view.
      - library_call [optional]: a StringAttr containing the name of an
        external library function that the linalg.indexed_generic operation
        maps to. The external library is assumed to be dynamically linked and
        no strong compile-time guarantees are provided. In the absence of such
        a library call, linalg.indexed_generic will always lower to loops.
      - iterator_types: an ArrayAttr specifying the type of the enclosing
        loops. Each element of the list represents an iterator of one of the
        following types:
          parallel, reduction, window

    Example:
    Defining a #matmul_trait attribute in MLIR can be done as follows:

    ```mlir
    #matmul_accesses = [
      (m, n, k) -> (m, k),
      (m, n, k) -> (k, n),
      (m, n, k) -> (m, n)
    ]
    #matmul_trait = {
      doc = "C(m, n) += A(m, k) * B(k, n)",
      indexing_maps = #matmul_accesses,
      library_call = "linalg_matmul",
      iterator_types = ["parallel", "parallel", "reduction"]
    }
    ```

    And can be reused in multiple places as:

    ```mlir
    linalg.indexed_generic #matmul_trait
      ins(%A, %B : memref<?x?xf32, stride_specification>,
                   memref<?x?xf32, stride_specification>)
      outs(%C : memref<?x?xf32, stride_specification>) {
      ^bb0(%offset_m: index, %offset_n: index, %offset_k: index,
           %a: f32, %b: f32, %c: f32) :
        "some_optional_computation"(%offset_m, %offset_n, %offset_k)
        %d = mulf %a, %b: f32
        %e = addf %c, %d: f32
        linalg.yield %e : f32
    }
    ```

    This may lower to either:

    ```mlir
    call @linalg_matmul(%offset_m, %offset_n, %offset_k, %A, %B, %C) :
      (index, index, index,
       memref<?x?xf32, stride_specification>,
       memref<?x?xf32, stride_specification>,
       memref<?x?xf32, stride_specification>)
      -> ()
    ```

    or IR resembling:

    ```mlir
    scf.for %m = %c0 to %M step %c1 {
      scf.for %n = %c0 to %N step %c1 {
        scf.for %k = %c0 to %K step %c1 {
          %a = load %A[%m, %k] : memref<?x?xf32, stride_specification>
          %b = load %B[%k, %n] : memref<?x?xf32, stride_specification>
          %c = load %C[%m, %n] : memref<?x?xf32, stride_specification>
          "some_optional_computation"(%m, %n, %k)
          %d = mulf %a, %b: f32
          %e = addf %c, %d: f32
          store %e, %C[%m, %n] : memref<?x?xf32, stride_specification>
        }
      }
    }
    ```

    To allow progressive lowering from the value world (a.k.a. tensor values)
    to the buffer world (a.k.a. memref values), a `linalg.indexed_generic` op
    allows mixing tensor and buffer operands and tensor results.

    ```mlir
    %C = linalg.indexed_generic #trait_attribute
        ins(%A, %B : tensor<?x?xf32>, memref<?x?xf32, stride_specification>)
        init(%C : tensor<?x?xf32>)
        {other-optional-attributes}
        {region_with_index_arguments}
      -> (tensor<?x?xf32>)
    ```

    The `init` operand and the conventions around mixing tensors and buffers
    are described in more detail in the "Tensors and Buffers: Conventions and
    Limitations" section in the [Linalg Document](../docs/Linalg.md).

    Tensor values must be legalized by a buffer allocation pass before most
    transformations can be applied. Such legalizations move tensor return
    values into output buffer operands and update the region arguments
    accordingly.
  }];

  let builders = [
    OpBuilderDAG<(ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs,
      "ValueRange":$outputBuffers, "ValueRange":$initTensors,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      "StringRef":$doc, "StringRef":$libraryCall,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange, ValueRange)>",
           "nullptr">)>,
    OpBuilderDAG<(ins "ValueRange":$inputs, "ValueRange":$outputBuffers,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      "StringRef":$doc, "StringRef":$libraryCall,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange, ValueRange)>",
           "nullptr">)>,
    OpBuilderDAG<(ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs,
      "ValueRange":$outputBuffers, "ValueRange":$initTensors,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange, ValueRange)>",
           "nullptr">)>,
    OpBuilderDAG<(ins "ValueRange":$inputs, "ValueRange":$outputBuffers,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange, ValueRange)>",
           "nullptr">)>
  ];
  let verifier = [{ return ::verify(*this); }];

  let hasFolder = 1;
  let hasCanonicalizer = 1;
}

//===----------------------------------------------------------------------===//
// Named Linalg ops, implemented as declarative configurations of generic ops.
//===----------------------------------------------------------------------===//

// The following file is auto-generated from a TC def specification.
include "mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.td"

#endif // LINALG_STRUCTURED_OPS