1//===- VectorOps.td - Vector op definitions ---------------*- tablegen -*-====// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// Defines MLIR vector operations. 10// 11//===----------------------------------------------------------------------===// 12 13#ifndef VECTOR_OPS 14#define VECTOR_OPS 15 16include "mlir/Interfaces/SideEffectInterfaces.td" 17include "mlir/Interfaces/VectorInterfaces.td" 18include "mlir/Interfaces/ViewLikeInterface.td" 19 20def Vector_Dialect : Dialect { 21 let name = "vector"; 22 let cppNamespace = "::mlir::vector"; 23 let hasConstantMaterializer = 1; 24} 25 26// Base class for Vector dialect ops. 27class Vector_Op<string mnemonic, list<OpTrait> traits = []> : 28 Op<Vector_Dialect, mnemonic, traits> { 29 // For every vector op, there needs to be a: 30 // * void print(OpAsmPrinter &p, ${C++ class of Op} op) 31 // * LogicalResult verify(${C++ class of Op} op) 32 // * ParseResult parse${C++ class of Op}(OpAsmParser &parser, 33 // OperationState &result) 34 // functions. 35 let printer = [{ return ::print(p, *this); }]; 36 let verifier = [{ return ::verify(*this); }]; 37 let parser = [{ return ::parse$cppClass(parser, result); }]; 38} 39 40// TODO: Add an attribute to specify a different algebra with operators other 41// than the current set: {*, +}. 42def Vector_ContractionOp : 43 Vector_Op<"contract", [ 44 NoSideEffect, 45 PredOpTrait<"lhs and rhs have same element type", TCopVTEtIsSameAs<0, 1>>, 46 PredOpTrait<"third operand acc and result have same element type", 47 TCresVTEtIsSameAsOpBase<0, 2>>, 48 DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]> 49 ]>, 50 Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, AnyType:$acc, 51 Variadic<VectorOf<[I1]>>:$masks, 52 AffineMapArrayAttr:$indexing_maps, ArrayAttr:$iterator_types)>, 53 Results<(outs AnyType)> { 54 let summary = "vector contraction operation"; 55 let description = [{ 56 Computes the sum of products of vector elements along contracting 57 dimension pairs from 2 vectors of rank M and N respectively, adds this 58 intermediate result to the accumulator argument of rank K, and returns a 59 vector result of rank K (where K = num_lhs_free_dims + num_rhs_free_dims + 60 num_batch_dims (see dimension type descriptions below)). For K = 0 (no 61 free or batch dimensions), the accumulator and output are a scalar. 62 63 Optional vector mask arguments (produced by CreateMaskOp or ConstantMaskOp) 64 specify the dynamic dimension sizes of valid data within the lhs/rhs vector 65 arguments. 66 67 An iterator type attribute list must be specified, where each element of 68 the list represents an iterator with one of the following types: 69 70 *) "reduction": reduction dimensions are present in the lhs and rhs 71 arguments but not in the output (and accumulator 72 argument). These are the dimensions along which the vector 73 contraction op computes the sum of products, and 74 contracting dimension pair dimension sizes must match 75 between lhs/rhs. 76 *) "parallel": Batch dimensions are iterator type "parallel", and 77 are non-contracting dimensions present in the lhs, rhs and 78 output. The lhs/rhs co-iterate along the batch dimensions, 79 which should be expressed in their indexing maps. 80 81 Free dimensions are iterator type "parallel", and are 82 non-contraction, non-batch dimensions accessed by either the 83 lhs or rhs (but not both). The lhs and rhs free dimensions 84 are unrelated to each other and do not co-iterate, which 85 should be expressed in their indexing maps. 86 87 An indexing map attribute list must be specified with an entry for lhs, rhs 88 and acc arguments. An indexing map attribute specifies a mapping from each 89 iterator in the iterator type list, to each dimension of an N-D vector. 90 91 Example: 92 93 ```mlir 94 // Simple DOT product (K = 0). 95 #contraction_accesses = [ 96 affine_map<(i) -> (i)>, 97 affine_map<(i) -> (i)>, 98 affine_map<(i) -> ()> 99 ] 100 #contraction_trait = { 101 indexing_maps = #contraction_accesses, 102 iterator_types = ["reduction"] 103 } 104 %3 = vector.contract #contraction_trait %0, %1, %2 105 : vector<10xf32>, vector<10xf32> into f32 106 107 // 2D vector contraction with one contracting dimension (matmul, K = 2). 108 #contraction_accesses = [ 109 affine_map<(i, j, k) -> (i, k)>, 110 affine_map<(i, j, k) -> (k, j)>, 111 affine_map<(i, j, k) -> (i, j)> 112 ] 113 #contraction_trait = { 114 indexing_maps = #contraction_accesses, 115 iterator_types = ["parallel", "parallel", "reduction"] 116 } 117 118 %3 = vector.contract #contraction_trait %0, %1, %2 119 : vector<4x3xf32>, vector<3x7xf32> into vector<4x7xf32> 120 121 // 4D to 3D vector contraction with two contracting dimensions and 122 // one batch dimension (K = 3). 123 #contraction_accesses = [ 124 affine_map<(b0, f0, f1, c0, c1) -> (c0, b0, c1, f0)>, 125 affine_map<(b0, f0, f1, c0, c1) -> (b0, c1, c0, f1)>, 126 affine_map<(b0, f0, f1, c0, c1) -> (b0, f0, f1)> 127 ] 128 #contraction_trait = { 129 indexing_maps = #contraction_accesses, 130 iterator_types = ["parallel", "parallel", "parallel", 131 "reduction", "reduction"] 132 } 133 134 %4 = vector.contract #contraction_trait %0, %1, %2 135 : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x5xf32> 136 137 // 4D vector contraction with two contracting dimensions and optional 138 // vector mask arguments. 139 %lhs_mask = vector.constant_mask [7, 8, 16, 15] : vector<7x8x16x15xi1> 140 %rhs_mask = vector.constant_mask [8, 16, 7, 5] : vector<8x16x7x5xi1> 141 142 %5 = vector.contract #contraction_trait %0, %1, %2, %lhs_mask, %rhs_mask 143 : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x8x5xf32> 144 145 // Vector contraction with mixed typed. lhs/rhs have different element 146 // types than accumulator/result. 147 %6 = vector.contract #contraction_trait %0, %1, %2 148 : vector<10xf16>, vector<10xf16> into f32 149 ``` 150 }]; 151 let builders = [ 152 OpBuilderDAG<(ins "Value":$lhs, "Value":$rhs, "Value":$acc, 153 "ArrayAttr":$indexingMaps, "ArrayAttr":$iteratorTypes)>, 154 OpBuilderDAG<(ins "Value":$lhs, "Value":$rhs, "Value":$acc, 155 "ArrayRef<ArrayRef<AffineExpr>>":$indexingExprs, 156 "ArrayRef<StringRef>":$iteratorTypes)> 157 ]; 158 let extraClassDeclaration = [{ 159 VectorType getLhsType() { 160 return lhs().getType().cast<VectorType>(); 161 } 162 VectorType getRhsType() { 163 return rhs().getType().cast<VectorType>(); 164 } 165 Type getAccType() { return acc().getType(); } 166 VectorType getLHSVectorMaskType() { 167 if (llvm::size(masks()) != 2) return VectorType(); 168 return getOperand(3).getType().cast<VectorType>(); 169 } 170 VectorType getRHSVectorMaskType() { 171 if (llvm::size(masks()) != 2) return VectorType(); 172 return getOperand(4).getType().cast<VectorType>(); 173 } 174 Type getResultType() { return getResult().getType(); } 175 ArrayRef<StringRef> getTraitAttrNames(); 176 SmallVector<AffineMap, 4> getIndexingMaps(); 177 static unsigned getAccOperandIndex() { return 2; } 178 179 // Returns the bounds of each dimension in the iteration space spanned 180 // by the iterator types of this operation. 181 void getIterationBounds(SmallVectorImpl<int64_t> &iterationBounds); 182 183 // Returns a list of index maps, where there is a list entry for each 184 // op indexing map attribute (i.e. one for each input and output, with 185 // the output listed last). Each index map, maps from this operations 186 // iteration space, to vector dimensions of the maps input/output. 187 void getIterationIndexMap( 188 std::vector<DenseMap<int64_t, int64_t>> &iterationIndexMap); 189 190 std::vector<std::pair<int64_t, int64_t>> getContractingDimMap(); 191 std::vector<std::pair<int64_t, int64_t>> getBatchDimMap(); 192 }]; 193} 194 195def Vector_ReductionOp : 196 Vector_Op<"reduction", [NoSideEffect, 197 PredOpTrait<"source operand and result have same element type", 198 TCresVTEtIsSameAsOpBase<0, 0>>]>, 199 Arguments<(ins StrAttr:$kind, AnyVector:$vector, Variadic<AnyType>:$acc)>, 200 Results<(outs AnyType:$dest)> { 201 let summary = "reduction operation"; 202 let description = [{ 203 Reduces an 1-D vector "horizontally" into a scalar using the given 204 operation (add/mul/min/max for int/fp and and/or/xor for int only). 205 Some reductions (add/mul for fp) also allow an optional fused 206 accumulator. 207 208 Note that these operations are restricted to 1-D vectors to remain 209 close to the corresponding LLVM intrinsics: 210 211 http://llvm.org/docs/LangRef.html#vector-reduction-intrinsics 212 213 Example: 214 215 ```mlir 216 %1 = vector.reduction "add", %0 : vector<16xf32> into f32 217 218 %3 = vector.reduction "xor", %2 : vector<4xi32> into i32 219 220 %4 = vector.reduction "mul", %0, %1 : vector<16xf32> into f32 221 ``` 222 }]; 223 let extraClassDeclaration = [{ 224 VectorType getVectorType() { 225 return vector().getType().cast<VectorType>(); 226 } 227 }]; 228} 229 230def Vector_BroadcastOp : 231 Vector_Op<"broadcast", [NoSideEffect, 232 PredOpTrait<"source operand and result have same element type", 233 TCresVTEtIsSameAsOpBase<0, 0>>]>, 234 Arguments<(ins AnyType:$source)>, 235 Results<(outs AnyVector:$vector)> { 236 let summary = "broadcast operation"; 237 let description = [{ 238 Broadcasts the scalar or k-D vector value in the source operand 239 to a n-D result vector such that the broadcast makes sense, i.e., 240 the source operand is duplicated to match the given rank and sizes 241 in the result vector. The legality rules are: 242 * the source operand must have the same element type as the result type 243 * a k-D vector <s_1 x .. x s_k x type> can be broadcast to 244 a n-D vector <t_1 x .. x t_n x type> if 245 * k <= n, and 246 * the sizes in the trailing dimensions n-k < i <= n with j=i+k-n 247 match exactly as s_j = t_i or s_j = 1: 248 ``` 249 t_1 x .. t_n-k x t_n-k+1 x .. x t_i x .. x t_n 250 s_1 x .. x s_j x .. x s_k 251 <duplication> <potential stretch> 252 ``` 253 The source operand is duplicated over all the missing leading dimensions 254 and stretched over the trailing dimensions where the source has a non-equal 255 dimension of 1. These rules imply that any scalar broadcast (k=0) to any 256 shaped vector with the same element type is always legal. 257 258 Example: 259 260 ```mlir 261 %0 = constant 0.0 : f32 262 %1 = vector.broadcast %0 : f32 to vector<16xf32> 263 %2 = vector.broadcast %1 : vector<16xf32> to vector<4x16xf32> 264 ``` 265 }]; 266 let extraClassDeclaration = [{ 267 Type getSourceType() { return source().getType(); } 268 VectorType getVectorType() { 269 return vector().getType().cast<VectorType>(); 270 } 271 }]; 272 let assemblyFormat = "$source attr-dict `:` type($source) `to` type($vector)"; 273 let hasFolder = 1; 274} 275 276def Vector_ShuffleOp : 277 Vector_Op<"shuffle", [NoSideEffect, 278 PredOpTrait<"first operand v1 and result have same element type", 279 TCresVTEtIsSameAsOpBase<0, 0>>, 280 PredOpTrait<"second operand v2 and result have same element type", 281 TCresVTEtIsSameAsOpBase<0, 1>>]>, 282 Arguments<(ins AnyVector:$v1, AnyVector:$v2, I64ArrayAttr:$mask)>, 283 Results<(outs AnyVector:$vector)> { 284 let summary = "shuffle operation"; 285 let description = [{ 286 The shuffle operation constructs a permutation (or duplication) of elements 287 from two input vectors, returning a vector with the same element type as 288 the input and a length that is the same as the shuffle mask. The two input 289 vectors must have the same element type, rank, and trailing dimension sizes 290 and shuffles their values in the leading dimension (which may differ in size) 291 according to the given mask. The legality rules are: 292 * the two operands must have the same element type as the result 293 * the two operands and the result must have the same rank and trailing 294 dimension sizes, viz. given two k-D operands 295 v1 : <s_1 x s_2 x .. x s_k x type> and 296 v2 : <t_1 x t_2 x .. x t_k x type> 297 we have s_i = t_i for all 1 < i <= k 298 * the mask length equals the leading dimension size of the result 299 * numbering the input vector indices left to right across the operands, all 300 mask values must be within range, viz. given two k-D operands v1 and v2 301 above, all mask values are in the range [0,s_1+t_1) 302 303 Example: 304 305 ```mlir 306 %0 = vector.shuffle %a, %b[0, 3] 307 : vector<2xf32>, vector<2xf32> ; yields vector<2xf32> 308 %1 = vector.shuffle %c, %b[0, 1, 2] 309 : vector<2x16xf32>, vector<1x16xf32> ; yields vector<3x16xf32> 310 %2 = vector.shuffle %a, %b[3, 2, 1, 0] 311 : vector<2xf32>, vector<2xf32> ; yields vector<4xf32> 312 ``` 313 }]; 314 let builders = [ 315 OpBuilderDAG<(ins "Value":$v1, "Value":$v2, "ArrayRef<int64_t>")> 316 ]; 317 let extraClassDeclaration = [{ 318 static StringRef getMaskAttrName() { return "mask"; } 319 VectorType getV1VectorType() { 320 return v1().getType().cast<VectorType>(); 321 } 322 VectorType getV2VectorType() { 323 return v2().getType().cast<VectorType>(); 324 } 325 VectorType getVectorType() { 326 return vector().getType().cast<VectorType>(); 327 } 328 }]; 329} 330 331def Vector_ExtractElementOp : 332 Vector_Op<"extractelement", [NoSideEffect, 333 TypesMatchWith<"result type matches element type of vector operand", 334 "vector", "result", 335 "$_self.cast<ShapedType>().getElementType()">]>, 336 Arguments<(ins AnyVector:$vector, AnySignlessInteger:$position)>, 337 Results<(outs AnyType:$result)> { 338 let summary = "extractelement operation"; 339 let description = [{ 340 Takes an 1-D vector and a dynamic index position and extracts the 341 scalar at that position. Note that this instruction resembles 342 vector.extract, but is restricted to 1-D vectors and relaxed 343 to dynamic indices. It is meant to be closer to LLVM's version: 344 https://llvm.org/docs/LangRef.html#extractelement-instruction 345 346 Example: 347 348 ```mlir 349 %c = constant 15 : i32 350 %1 = vector.extractelement %0[%c : i32]: vector<16xf32> 351 ``` 352 }]; 353 let assemblyFormat = [{ 354 $vector `[` $position `:` type($position) `]` attr-dict `:` type($vector) 355 }]; 356 357 let builders = [ 358 OpBuilderDAG<(ins "Value":$source, "int64_t":$position)>, 359 OpBuilderDAG<(ins "Value":$source, "Value":$position)> 360 ]; 361 let extraClassDeclaration = [{ 362 VectorType getVectorType() { 363 return vector().getType().cast<VectorType>(); 364 } 365 }]; 366} 367 368def Vector_ExtractOp : 369 Vector_Op<"extract", [NoSideEffect, 370 PredOpTrait<"operand and result have same element type", 371 TCresVTEtIsSameAsOpBase<0, 0>>]>, 372 Arguments<(ins AnyVector:$vector, I64ArrayAttr:$position)>, 373 Results<(outs AnyType)> { 374 let summary = "extract operation"; 375 let description = [{ 376 Takes an n-D vector and a k-D position and extracts the (n-k)-D vector at 377 the proper position. Degenerates to an element type in the 0-D case. 378 379 Example: 380 381 ```mlir 382 %1 = vector.extract %0[3]: vector<4x8x16xf32> 383 %2 = vector.extract %0[3, 3, 3]: vector<4x8x16xf32> 384 ``` 385 }]; 386 let builders = [ 387 OpBuilderDAG<(ins "Value":$source, "ArrayRef<int64_t>":$position)>, 388 // Convenience builder which assumes the values in `position` are defined by 389 // ConstantIndexOp. 390 OpBuilderDAG<(ins "Value":$source, "ValueRange":$position)> 391 ]; 392 let extraClassDeclaration = [{ 393 static StringRef getPositionAttrName() { return "position"; } 394 VectorType getVectorType() { 395 return vector().getType().cast<VectorType>(); 396 } 397 }]; 398 let hasFolder = 1; 399} 400 401def Vector_ExtractSlicesOp : 402 Vector_Op<"extract_slices", [NoSideEffect]>, 403 Arguments<(ins AnyVector:$vector, I64ArrayAttr:$sizes, 404 I64ArrayAttr:$strides)>, 405 Results<(outs TupleOf<[AnyVector]>)> { 406 let summary = "vector extract slices operation"; 407 let description = [{ 408 Takes an N-d vector and returns a tuple of vector slices of 'vector', 409 based on 'sizes' and 'strides' parameters. 410 411 The arguments 'sizes' and 'strides' represent a specification for 412 generating the unrolling of 'vector' shape, which has all slices of shape 413 'sizes' except for slices at dimension boundaries when 'vector' dimension 414 sizes are not a multiple of 'sizes'. 415 416 Each slice is returned at the tuple element index corresponding to the 417 linear index of the slice w.r.t the unrolling scheme represented by 'sizes'. 418 Currently, only unit strides are supported. 419 420 Example: 421 422 ```mlir 423 %0 = vector.transfer_read ...: vector<4x2xf32> 424 425 %1 = vector.extract_slices %0, [2, 2], [1, 1] 426 : vector<4x2xf32> into tuple<vector<2x2xf32>, vector<2x2xf32>> 427 428 // Example with partial slices at dimension boundaries. 429 %2 = vector.transfer_read ...: vector<4x3xf32> 430 431 %3 = vector.extract_slices %2, [2, 2], [1, 1] 432 : vector<4x3xf32> into tuple<vector<2x2xf32>, vector<2x1xf32>, 433 vector<2x2xf32>, vector<2x1xf32>> 434 ``` 435 }]; 436 let builders = [ 437 OpBuilderDAG<(ins "TupleType":$tupleType, "Value":$vector, 438 "ArrayRef<int64_t>":$sizes, "ArrayRef<int64_t>":$strides)> 439 ]; 440 let extraClassDeclaration = [{ 441 VectorType getSourceVectorType() { 442 return vector().getType().cast<VectorType>(); 443 } 444 TupleType getResultTupleType() { 445 return getResult().getType().cast<TupleType>(); 446 } 447 void getSizes(SmallVectorImpl<int64_t> &results); 448 void getStrides(SmallVectorImpl<int64_t> &results); 449 static StringRef getSizesAttrName() { return "sizes"; } 450 static StringRef getStridesAttrName() { return "strides"; } 451 }]; 452 let assemblyFormat = [{ 453 $vector `,` $sizes `,` $strides attr-dict `:` type($vector) `into` 454 type(results) 455 }]; 456} 457 458def Vector_ExtractMapOp : 459 Vector_Op<"extract_map", [NoSideEffect]>, 460 Arguments<(ins AnyVector:$vector, Variadic<Index>:$ids)>, 461 Results<(outs AnyVector)> { 462 let summary = "vector extract map operation"; 463 let description = [{ 464 Takes an N-D vector and extracts a sub-part of the vector starting at id 465 along each dimension. 466 467 The dimension associated to each element of `ids` used to extract are 468 implicitly deduced from the the destination type. For each dimension the 469 multiplicity is the destination dimension size divided by the source 470 dimension size, each dimension with a multiplicity greater than 1 is 471 associated to the next id, following ids order. 472 For example if the source type is `vector<64x4x32xf32>` and the destination 473 type is `vector<4x4x2xf32>`, the first id maps to dimension 0 and the second 474 id to dimension 2. 475 476 Similarly to vector.tuple_get, this operation is used for progressive 477 lowering and should be folded away before converting to LLVM. 478 479 It is different than `vector.extract_slice` and 480 `vector.extract_strided_slice` as it takes a Value as index instead of an 481 attribute. Also in the future it is meant to support extracting along any 482 dimensions and not only the most major ones. 483 484 For instance: 485 ``` 486 // dynamic computation producing the value 0 of index type 487 %idx0 = ... : index 488 // dynamic computation producing the value 1 of index type 489 %idx1 = ... : index 490 %0 = constant dense<0, 1, 2, 3>: vector<4xi32> 491 // extracts values [0, 1] 492 %1 = vector.extract_map %0[%idx0] : vector<4xi32> to vector<2xi32> 493 // extracts values [1, 2] 494 %2 = vector.extract_map %0[%idx1] : vector<4xi32> to vector<2xi32> 495 ``` 496 497 Example: 498 499 ```mlir 500 %ev = vector.extract_map %v[%id] : vector<32xf32> to vector<1xf32> 501 %ev1 = vector.extract_map %v1[%id1, %id2] : vector<64x4x32xf32> 502 to vector<4x4x2xf32> 503 ``` 504 }]; 505 let builders = [ 506 OpBuilderDAG<(ins "Value":$vector, "ValueRange":$ids, 507 "ArrayRef<int64_t>":$multiplicity, 508 "AffineMap":$map)>]; 509 let extraClassDeclaration = [{ 510 VectorType getSourceVectorType() { 511 return vector().getType().cast<VectorType>(); 512 } 513 VectorType getResultType() { 514 return getResult().getType().cast<VectorType>(); 515 } 516 void getMultiplicity(SmallVectorImpl<int64_t> &multiplicity); 517 AffineMap map(); 518 }]; 519 let assemblyFormat = [{ 520 $vector `[` $ids `]` attr-dict `:` type($vector) `to` type(results) 521 }]; 522 523 let hasFolder = 1; 524} 525 526def Vector_FMAOp : 527 Op<Vector_Dialect, "fma", [NoSideEffect, 528 AllTypesMatch<["lhs", "rhs", "acc", "result"]>]>, 529 Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, AnyVector:$acc)>, 530 Results<(outs AnyVector:$result)> { 531 let summary = "vector fused multiply-add"; 532 let description = [{ 533 Multiply-add expressions operate on n-D vectors and compute a fused 534 pointwise multiply-and-accumulate: `$result = `$lhs * $rhs + $acc`. 535 All operands and result have the same vector type. The semantics 536 of the operation correspond to those of the `llvm.fma` 537 [intrinsic](https://llvm.org/docs/LangRef.html#int-fma). In the 538 particular case of lowering to LLVM, this is guaranteed to lower 539 to the `llvm.fma.*` intrinsic. 540 541 Example: 542 543 ```mlir 544 %3 = vector.fma %0, %1, %2: vector<8x16xf32> 545 ``` 546 }]; 547 // Fully specified by traits. 548 let verifier = ?; 549 let assemblyFormat = "$lhs `,` $rhs `,` $acc attr-dict `:` type($lhs)"; 550 let builders = [ 551 OpBuilderDAG<(ins "Value":$lhs, "Value":$rhs, "Value":$acc), 552 [{build($_builder, $_state, lhs.getType(), lhs, rhs, acc);}]> 553 ]; 554 let extraClassDeclaration = [{ 555 VectorType getVectorType() { return lhs().getType().cast<VectorType>(); } 556 }]; 557} 558 559def Vector_InsertElementOp : 560 Vector_Op<"insertelement", [NoSideEffect, 561 TypesMatchWith<"source operand type matches element type of result", 562 "result", "source", 563 "$_self.cast<ShapedType>().getElementType()">, 564 AllTypesMatch<["dest", "result"]>]>, 565 Arguments<(ins AnyType:$source, AnyVector:$dest, 566 AnySignlessInteger:$position)>, 567 Results<(outs AnyVector:$result)> { 568 let summary = "insertelement operation"; 569 let description = [{ 570 Takes a scalar source, an 1-D destination vector and a dynamic index 571 position and inserts the source into the destination at the proper 572 position. Note that this instruction resembles vector.insert, but 573 is restricted to 1-D vectors and relaxed to dynamic indices. It is 574 meant to be closer to LLVM's version: 575 https://llvm.org/docs/LangRef.html#insertelement-instruction 576 577 Example: 578 579 ```mlir 580 %c = constant 15 : i32 581 %f = constant 0.0f : f32 582 %1 = vector.insertelement %f, %0[%c : i32]: vector<16xf32> 583 ``` 584 }]; 585 let assemblyFormat = [{ 586 $source `,` $dest `[` $position `:` type($position) `]` attr-dict `:` 587 type($result) 588 }]; 589 590 let builders = [ 591 OpBuilderDAG<(ins "Value":$source, "Value":$dest, "int64_t":$position)>, 592 OpBuilderDAG<(ins "Value":$source, "Value":$dest, "Value":$position)> 593 ]; 594 let extraClassDeclaration = [{ 595 Type getSourceType() { return source().getType(); } 596 VectorType getDestVectorType() { 597 return dest().getType().cast<VectorType>(); 598 } 599 }]; 600 601} 602 603def Vector_InsertOp : 604 Vector_Op<"insert", [NoSideEffect, 605 PredOpTrait<"source operand and result have same element type", 606 TCresVTEtIsSameAsOpBase<0, 0>>, 607 AllTypesMatch<["dest", "res"]>]>, 608 Arguments<(ins AnyType:$source, AnyVector:$dest, I64ArrayAttr:$position)>, 609 Results<(outs AnyVector:$res)> { 610 let summary = "insert operation"; 611 let description = [{ 612 Takes an n-D source vector, an (n+k)-D destination vector and a k-D position 613 and inserts the n-D source into the (n+k)-D destination at the proper 614 position. Degenerates to a scalar source type when n = 0. 615 616 Example: 617 618 ```mlir 619 %2 = vector.insert %0, %1[3] : vector<8x16xf32> into vector<4x8x16xf32> 620 %5 = vector.insert %3, %4[3, 3, 3] : f32 into vector<4x8x16xf32> 621 ``` 622 }]; 623 let assemblyFormat = [{ 624 $source `,` $dest $position attr-dict `:` type($source) `into` type($dest) 625 }]; 626 627 let builders = [ 628 OpBuilderDAG<(ins "Value":$source, "Value":$dest, 629 "ArrayRef<int64_t>":$position)>, 630 // Convenience builder which assumes all values are constant indices. 631 OpBuilderDAG<(ins "Value":$source, "Value":$dest, "ValueRange":$position)> 632 ]; 633 let extraClassDeclaration = [{ 634 static StringRef getPositionAttrName() { return "position"; } 635 Type getSourceType() { return source().getType(); } 636 VectorType getDestVectorType() { 637 return dest().getType().cast<VectorType>(); 638 } 639 }]; 640} 641 642def Vector_InsertSlicesOp : 643 Vector_Op<"insert_slices", [NoSideEffect]>, 644 Arguments<(ins TupleOf<[AnyVector]>:$vectors, I64ArrayAttr:$sizes, 645 I64ArrayAttr:$strides)>, 646 Results<(outs AnyVector)> { 647 let summary = "vector insert slices operation"; 648 let description = [{ 649 Takes a tuple of vector slices and inserts them into the vector result 650 according to the 'sizes' and 'strides' parameters. 651 652 The arguments 'sizes' and 'strides' represent a specification for 653 generating the unrolling of 'vector' shape, which has all slices of shape 654 'sizes' except for slices at dimension boundaries when 'vector' dimension 655 sizes are not a multiple of 'sizes'. 656 657 Each slice in 'vectors' is at the tuple element index corresponding to the 658 linear index of the slice w.r.t the unrolling scheme represented by 'sizes'. 659 Currently, only unit strides are supported. 660 661 Example: 662 663 ```mlir 664 %0 = vector.extract_slices %0, [2, 2], [1, 1] 665 : vector<4x2xf32> into tuple<vector<2x2xf32>, vector<2x2xf32>> 666 667 %1 = vector.insert_slices %0, [2, 2], [1, 1] 668 : tuple<vector<2x2xf32>, vector<2x2xf32>> into vector<4x2xf32> 669 670 // Example with partial slices at dimension boundaries. 671 %3 = vector.extract_slices %2, [2, 2], [1, 1] 672 : vector<4x3xf32> into tuple<vector<2x2xf32>, vector<2x1xf32>, 673 vector<2x2xf32>, vector<2x1xf32>> 674 675 %4 = vector.insert_slices %3, [2, 2], [1, 1] 676 : tuple<vector<2x2xf32>, vector<2x1xf32>, 677 vector<2x2xf32>, vector<2x1xf32>> into vector<4x3xf32> 678 ``` 679 }]; 680 681 let extraClassDeclaration = [{ 682 TupleType getSourceTupleType() { 683 return vectors().getType().cast<TupleType>(); 684 } 685 VectorType getResultVectorType() { 686 return getResult().getType().cast<VectorType>(); 687 } 688 void getSizes(SmallVectorImpl<int64_t> &results); 689 void getStrides(SmallVectorImpl<int64_t> &results); 690 static StringRef getSizesAttrName() { return "sizes"; } 691 static StringRef getStridesAttrName() { return "strides"; } 692 }]; 693 let assemblyFormat = [{ 694 $vectors `,` $sizes `,` $strides attr-dict `:` type($vectors) `into` 695 type(results) 696 }]; 697} 698 699def Vector_InsertMapOp : 700 Vector_Op<"insert_map", [NoSideEffect, AllTypesMatch<["dest", "result"]>]>, 701 Arguments<(ins AnyVector:$vector, AnyVector:$dest, Variadic<Index>:$ids)>, 702 Results<(outs AnyVector:$result)> { 703 let summary = "vector insert map operation"; 704 let description = [{ 705 Inserts a N-D vector and within a larger vector starting at id. The new 706 vector created will have the same size as the destination operand vector. 707 708 The dimension associated to each element of `ids` used to insert is 709 implicitly deduced from the source type (see `ExtractMapOp` for details). 710 For example if source type is `vector<4x4x2xf32>` and the destination type 711 is `vector<64x4x32xf32>`, the first id maps to dimension 0 and the second id 712 to dimension 2. 713 714 Similarly to vector.tuple_get, this operation is used for progressive 715 lowering and should be folded away before converting to LLVM. 716 717 It is different than `vector.insert` and `vector.insert_strided_slice` as it 718 takes a Value as index instead of an attribute. Also in the future it is 719 meant to support inserting along any dimensions and not only the most major 720 ones. 721 722 This operations is meant to be used in combination with vector.extract_map. 723 724 For instance: 725 ``` 726 // dynamic computation producing the value 0 of index type 727 %idx0 = ... : index 728 // dynamic computation producing the value 1 of index type 729 %idx1 = ... : index / 730 %0 = constant dense<0, 1, 2, 3>: vector<4xi32> 731 // extracts values [0, 1] 732 %1 = vector.extract_map %0[%idx0] : vector<4xi32> to vector<2xi32> 733 // extracts values [1, 2] 734 %2 = vector.extract_map %0[%idx1] : vector<4xi32> to vector<2xi32> 735 // insert [0, 1] into [x, x, x, x] and produce [0, 1, x, x] 736 %3 = vector.insert_map %1, %0[%idx0] : vector<2xi32> into vector<4xi32> 737 // insert [1, 2] into [x, x, x, x] and produce [x, 1, 2, x] 738 %4 = vector.insert_map %2, %0[%idx1] : vector<2xi32> into vector<4xi32> 739 ``` 740 Example: 741 742 ```mlir 743 %v = vector.insert_map %ev %v[%id] : vector<1xf32> into vector<32xf32> 744 %v1 = vector.insert_map %ev1, %v1[%arg0, %arg1] : vector<2x4x1xf32> 745 into vector<64x4x32xf32> 746 ``` 747 }]; 748 let builders = [OpBuilderDAG<(ins "Value":$vector, "Value":$dest, 749 "ValueRange":$ids)>]; 750 let extraClassDeclaration = [{ 751 VectorType getSourceVectorType() { 752 return vector().getType().cast<VectorType>(); 753 } 754 VectorType getResultType() { 755 return getResult().getType().cast<VectorType>(); 756 } 757 // Return a map indicating the dimension mapping to the given ids. 758 AffineMap map(); 759 }]; 760 let assemblyFormat = [{ 761 $vector `,` $dest `[` $ids `]` attr-dict 762 `:` type($vector) `into` type($result) 763 }]; 764} 765 766def Vector_InsertStridedSliceOp : 767 Vector_Op<"insert_strided_slice", [NoSideEffect, 768 PredOpTrait<"operand #0 and result have same element type", 769 TCresVTEtIsSameAsOpBase<0, 0>>, 770 AllTypesMatch<["dest", "res"]>]>, 771 Arguments<(ins AnyVector:$source, AnyVector:$dest, I64ArrayAttr:$offsets, 772 I64ArrayAttr:$strides)>, 773 Results<(outs AnyVector:$res)> { 774 let summary = "strided_slice operation"; 775 let description = [{ 776 Takes a k-D source vector, an n-D destination vector (n >= k), n-sized 777 `offsets` integer array attribute, a k-sized `strides` integer array attribute 778 and inserts the k-D source vector as a strided subvector at the proper offset 779 into the n-D destination vector. 780 781 At the moment strides must contain only 1s. 782 783 Returns an n-D vector that is a copy of the n-D destination vector in which 784 the last k-D dimensions contain the k-D source vector elements strided at 785 the proper location as specified by the offsets. 786 787 Example: 788 789 ```mlir 790 %2 = vector.insert_strided_slice %0, %1 791 {offsets = [0, 0, 2], strides = [1, 1]}: 792 vector<2x4xf32> into vector<16x4x8xf32> 793 ``` 794 }]; 795 796 let assemblyFormat = [{ 797 $source `,` $dest attr-dict `:` type($source) `into` type($dest) 798 }]; 799 800 let builders = [ 801 OpBuilderDAG<(ins "Value":$source, "Value":$dest, 802 "ArrayRef<int64_t>":$offsets, "ArrayRef<int64_t>":$strides)> 803 ]; 804 let extraClassDeclaration = [{ 805 static StringRef getOffsetsAttrName() { return "offsets"; } 806 static StringRef getStridesAttrName() { return "strides"; } 807 VectorType getSourceVectorType() { 808 return source().getType().cast<VectorType>(); 809 } 810 VectorType getDestVectorType() { 811 return dest().getType().cast<VectorType>(); 812 } 813 }]; 814} 815 816def Vector_OuterProductOp : 817 Vector_Op<"outerproduct", [NoSideEffect, 818 PredOpTrait<"lhs operand and result have same element type", 819 TCresVTEtIsSameAsOpBase<0, 0>>, 820 PredOpTrait<"rhs operand and result have same element type", 821 TCresVTEtIsSameAsOpBase<0, 1>>]>, 822 Arguments<(ins AnyVector:$lhs, AnyType:$rhs, Variadic<AnyVector>:$acc)>, 823 Results<(outs AnyVector)> { 824 let summary = "vector outerproduct with optional fused add"; 825 let description = [{ 826 Takes 2 1-D vectors and returns the 2-D vector containing the outer-product, 827 as illustrated below: 828 ``` 829 outer | [c, d] 830 ------+------------ 831 [a, | [ [a*c, a*d], 832 b] | [b*c, b*d] ] 833 ``` 834 This operation also accepts a 1-D vector lhs and a scalar rhs. In this 835 case a simple AXPY operation is performed, which returns a 1-D vector. 836 ``` 837 [a, b] * c = [a*c, b*c] 838 ``` 839 840 An optional extra vector argument with the same shape as the output 841 vector may be specified in which case the operation returns the sum of 842 the outer-product and the extra vector. In this multiply-accumulate 843 scenario for floating-point arguments, the rounding mode is enforced 844 by guaranteeing that a fused-multiply add operation is emitted. When 845 lowered to the LLVMIR dialect, this form emits `llvm.intr.fma`, which 846 is guaranteed to lower to actual `fma` instructions on x86. 847 848 Example: 849 850 ``` 851 %2 = vector.outerproduct %0, %1: vector<4xf32>, vector<8xf32> 852 return %2: vector<4x8xf32> 853 854 %3 = vector.outerproduct %0, %1, %2: 855 vector<4xf32>, vector<8xf32>, vector<4x8xf32> 856 return %3: vector<4x8xf32> 857 858 %6 = vector.outerproduct %4, %5: vector<10xf32>, f32 859 return %6: vector<10xf32> 860 861 ``` 862 }]; 863 let builders = [ 864 // Build an op without mask, use the type of `acc` as the return type. 865 OpBuilderDAG<(ins "Value":$lhs, "Value":$rhs, "Value":$acc)> 866 ]; 867 let extraClassDeclaration = [{ 868 VectorType getOperandVectorTypeLHS() { 869 return lhs().getType().cast<VectorType>(); 870 } 871 Type getOperandTypeRHS() { 872 return rhs().getType(); 873 } 874 VectorType getOperandVectorTypeACC() { 875 return (llvm::size(acc()) == 0) 876 ? VectorType() 877 : (*acc().begin()).getType().cast<VectorType>(); 878 } 879 VectorType getVectorType() { 880 return getResult().getType().cast<VectorType>(); 881 } 882 }]; 883} 884 885// TODO: Add transformation which decomposes ReshapeOp into an optimized 886// sequence of vector rotate/shuffle/select operations. 887def Vector_ReshapeOp : 888 Vector_Op<"reshape", [AttrSizedOperandSegments, NoSideEffect]>, 889 Arguments<(ins AnyVector:$vector, Variadic<Index>:$input_shape, 890 Variadic<Index>:$output_shape, 891 I64ArrayAttr:$fixed_vector_sizes)>, 892 Results<(outs AnyVector:$result)> { 893 let summary = "vector reshape operation"; 894 let description = [{ 895 Reshapes its vector operand from 'input_shape' to 'output_shape' maintaining 896 fixed vector dimension 'fixed_vector_sizes' on the innermost vector 897 dimensions. 898 899 The parameters 'input_shape' and 'output_shape' represent valid data shapes 900 across fixed vector shapes. For example, if a vector has a valid data 901 shape [6] with fixed vector size [8], then the valid data elements are 902 assumed to be stored at the beginning of the vector with the remaining 903 vector elements undefined. 904 905 In the examples below, valid data elements are represented by an alphabetic 906 character, and undefined data elements are represented by '-'. 907 908 Example 909 910 vector<1x8xf32> with valid data shape [6], fixed vector sizes [8] 911 912 input: [a, b, c, d, e, f] 913 914 layout map: (d0) -> (d0 floordiv 8, d0 mod 8) 915 916 vector layout: [a, b, c, d, e, f, -, -] 917 918 Example 919 920 vector<2x8xf32> with valid data shape [10], fixed vector sizes [8] 921 922 input: [a, b, c, d, e, f, g, h, i, j] 923 924 layout map: (d0) -> (d0 floordiv 8, d0 mod 8) 925 926 vector layout: [[a, b, c, d, e, f, g, h], 927 [i, j, -, -, -, -, -, -]] 928 929 Example 930 931 vector<2x2x2x3xf32> with valid data shape [3, 5], fixed vector sizes 932 [2, 3] 933 934 input: [[a, b, c, d, e], 935 [f, g, h, i, j], 936 [k, l, m, n, o]] 937 938 layout map: (d0, d1) -> (d0 floordiv 3, d1 floordiv 5, 939 d0 mod 3, d1 mod 5) 940 941 vector layout: [[[[a, b, c], 942 [f, g, h]] 943 [[d, e, -], 944 [i, j, -]]], 945 [[[k, l, m], 946 [-, -, -]] 947 [[n, o, -], 948 [-, -, -]]]] 949 950 Example 951 952 %1 = vector.reshape %0, [%c3, %c6], [%c2, %c9], [4] 953 : vector<3x2x4xf32> to vector<2x3x4xf32> 954 955 input: [[a, b, c, d, e, f], 956 [g, h, i, j, k, l], 957 [m, n, o, p, q, r]] 958 959 layout map: (d0, d1) -> (d0, d1 floordiv 4, d1 mod 4) 960 961 962 Input vector: [[[a, b, c, d], 963 [e, f, -, -]], 964 [[g, h, i, j], 965 [k, l, -, -]], 966 [[m, n, o, p], 967 [q, r, -, -]]] 968 969 Output vector: [[[a, b, c, d], 970 [e, f, g, h], 971 [i, -, -, -]], 972 [[j, k, l, m], 973 [n, o, p, q], 974 [r, -, -, -]]] 975 }]; 976 977 let extraClassDeclaration = [{ 978 VectorType getInputVectorType() { 979 return vector().getType().cast<VectorType>(); 980 } 981 VectorType getOutputVectorType() { 982 return getResult().getType().cast<VectorType>(); 983 } 984 985 /// Returns as integer value the number of input shape operands. 986 int64_t getNumInputShapeSizes() { return input_shape().size(); } 987 988 /// Returns as integer value the number of output shape operands. 989 int64_t getNumOutputShapeSizes() { return output_shape().size(); } 990 991 void getFixedVectorSizes(SmallVectorImpl<int64_t> &results); 992 993 static StringRef getFixedVectorSizesAttrName() { 994 return "fixed_vector_sizes"; 995 } 996 static StringRef getInputShapeAttrName() { return "input_shape"; } 997 static StringRef getOutputShapeAttrName() { return "output_shape"; } 998 }]; 999 1000 let assemblyFormat = [{ 1001 $vector `,` `[` $input_shape `]` `,` `[` $output_shape `]` `,` 1002 $fixed_vector_sizes attr-dict `:` type($vector) `to` type($result) 1003 }]; 1004} 1005 1006def Vector_ExtractStridedSliceOp : 1007 Vector_Op<"extract_strided_slice", [NoSideEffect, 1008 PredOpTrait<"operand and result have same element type", 1009 TCresVTEtIsSameAsOpBase<0, 0>>]>, 1010 Arguments<(ins AnyVector:$vector, I64ArrayAttr:$offsets, 1011 I64ArrayAttr:$sizes, I64ArrayAttr:$strides)>, 1012 Results<(outs AnyVector)> { 1013 let summary = "extract_strided_slice operation"; 1014 let description = [{ 1015 Takes an n-D vector, k-D `offsets` integer array attribute, a k-sized 1016 `sizes` integer array attribute, a k-sized `strides` integer array 1017 attribute and extracts the n-D subvector at the proper offset. 1018 1019 At the moment strides must contain only 1s. 1020 // TODO: support non-1 strides. 1021 1022 Returns an n-D vector where the first k-D dimensions match the `sizes` 1023 attribute. The returned subvector contains the elements starting at offset 1024 `offsets` and ending at `offsets + sizes`. 1025 1026 Example: 1027 1028 ```mlir 1029 %1 = vector.extract_strided_slice %0 1030 {offsets = [0, 2], sizes = [2, 4], strides = [1, 1]}: 1031 vector<4x8x16xf32> to vector<2x4x16xf32> 1032 1033 // TODO: Evolve to a range form syntax similar to: 1034 %1 = vector.extract_strided_slice %0[0:2:1][2:4:1] 1035 vector<4x8x16xf32> to vector<2x4x16xf32> 1036 ``` 1037 }]; 1038 let builders = [ 1039 OpBuilderDAG<(ins "Value":$source, "ArrayRef<int64_t>":$offsets, 1040 "ArrayRef<int64_t>":$sizes, "ArrayRef<int64_t>":$strides)> 1041 ]; 1042 let extraClassDeclaration = [{ 1043 static StringRef getOffsetsAttrName() { return "offsets"; } 1044 static StringRef getSizesAttrName() { return "sizes"; } 1045 static StringRef getStridesAttrName() { return "strides"; } 1046 VectorType getVectorType(){ return vector().getType().cast<VectorType>(); } 1047 void getOffsets(SmallVectorImpl<int64_t> &results); 1048 }]; 1049 let hasCanonicalizer = 1; 1050 let hasFolder = 1; 1051 let assemblyFormat = "$vector attr-dict `:` type($vector) `to` type(results)"; 1052} 1053 1054def Vector_TransferReadOp : 1055 Vector_Op<"transfer_read", [ 1056 DeclareOpInterfaceMethods<VectorTransferOpInterface>, 1057 DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]> 1058 ]>, 1059 Arguments<(ins AnyMemRef:$memref, Variadic<Index>:$indices, 1060 AffineMapAttr:$permutation_map, AnyType:$padding, 1061 OptionalAttr<BoolArrayAttr>:$masked)>, 1062 Results<(outs AnyVector:$vector)> { 1063 1064 let summary = "Reads a supervector from memory into an SSA vector value."; 1065 1066 let description = [{ 1067 The `vector.transfer_read` op performs a read from a slice within a 1068 [MemRef](../LangRef.md#memref-type) supplied as its first operand 1069 into a [vector](../LangRef.md#vector-type) of the same base elemental type. 1070 1071 A memref operand with vector element type, must have its vector element 1072 type match a suffix (shape and element type) of the vector (e.g. 1073 memref<3x2x6x4x3xf32>, vector<1x1x4x3xf32>). 1074 1075 The slice is further defined by a full-rank index within the MemRef, 1076 supplied as the operands `2 .. 1 + rank(memref)`. 1077 1078 The permutation_map [attribute](../LangRef.md#attributes) is an 1079 [affine-map](Affine.md#affine-maps) which specifies the transposition on the 1080 slice to match the vector shape. The permutation map may be implicit and 1081 omitted from parsing and printing if it is the canonical minor identity map 1082 (i.e. if it does not permute or broadcast any dimension). 1083 1084 The size of the slice is specified by the size of the vector, given as the 1085 return type. 1086 1087 An `ssa-value` of the same elemental type as the MemRef is provided as the 1088 last operand to specify padding in the case of out-of-bounds accesses. 1089 1090 An optional boolean array attribute is provided to specify which dimensions 1091 of the transfer need masking. When a dimension is specified as not requiring 1092 masking, the `vector.transfer_read` may be lowered to simple loads. The 1093 absence of this `masked` attribute signifies that all dimensions of the 1094 transfer need to be masked. 1095 1096 This operation is called 'read' by opposition to 'load' because the 1097 super-vector granularity is generally not representable with a single 1098 hardware register. A `vector.transfer_read` is thus a mid-level abstraction 1099 that supports super-vectorization with non-effecting padding for full-tile 1100 only operations. 1101 1102 More precisely, let's dive deeper into the permutation_map for the following 1103 MLIR: 1104 1105 ```mlir 1106 vector.transfer_read %A[%expr1, %expr2, %expr3, %expr4] 1107 { permutation_map : (d0,d1,d2,d3) -> (d2,0,d0) } : 1108 memref<?x?x?x?xf32>, vector<3x4x5xf32> 1109 ``` 1110 1111 This operation always reads a slice starting at `%A[%expr1, %expr2, %expr3, 1112 %expr4]`. The size of the slice is 3 along d2 and 5 along d0, so the slice 1113 is: `%A[%expr1 : %expr1 + 5, %expr2, %expr3:%expr3 + 3, %expr4]` 1114 1115 That slice needs to be read into a `vector<3x4x5xf32>`. Since the 1116 permutation map is not full rank, there must be a broadcast along vector 1117 dimension `1`. 1118 1119 A notional lowering of vector.transfer_read could generate code resembling: 1120 1121 ```mlir 1122 // %expr1, %expr2, %expr3, %expr4 defined before this point 1123 %tmp = alloc() : vector<3x4x5xf32> 1124 %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>> 1125 for %i = 0 to 3 { 1126 affine.for %j = 0 to 4 { 1127 affine.for %k = 0 to 5 { 1128 %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : 1129 memref<?x?x?x?xf32> 1130 store %tmp[%i, %j, %k] : vector<3x4x5xf32> 1131 }}} 1132 %c0 = constant 0 : index 1133 %vec = load %view_in_tmp[%c0] : vector<3x4x5xf32> 1134 ``` 1135 1136 On a GPU one could then map `i`, `j`, `k` to blocks and threads. Notice that 1137 the temporary storage footprint is `3 * 5` values but `3 * 4 * 5` values are 1138 actually transferred between `%A` and `%tmp`. 1139 1140 Alternatively, if a notional vector broadcast operation were available, the 1141 lowered code would resemble: 1142 1143 ```mlir 1144 // %expr1, %expr2, %expr3, %expr4 defined before this point 1145 %tmp = alloc() : vector<3x4x5xf32> 1146 %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>> 1147 for %i = 0 to 3 { 1148 affine.for %k = 0 to 5 { 1149 %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : 1150 memref<?x?x?x?xf32> 1151 store %tmp[%i, 0, %k] : vector<3x4x5xf32> 1152 }} 1153 %c0 = constant 0 : index 1154 %tmpvec = load %view_in_tmp[%c0] : vector<3x4x5xf32> 1155 %vec = broadcast %tmpvec, 1 : vector<3x4x5xf32> 1156 ``` 1157 1158 where `broadcast` broadcasts from element 0 to all others along the 1159 specified dimension. This time, the temporary storage footprint is `3 * 5` 1160 values which is the same amount of data as the `3 * 5` values transferred. 1161 An additional `1` broadcast is required. On a GPU this broadcast could be 1162 implemented using a warp-shuffle if loop `j` were mapped to `threadIdx.x`. 1163 1164 Syntax 1165 ``` 1166 operation ::= ssa-id `=` `vector.transfer_read` ssa-use-list 1167 `{` attribute-entry `} :` memref-type `,` vector-type 1168 ``` 1169 1170 Example: 1171 1172 ```mlir 1173 // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32> 1174 // and pad with %f0 to handle the boundary case: 1175 %f0 = constant 0.0f : f32 1176 for %i0 = 0 to %0 { 1177 affine.for %i1 = 0 to %1 step 256 { 1178 affine.for %i2 = 0 to %2 step 32 { 1179 %v = vector.transfer_read %A[%i0, %i1, %i2], (%f0) 1180 {permutation_map: (d0, d1, d2) -> (d2, d1)} : 1181 memref<?x?x?xf32>, vector<32x256xf32> 1182 }}} 1183 1184 // Read the slice `%A[%i0, %i1]` (i.e. the element `%A[%i0, %i1]`) into 1185 // vector<128xf32>. The underlying implementation will require a 1-D vector 1186 // broadcast: 1187 for %i0 = 0 to %0 { 1188 affine.for %i1 = 0 to %1 { 1189 %3 = vector.transfer_read %A[%i0, %i1] 1190 {permutation_map: (d0, d1) -> (0)} : 1191 memref<?x?xf32>, vector<128xf32> 1192 } 1193 } 1194 1195 // Read from a memref with vector element type. 1196 %4 = vector.transfer_read %arg1[%c3, %c3], %vf0 1197 {permutation_map = (d0, d1)->(d0, d1)} 1198 : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32> 1199 ``` 1200 }]; 1201 1202 let builders = [ 1203 // Builder that sets padding to zero. 1204 OpBuilderDAG<(ins "VectorType":$vector, "Value":$memref, 1205 "ValueRange":$indices, "AffineMap":$permutationMap, 1206 CArg<"ArrayRef<bool>", "{}">:$maybeMasked)>, 1207 // Builder that sets permutation map (resp. padding) to 1208 // 'getMinorIdentityMap' (resp. zero). 1209 OpBuilderDAG<(ins "VectorType":$vector, "Value":$memref, 1210 "ValueRange":$indices, CArg<"ArrayRef<bool>", "{}">:$maybeMasked)> 1211 ]; 1212 1213 let hasFolder = 1; 1214} 1215 1216def Vector_TransferWriteOp : 1217 Vector_Op<"transfer_write", [ 1218 DeclareOpInterfaceMethods<VectorTransferOpInterface>, 1219 DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]> 1220 ]>, 1221 Arguments<(ins AnyVector:$vector, AnyMemRef:$memref, 1222 Variadic<Index>:$indices, 1223 AffineMapAttr:$permutation_map, 1224 OptionalAttr<BoolArrayAttr>:$masked)> { 1225 1226 let summary = "The vector.transfer_write op writes a supervector to memory."; 1227 1228 let description = [{ 1229 The `vector.transfer_write` op performs a write from a 1230 [vector](../LangRef.md#vector-type), supplied as its first operand, into a 1231 slice within a [MemRef](../LangRef.md#memref-type) of the same base 1232 elemental type, supplied as its second operand. 1233 1234 A vector memref operand must have its vector element type match a suffix 1235 (shape and element type) of the vector (e.g. memref<3x2x6x4x3xf32>, 1236 vector<1x1x4x3xf32>). 1237 1238 The slice is further defined by a full-rank index within the MemRef, 1239 supplied as the operands `3 .. 2 + rank(memref)`. 1240 1241 The permutation_map [attribute](../LangRef.md#attributes) is an 1242 [affine-map](Affine.md#affine-maps) which specifies the transposition on the 1243 slice to match the vector shape. The permutation map may be implicit and 1244 omitted from parsing and printing if it is the canonical minor identity map 1245 (i.e. if it does not permute or broadcast any dimension). 1246 1247 The size of the slice is specified by the size of the vector. 1248 1249 An optional boolean array attribute is provided to specify which dimensions 1250 of the transfer need masking. When a dimension is specified as not requiring 1251 masking, the `vector.transfer_write` may be lowered to simple stores. The 1252 absence of this `mask` attribute signifies that all dimensions of the 1253 transfer need to be masked. 1254 1255 This operation is called 'write' by opposition to 'store' because the 1256 super-vector granularity is generally not representable with a single 1257 hardware register. A `vector.transfer_write` is thus a 1258 mid-level abstraction that supports super-vectorization with non-effecting 1259 padding for full-tile-only code. It is the responsibility of 1260 `vector.transfer_write`'s implementation to ensure the memory writes are 1261 valid. Different lowerings may be pertinent depending on the hardware 1262 support. 1263 1264 Example: 1265 1266 ```mlir 1267 // write vector<16x32x64xf32> into the slice 1268 // `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`: 1269 for %i0 = 0 to %0 { 1270 affine.for %i1 = 0 to %1 step 32 { 1271 affine.for %i2 = 0 to %2 step 64 { 1272 affine.for %i3 = 0 to %3 step 16 { 1273 %val = `ssa-value` : vector<16x32x64xf32> 1274 vector.transfer_write %val, %A[%i0, %i1, %i2, %i3] 1275 {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} : 1276 vector<16x32x64xf32>, memref<?x?x?x?xf32> 1277 }}}} 1278 1279 // write to a memref with vector element type. 1280 vector.transfer_write %4, %arg1[%c3, %c3] 1281 {permutation_map = (d0, d1)->(d0, d1)} 1282 : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>> 1283 ``` 1284 }]; 1285 1286 let builders = [ 1287 // Builder that sets permutation map to 'getMinorIdentityMap'. 1288 OpBuilderDAG<(ins "Value":$vector, "Value":$memref, "ValueRange":$indices, 1289 CArg<"ArrayRef<bool>", "{}">:$maybeMasked)>, 1290 OpBuilderDAG<(ins "Value":$vector, "Value":$memref, "ValueRange":$indices, 1291 "AffineMap":$permutationMap)>, 1292 ]; 1293 1294 let hasFolder = 1; 1295} 1296 1297def Vector_MaskedLoadOp : 1298 Vector_Op<"maskedload">, 1299 Arguments<(ins AnyMemRef:$base, 1300 VectorOfRankAndType<[1], [I1]>:$mask, 1301 VectorOfRank<[1]>:$pass_thru)>, 1302 Results<(outs VectorOfRank<[1]>:$result)> { 1303 1304 let summary = "loads elements from memory into a vector as defined by a mask vector"; 1305 1306 let description = [{ 1307 The masked load reads elements from memory into a 1-D vector as defined 1308 by a base and a 1-D mask vector. When the mask is set, the element is read 1309 from memory. Otherwise, the corresponding element is taken from a 1-D 1310 pass-through vector. Informally the semantics are: 1311 ``` 1312 result[0] := mask[0] ? MEM[base+0] : pass_thru[0] 1313 result[1] := mask[1] ? MEM[base+1] : pass_thru[1] 1314 etc. 1315 ``` 1316 The masked load can be used directly where applicable, or can be used 1317 during progressively lowering to bring other memory operations closer to 1318 hardware ISA support for a masked load. The semantics of the operation 1319 closely correspond to those of the `llvm.masked.load` 1320 [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-load-intrinsics). 1321 1322 Example: 1323 1324 ```mlir 1325 %0 = vector.maskedload %base, %mask, %pass_thru 1326 : memref<?xf32>, vector<8xi1>, vector<8xf32> into vector<8xf32> 1327 ``` 1328 }]; 1329 let extraClassDeclaration = [{ 1330 MemRefType getMemRefType() { 1331 return base().getType().cast<MemRefType>(); 1332 } 1333 VectorType getMaskVectorType() { 1334 return mask().getType().cast<VectorType>(); 1335 } 1336 VectorType getPassThruVectorType() { 1337 return pass_thru().getType().cast<VectorType>(); 1338 } 1339 VectorType getResultVectorType() { 1340 return result().getType().cast<VectorType>(); 1341 } 1342 }]; 1343 let assemblyFormat = "$base `,` $mask `,` $pass_thru attr-dict `:` " 1344 "type($base) `,` type($mask) `,` type($pass_thru) `into` type($result)"; 1345 let hasCanonicalizer = 1; 1346} 1347 1348def Vector_MaskedStoreOp : 1349 Vector_Op<"maskedstore">, 1350 Arguments<(ins AnyMemRef:$base, 1351 VectorOfRankAndType<[1], [I1]>:$mask, 1352 VectorOfRank<[1]>:$value)> { 1353 1354 let summary = "stores elements from a vector into memory as defined by a mask vector"; 1355 1356 let description = [{ 1357 The masked store operation writes elements from a 1-D vector into memory 1358 as defined by a base and a 1-D mask vector. When the mask is set, the 1359 corresponding element from the vector is written to memory. Otherwise, 1360 no action is taken for the element. Informally the semantics are: 1361 ``` 1362 if (mask[0]) MEM[base+0] = value[0] 1363 if (mask[1]) MEM[base+1] = value[1] 1364 etc. 1365 ``` 1366 The masked store can be used directly where applicable, or can be used 1367 during progressively lowering to bring other memory operations closer to 1368 hardware ISA support for a masked store. The semantics of the operation 1369 closely correspond to those of the `llvm.masked.store` 1370 [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-store-intrinsics). 1371 1372 Example: 1373 1374 ```mlir 1375 vector.maskedstore %base, %mask, %value 1376 : memref<?xf32>, vector<8xi1>, vector<8xf32> 1377 ``` 1378 }]; 1379 let extraClassDeclaration = [{ 1380 MemRefType getMemRefType() { 1381 return base().getType().cast<MemRefType>(); 1382 } 1383 VectorType getMaskVectorType() { 1384 return mask().getType().cast<VectorType>(); 1385 } 1386 VectorType getValueVectorType() { 1387 return value().getType().cast<VectorType>(); 1388 } 1389 }]; 1390 let assemblyFormat = "$base `,` $mask `,` $value attr-dict `:` " 1391 "type($mask) `,` type($value) `into` type($base)"; 1392 let hasCanonicalizer = 1; 1393} 1394 1395def Vector_GatherOp : 1396 Vector_Op<"gather">, 1397 Arguments<(ins AnyMemRef:$base, 1398 VectorOfRankAndType<[1], [AnyInteger]>:$indices, 1399 VectorOfRankAndType<[1], [I1]>:$mask, 1400 Variadic<VectorOfRank<[1]>>:$pass_thru)>, 1401 Results<(outs VectorOfRank<[1]>:$result)> { 1402 1403 let summary = "gathers elements from memory into a vector as defined by an index vector and mask"; 1404 1405 let description = [{ 1406 The gather operation gathers elements from memory into a 1-D vector as 1407 defined by a base and a 1-D index vector, but only if the corresponding 1408 bit is set in a 1-D mask vector. Otherwise, the element is taken from a 1409 1-D pass-through vector, if provided, or left undefined. Informally the 1410 semantics are: 1411 ``` 1412 if (!defined(pass_thru)) pass_thru = [undef, .., undef] 1413 result[0] := mask[0] ? MEM[base + index[0]] : pass_thru[0] 1414 result[1] := mask[1] ? MEM[base + index[1]] : pass_thru[1] 1415 etc. 1416 ``` 1417 The vector dialect leaves out-of-bounds behavior undefined. 1418 1419 The gather operation can be used directly where applicable, or can be used 1420 during progressively lowering to bring other memory operations closer to 1421 hardware ISA support for a gather. The semantics of the operation closely 1422 correspond to those of the `llvm.masked.gather` 1423 [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-gather-intrinsics). 1424 1425 Example: 1426 1427 ```mlir 1428 %g = vector.gather %base, %indices, %mask, %pass_thru 1429 : (memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>) -> vector<16xf32> 1430 ``` 1431 }]; 1432 let extraClassDeclaration = [{ 1433 MemRefType getMemRefType() { 1434 return base().getType().cast<MemRefType>(); 1435 } 1436 VectorType getIndicesVectorType() { 1437 return indices().getType().cast<VectorType>(); 1438 } 1439 VectorType getMaskVectorType() { 1440 return mask().getType().cast<VectorType>(); 1441 } 1442 VectorType getPassThruVectorType() { 1443 return (llvm::size(pass_thru()) == 0) 1444 ? VectorType() 1445 : (*pass_thru().begin()).getType().cast<VectorType>(); 1446 } 1447 VectorType getResultVectorType() { 1448 return result().getType().cast<VectorType>(); 1449 } 1450 }]; 1451 let assemblyFormat = "operands attr-dict `:` functional-type(operands, results)"; 1452 let hasCanonicalizer = 1; 1453} 1454 1455def Vector_ScatterOp : 1456 Vector_Op<"scatter">, 1457 Arguments<(ins AnyMemRef:$base, 1458 VectorOfRankAndType<[1], [AnyInteger]>:$indices, 1459 VectorOfRankAndType<[1], [I1]>:$mask, 1460 VectorOfRank<[1]>:$value)> { 1461 1462 let summary = "scatters elements from a vector into memory as defined by an index vector and mask"; 1463 1464 let description = [{ 1465 The scatter operation scatters elements from a 1-D vector into memory as 1466 defined by a base and a 1-D index vector, but only if the corresponding 1467 bit in a 1-D mask vector is set. Otherwise, no action is taken for that 1468 element. Informally the semantics are: 1469 ``` 1470 if (mask[0]) MEM[base + index[0]] = value[0] 1471 if (mask[1]) MEM[base + index[1]] = value[1] 1472 etc. 1473 ``` 1474 The vector dialect leaves out-of-bounds and repeated index behavior 1475 undefined. Underlying implementations may enforce strict sequential 1476 semantics for the latter, though. 1477 TODO: enforce the latter always? 1478 1479 The scatter operation can be used directly where applicable, or can be used 1480 during progressively lowering to bring other memory operations closer to 1481 hardware ISA support for a scatter. The semantics of the operation closely 1482 correspond to those of the `llvm.masked.scatter` 1483 [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-scatter-intrinsics). 1484 1485 Example: 1486 1487 ```mlir 1488 vector.scatter %base, %indices, %mask, %value 1489 : vector<16xi32>, vector<16xi1>, vector<16xf32> into memref<?xf32> 1490 ``` 1491 }]; 1492 let extraClassDeclaration = [{ 1493 MemRefType getMemRefType() { 1494 return base().getType().cast<MemRefType>(); 1495 } 1496 VectorType getIndicesVectorType() { 1497 return indices().getType().cast<VectorType>(); 1498 } 1499 VectorType getMaskVectorType() { 1500 return mask().getType().cast<VectorType>(); 1501 } 1502 VectorType getValueVectorType() { 1503 return value().getType().cast<VectorType>(); 1504 } 1505 }]; 1506 let assemblyFormat = "$base `,` $indices `,` $mask `,` $value attr-dict `:` " 1507 "type($indices) `,` type($mask) `,` type($value) `into` type($base)"; 1508 let hasCanonicalizer = 1; 1509} 1510 1511def Vector_ExpandLoadOp : 1512 Vector_Op<"expandload">, 1513 Arguments<(ins AnyMemRef:$base, 1514 VectorOfRankAndType<[1], [I1]>:$mask, 1515 VectorOfRank<[1]>:$pass_thru)>, 1516 Results<(outs VectorOfRank<[1]>:$result)> { 1517 1518 let summary = "reads elements from memory and spreads them into a vector as defined by a mask"; 1519 1520 let description = [{ 1521 The expand load reads elements from memory into a 1-D vector as defined 1522 by a base and a 1-D mask vector. When the mask is set, the next element 1523 is read from memory. Otherwise, the corresponding element is taken from 1524 a 1-D pass-through vector. Informally the semantics are: 1525 ``` 1526 index = base 1527 result[0] := mask[0] ? MEM[index++] : pass_thru[0] 1528 result[1] := mask[1] ? MEM[index++] : pass_thru[1] 1529 etc. 1530 ``` 1531 Note that the index increment is done conditionally. 1532 1533 The expand load can be used directly where applicable, or can be used 1534 during progressively lowering to bring other memory operations closer to 1535 hardware ISA support for an expand. The semantics of the operation closely 1536 correspond to those of the `llvm.masked.expandload` 1537 [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-expandload-intrinsics). 1538 1539 Example: 1540 1541 ```mlir 1542 %0 = vector.expandload %base, %mask, %pass_thru 1543 : memref<?xf32>, vector<8xi1>, vector<8xf32> into vector<8xf32> 1544 ``` 1545 }]; 1546 let extraClassDeclaration = [{ 1547 MemRefType getMemRefType() { 1548 return base().getType().cast<MemRefType>(); 1549 } 1550 VectorType getMaskVectorType() { 1551 return mask().getType().cast<VectorType>(); 1552 } 1553 VectorType getPassThruVectorType() { 1554 return pass_thru().getType().cast<VectorType>(); 1555 } 1556 VectorType getResultVectorType() { 1557 return result().getType().cast<VectorType>(); 1558 } 1559 }]; 1560 let assemblyFormat = "$base `,` $mask `,` $pass_thru attr-dict `:` " 1561 "type($base) `,` type($mask) `,` type($pass_thru) `into` type($result)"; 1562 let hasCanonicalizer = 1; 1563} 1564 1565def Vector_CompressStoreOp : 1566 Vector_Op<"compressstore">, 1567 Arguments<(ins AnyMemRef:$base, 1568 VectorOfRankAndType<[1], [I1]>:$mask, 1569 VectorOfRank<[1]>:$value)> { 1570 1571 let summary = "writes elements selectively from a vector as defined by a mask"; 1572 1573 let description = [{ 1574 The compress store operation writes elements from a 1-D vector into memory 1575 as defined by a base and a 1-D mask vector. When the mask is set, the 1576 corresponding element from the vector is written next to memory. Otherwise, 1577 no action is taken for the element. Informally the semantics are: 1578 ``` 1579 index = base 1580 if (mask[0]) MEM[index++] = value[0] 1581 if (mask[1]) MEM[index++] = value[1] 1582 etc. 1583 ``` 1584 Note that the index increment is done conditionally. 1585 1586 The compress store can be used directly where applicable, or can be used 1587 during progressively lowering to bring other memory operations closer to 1588 hardware ISA support for a compress. The semantics of the operation closely 1589 correspond to those of the `llvm.masked.compressstore` 1590 [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-compressstore-intrinsics). 1591 1592 Example: 1593 1594 ```mlir 1595 vector.compressstore %base, %mask, %value 1596 : memref<?xf32>, vector<8xi1>, vector<8xf32> 1597 ``` 1598 }]; 1599 let extraClassDeclaration = [{ 1600 MemRefType getMemRefType() { 1601 return base().getType().cast<MemRefType>(); 1602 } 1603 VectorType getMaskVectorType() { 1604 return mask().getType().cast<VectorType>(); 1605 } 1606 VectorType getValueVectorType() { 1607 return value().getType().cast<VectorType>(); 1608 } 1609 }]; 1610 let assemblyFormat = "$base `,` $mask `,` $value attr-dict `:` " 1611 "type($base) `,` type($mask) `,` type($value)"; 1612 let hasCanonicalizer = 1; 1613} 1614 1615def Vector_ShapeCastOp : 1616 Vector_Op<"shape_cast", [NoSideEffect]>, 1617 Arguments<(ins AnyTypeOf<[AnyVector, TupleOf<[AnyVector]>]>:$source)>, 1618 Results<(outs AnyTypeOf<[AnyVector, TupleOf<[AnyVector]>]>:$result)> { 1619 let summary = "shape_cast casts between vector shapes"; 1620 let description = [{ 1621 The shape_cast operation casts between an n-D source vector shape and 1622 a k-D result vector shape (the element type remains the same). 1623 1624 If reducing rank (n > k), result dimension sizes must be a product 1625 of contiguous source dimension sizes. 1626 If expanding rank (n < k), source dimensions must factor into a 1627 contiguous sequence of destination dimension sizes. 1628 Each source dim is expanded (or contiguous sequence of source dims combined) 1629 in source dimension list order (i.e. 0 <= i < n), to produce a contiguous 1630 sequence of result dims (or a single result dim), in result dimension list 1631 order (i.e. 0 <= j < k). The product of all source dimension sizes and all 1632 result dimension sizes must match. 1633 1634 If the source/result types are a tuple of vectors, the casting operation 1635 described above is applied to each source/result tuple element pair. 1636 1637 It is currently assumed that this operation does not require moving data, 1638 and that it will be folded away before lowering vector operations. 1639 1640 There is an exception to the folding expectation when targeting 1641 llvm.intr.matrix operations. We need a type conversion back and forth from a 1642 2-D MLIR vector to a 1-D flattened LLVM vector.shape_cast lowering to LLVM 1643 is supported in that particular case, for now. 1644 1645 Example: 1646 1647 ```mlir 1648 // Example casting to a lower vector rank. 1649 %1 = vector.shape_cast %0 : vector<5x1x4x3xf32> to vector<20x3xf32> 1650 1651 // Example casting to a higher vector rank. 1652 %3 = vector.shape_cast %2 : vector<10x12x8xf32> to vector<5x2x3x4x8xf32> 1653 1654 // Example casting a tuple of vectors of same rank, where tuple elements 1655 // may have different shapes. 1656 %5 = vector.shape_cast %4 : tuple<vector<3x4x2xf32>, vector<3x3x2xf32>> to 1657 tuple<vector<12x2xf32>, vector<9x2xf32>> 1658 ``` 1659 }]; 1660 let extraClassDeclaration = [{ 1661 VectorType getSourceVectorType() { 1662 return source().getType().cast<VectorType>(); 1663 } 1664 VectorType getResultVectorType() { 1665 return getResult().getType().cast<VectorType>(); 1666 } 1667 }]; 1668 let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)"; 1669 let hasFolder = 1; 1670 let hasCanonicalizer = 1; 1671} 1672 1673def Vector_BitCastOp : 1674 Vector_Op<"bitcast", [NoSideEffect, AllRanksMatch<["source", "result"]>]>, 1675 Arguments<(ins AnyVector:$source)>, 1676 Results<(outs AnyVector:$result)>{ 1677 let summary = "bitcast casts between vectors"; 1678 let description = [{ 1679 The bitcast operation casts between vectors of the same rank, the minor 1-D 1680 vector size is casted to a vector with a different element type but same 1681 bitwidth. 1682 1683 Example: 1684 1685 ```mlir 1686 // Example casting to a smaller element type. 1687 %1 = vector.bitcast %0 : vector<5x1x4x3xf32> to vector<5x1x4x6xi16> 1688 1689 // Example casting to a bigger element type. 1690 %3 = vector.bitcast %2 : vector<10x12x8xi8> to vector<10x12x2xi32> 1691 1692 // Example casting to an element type of the same size. 1693 %5 = vector.bitcast %4 : vector<5x1x4x3xf32> to vector<5x1x4x3xi32> 1694 ``` 1695 }]; 1696 let extraClassDeclaration = [{ 1697 VectorType getSourceVectorType() { 1698 return source().getType().cast<VectorType>(); 1699 } 1700 VectorType getResultVectorType() { 1701 return getResult().getType().cast<VectorType>(); 1702 } 1703 }]; 1704 let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)"; 1705 let hasFolder = 1; 1706} 1707 1708def Vector_TypeCastOp : 1709 Vector_Op<"type_cast", [NoSideEffect, ViewLikeOpInterface]>, 1710 Arguments<(ins StaticShapeMemRefOf<[AnyType]>:$memref)>, 1711 Results<(outs AnyMemRef:$result)> { 1712 let summary = "type_cast op converts a scalar memref to a vector memref"; 1713 let description = [{ 1714 Performs a conversion from a memref with scalar element to a memref with a 1715 *single* vector element, copying the shape of the memref to the vector. This 1716 is the minimal viable operation that is required to makeke 1717 super-vectorization operational. It can be seen as a special case of the 1718 `view` operation but scoped in the super-vectorization context. 1719 1720 Syntax: 1721 1722 ``` 1723 operation ::= `vector.type_cast` ssa-use : memref-type to memref-type 1724 ``` 1725 1726 Example: 1727 1728 ```mlir 1729 %A = alloc() : memref<5x4x3xf32> 1730 %VA = vector.type_cast %A : memref<5x4x3xf32> to memref<vector<5x4x3xf32>> 1731 ``` 1732 }]; 1733 1734 /// Build the canonical memRefType with a single vector. 1735 /// E.g. memref<4 x 5 x vector<6 x f32>> -> memref<vector<4 x 5 x 6 x f32>>. 1736 let builders = [OpBuilderDAG<(ins "Value":$source)>]; 1737 1738 let extraClassDeclaration = [{ 1739 MemRefType getMemRefType() { 1740 return memref().getType().cast<MemRefType>(); 1741 } 1742 MemRefType getResultMemRefType() { 1743 return getResult().getType().cast<MemRefType>(); 1744 } 1745 // Implement ViewLikeOpInterface. 1746 Value getViewSource() { return memref(); } 1747 }]; 1748 1749 let assemblyFormat = [{ 1750 $memref attr-dict `:` type($memref) `to` type($result) 1751 }]; 1752} 1753 1754def Vector_ConstantMaskOp : 1755 Vector_Op<"constant_mask", [NoSideEffect]>, 1756 Arguments<(ins I64ArrayAttr:$mask_dim_sizes)>, 1757 Results<(outs VectorOf<[I1]>)> { 1758 let summary = "creates a constant vector mask"; 1759 let description = [{ 1760 Creates and returns a vector mask where elements of the result vector 1761 are set to '0' or '1', based on whether the element indices are contained 1762 within a hyper-rectangular region specified by the 'mask_dim_sizes' 1763 array attribute argument. Each element of the 'mask_dim_sizes' array, 1764 specifies an exclusive upper bound [0, mask-dim-size-element-value) 1765 for a unique dimension in the vector result. The conjunction of the ranges 1766 define a hyper-rectangular region within which elements values are set to 1 1767 (otherwise element values are set to 0). 1768 1769 Example: 1770 1771 ```mlir 1772 // create a constant vector mask of size 4x3xi1 with elements in range 1773 // 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0). 1774 %1 = vector.constant_mask [3, 2] : vector<4x3xi1> 1775 1776 print %1 1777 columns 1778 0 1 2 1779 |------------ 1780 0 | 1 1 0 1781 rows 1 | 1 1 0 1782 2 | 1 1 0 1783 3 | 0 0 0 1784 ``` 1785 }]; 1786 1787 let extraClassDeclaration = [{ 1788 static StringRef getMaskDimSizesAttrName() { return "mask_dim_sizes"; } 1789 }]; 1790 let assemblyFormat = "$mask_dim_sizes attr-dict `:` type(results)"; 1791} 1792 1793def Vector_CreateMaskOp : 1794 Vector_Op<"create_mask", [NoSideEffect]>, 1795 Arguments<(ins Variadic<Index>:$operands)>, Results<(outs VectorOf<[I1]>)> { 1796 let summary = "creates a vector mask"; 1797 let description = [{ 1798 Creates and returns a vector mask where elements of the result vector 1799 are set to '0' or '1', based on whether the element indices are contained 1800 within a hyper-rectangular region specified by the operands. Specifically, 1801 each operand specifies a range [0, operand-value) for a unique dimension in 1802 the vector result. The conjunction of the operand ranges define a 1803 hyper-rectangular region within which elements values are set to 1 1804 (otherwise element values are set to 0). 1805 1806 Example: 1807 1808 ```mlir 1809 // create a vector mask of size 4x3xi1 where elements in range 1810 // 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0). 1811 %1 = vector.create_mask %c3, %c2 : vector<4x3xi1> 1812 1813 print %1 1814 columns 1815 0 1 2 1816 |------------ 1817 0 | 1 1 0 1818 rows 1 | 1 1 0 1819 2 | 1 1 0 1820 3 | 0 0 0 1821 ``` 1822 }]; 1823 1824 let hasCanonicalizer = 1; 1825 let assemblyFormat = "$operands attr-dict `:` type(results)"; 1826} 1827 1828def Vector_TupleOp : 1829 Vector_Op<"tuple", [NoSideEffect]>, 1830 Arguments<(ins Variadic<AnyVector>:$vectors)>, 1831 Results<(outs TupleOf<[AnyVector]>)> { 1832 let summary = "make tuple of vectors operation"; 1833 let description = [{ 1834 Returns a tuple of its operands 'vectors'. 1835 1836 Note that this operation is used during the vector op unrolling 1837 transformation and should be removed before lowering to lower-level 1838 dialects. 1839 1840 1841 Example: 1842 1843 ```mlir 1844 %0 = vector.transfer_read ... : vector<2x2xf32> 1845 %1 = vector.transfer_read ... : vector<2x1xf32> 1846 %2 = vector.transfer_read ... : vector<2x2xf32> 1847 %3 = vector.transfer_read ... : vector<2x1xf32> 1848 1849 %4 = vector.tuple %0, %1, %2, %3 1850 : vector<2x2xf32>, vector<2x1xf32>, vector<2x2xf32>, vector<2x1xf32> 1851 ``` 1852 }]; 1853 1854 let extraClassDeclaration = [{ 1855 TupleType getResultTupleType() { 1856 return getResult().getType().cast<TupleType>(); 1857 } 1858 }]; 1859} 1860 1861def Vector_TransposeOp : 1862 Vector_Op<"transpose", [NoSideEffect, 1863 PredOpTrait<"operand and result have same element type", 1864 TCresVTEtIsSameAsOpBase<0, 0>>]>, 1865 Arguments<(ins AnyVector:$vector, I64ArrayAttr:$transp)>, 1866 Results<(outs AnyVector:$result)> { 1867 let summary = "vector transpose operation"; 1868 let description = [{ 1869 Takes a n-D vector and returns the transposed n-D vector defined by 1870 the permutation of ranks in the n-sized integer array attribute. 1871 In the operation 1872 1873 ```mlir 1874 %1 = vector.transpose %0, [i_1, .., i_n] 1875 : vector<d_1 x .. x d_n x f32> 1876 to vector<d_trans[0] x .. x d_trans[n-1] x f32> 1877 ``` 1878 1879 the transp array [i_1, .., i_n] must be a permutation of [0, .., n-1]. 1880 1881 Example: 1882 1883 ```mlir 1884 %1 = vector.transpose %0, [1, 0] : vector<2x3xf32> to vector<3x2xf32> 1885 1886 [ [a, b, c], [ [a, d], 1887 [d, e, f] ] -> [b, e], 1888 [c, f] ] 1889 ``` 1890 }]; 1891 let builders = [ 1892 OpBuilderDAG<(ins "Value":$vector, "ArrayRef<int64_t>":$transp)> 1893 ]; 1894 let extraClassDeclaration = [{ 1895 VectorType getVectorType() { 1896 return vector().getType().cast<VectorType>(); 1897 } 1898 VectorType getResultType() { 1899 return result().getType().cast<VectorType>(); 1900 } 1901 void getTransp(SmallVectorImpl<int64_t> &results); 1902 static StringRef getTranspAttrName() { return "transp"; } 1903 }]; 1904 let assemblyFormat = [{ 1905 $vector `,` $transp attr-dict `:` type($vector) `to` type($result) 1906 }]; 1907 let hasCanonicalizer = 1; 1908 let hasFolder = 1; 1909} 1910 1911def Vector_TupleGetOp : 1912 Vector_Op<"tuple_get", [NoSideEffect]>, 1913 Arguments<(ins TupleOf<[AnyVector]>:$vectors, APIntAttr:$index)>, 1914 Results<(outs AnyVector)> { 1915 let summary = "vector tuple get operation"; 1916 let description = [{ 1917 Returns the tuple element of 'vectors' at 'index'. 1918 1919 Note that this operation is used during the vector op unrolling 1920 transformation and should be removed before lowering to lower-level 1921 dialects. 1922 1923 Example: 1924 1925 ```mlir 1926 %4 = vector.tuple %0, %1, %2, %3 1927 : vector<2x2xf32>, vector<2x1xf32>, vector<2x2xf32>, vector<2x1xf32>> 1928 1929 %5 = vector.tuple_get %4, 1 1930 : tuple<vector<2x2xf32>, vector<2x1xf32>, 1931 vector<2x2xf32>, vector<2x1xf32>> 1932 ``` 1933 }]; 1934 1935 let extraClassDeclaration = [{ 1936 VectorType getResultVectorType() { 1937 return getResult().getType().cast<VectorType>(); 1938 } 1939 int64_t getIndex() { 1940 auto index = (*this)->getAttrOfType<IntegerAttr>("index"); 1941 return index.getValue().getSExtValue(); 1942 } 1943 static StringRef getIndexAttrName() { return "index"; } 1944 }]; 1945 let hasFolder = 1; 1946} 1947 1948def Vector_PrintOp : 1949 Vector_Op<"print", []>, Arguments<(ins AnyType:$source)> { 1950 let summary = "print operation (for testing and debugging)"; 1951 let description = [{ 1952 Prints the source vector (or scalar) to stdout in human readable 1953 format (for testing and debugging). No return value. 1954 1955 Example: 1956 1957 ```mlir 1958 %0 = constant 0.0 : f32 1959 %1 = vector.broadcast %0 : f32 to vector<4xf32> 1960 vector.print %1 : vector<4xf32> 1961 1962 when lowered to LLVM, the vector print is unrolled into 1963 elementary printing method calls that at runtime will yield 1964 1965 ( 0.0, 0.0, 0.0, 0.0 ) 1966 1967 on stdout when linked with a small runtime support library, 1968 which only needs to provide a few printing methods (single 1969 value for all data types, opening/closing bracket, comma, 1970 newline). 1971 ``` 1972 }]; 1973 let verifier = ?; 1974 let extraClassDeclaration = [{ 1975 Type getPrintType() { 1976 return source().getType(); 1977 } 1978 }]; 1979 let assemblyFormat = "$source attr-dict `:` type($source)"; 1980} 1981 1982//===----------------------------------------------------------------------===// 1983// Ops used for supporting progressive lowering and conversion type changes. 1984// The Ops are typically not used directly by higher level dialects, but are 1985// used by intra-dialect rewriting rules to bring vector operations closer 1986// to the hardware ISA. 1987//===----------------------------------------------------------------------===// 1988 1989/// Vector dialect matrix multiplication op that operates on flattened 1-D 1990/// MLIR vectors. This is the counterpart of llvm.matrix.multiply in MLIR. 1991/// This may seem redundant with vector.contract but it serves the purposes of 1992/// more progressive lowering and localized type conversion on the path: 1993/// `vector<...x...xf32> -> vector<...xf32> -> !llvm<... x float>`. 1994def Vector_MatmulOp : Vector_Op<"matrix_multiply", [NoSideEffect, 1995 PredOpTrait<"lhs operand and result have same element type", 1996 TCresVTEtIsSameAsOpBase<0, 0>>, 1997 PredOpTrait<"rhs operand and result have same element type", 1998 TCresVTEtIsSameAsOpBase<0, 1>>]>, 1999 Arguments<( 2000 // TODO: tighten vector element types that make sense. 2001 ins VectorOfRankAndType<[1], 2002 [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$lhs, 2003 VectorOfRankAndType<[1], 2004 [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$rhs, 2005 I32Attr:$lhs_rows, I32Attr:$lhs_columns, I32Attr:$rhs_columns)>, 2006 Results<( 2007 outs VectorOfRankAndType<[1], 2008 [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$res)> 2009{ 2010 let summary = "Vector matrix multiplication op that operates on flattened 1-D" 2011 " MLIR vectors"; 2012 let description = [{ 2013 This is the counterpart of llvm.matrix.multiply in MLIR. It serves the 2014 purposes of more progressive lowering and localized type conversion. 2015 Higher levels typically lower matrix multiplications into 'vector.contract' 2016 operations. Subsequent rewriting rule progressively lower these operations 2017 into 'vector.matrix_multiply' operations to bring the operations closer 2018 to the hardware ISA. 2019 2020 The ‘vector.matrix_multiply’ op treats `lhs` as matrix with <lhs_rows> rows 2021 and <lhs_columns> columns, `rhs` as matrix with <lhs_columns> rows and 2022 <rhs_columns> and multiplies them. The result matrix is returned embedded in 2023 the result vector. 2024 2025 Also see: 2026 2027 http://llvm.org/docs/LangRef.html#llvm-matrix-multiply-intrinsic 2028 2029 Example: 2030 2031 ```mlir 2032 %C = vector.matrix_multiply %A, %B 2033 { lhs_rows = 4: i32, lhs_columns = 16: i32 , rhs_columns = 3: i32 } : 2034 (vector<64xf64>, vector<48xf64>) -> vector<12xf64> 2035 ``` 2036 }]; 2037 let builders = [ 2038 OpBuilderDAG<(ins "Value":$lhs, "Value":$rhs, "unsigned":$lhsRows, 2039 "unsigned":$lhsColumns, "unsigned":$rhsColumns), 2040 [{ 2041 $_state.addOperands({lhs, rhs}); 2042 $_state.addAttribute("lhs_rows",$_builder.getI32IntegerAttr(lhsRows)); 2043 $_state.addAttribute("lhs_columns",$_builder.getI32IntegerAttr(lhsColumns)); 2044 $_state.addAttribute("rhs_columns",$_builder.getI32IntegerAttr(rhsColumns)); 2045 $_state.addTypes(VectorType::get(lhsRows * rhsColumns, 2046 lhs.getType().cast<VectorType>().getElementType())); 2047 }]>, 2048 ]; 2049 let verifier = ?; 2050 let assemblyFormat = "$lhs `,` $rhs attr-dict " 2051 "`:` `(` type($lhs) `,` type($rhs) `)` `->` type($res)"; 2052} 2053 2054/// Vector dialect matrix tranposition op that operates on flattened 1-D 2055/// MLIR vectors. This is the counterpart of llvm.matrix.transpose in MLIR. 2056/// This may seem redundant with vector.transpose but it serves the purposes of 2057/// more progressive lowering and localized type conversion on the path: 2058/// `vector<...x...xf32> -> vector<...xf32> -> !llvm<... x float>`. 2059def Vector_FlatTransposeOp : Vector_Op<"flat_transpose", [NoSideEffect, 2060 PredOpTrait<"source operand and result have same element type", 2061 TCresVTEtIsSameAsOpBase<0, 0>>]>, 2062 Arguments<( 2063 // TODO: tighten vector element types that make sense. 2064 ins VectorOfRankAndType<[1], 2065 [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$matrix, 2066 I32Attr:$rows, I32Attr:$columns)>, 2067 Results<( 2068 outs VectorOfRankAndType<[1], 2069 [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$res)> { 2070 let summary = "Vector matrix transposition on flattened 1-D MLIR vectors"; 2071 let description = [{ 2072 This is the counterpart of llvm.matrix.transpose in MLIR. It serves 2073 the purposes of more progressive lowering and localized type conversion. 2074 Higher levels typically lower matrix tranpositions into 'vector.transpose' 2075 operations. Subsequent rewriting rule progressively lower these operations 2076 into 'vector.flat_transpose' operations to bring the operations closer 2077 to the hardware ISA. 2078 2079 The ‘vector.flat_transpose’ op treats the 1-D input `matrix` as 2080 a 2-D matrix with <rows> rows and <columns> columns, and returns the 2081 transposed matrix in flattened form in 'res'. 2082 2083 Also see: 2084 2085 http://llvm.org/docs/LangRef.html#llvm-matrix-transpose-intrinsic 2086 2087 Example: 2088 2089 ```mlir 2090 %1 = vector.flat_transpose %0 { rows = 4: i32, columns = 4: i32 } 2091 : (vector<16xf32>) -> vector<16xf32> 2092 ``` 2093 }]; 2094 let verifier = ?; 2095 let assemblyFormat = "$matrix attr-dict `:` type($matrix) `->` type($res)"; 2096} 2097 2098#endif // VECTOR_OPS 2099