# This test generates all variants of wmma intrinsics and verifies that LLVM
# generates correct instructions for them.

# Check all variants of instructions supported by PTX60 on SM70
# RUN: python %s --ptx=60 --gpu-arch=70 > %t-ptx60-sm_70.ll
# RUN: FileCheck %t-ptx60-sm_70.ll < %t-ptx60-sm_70.ll \
# RUN:           --check-prefixes=INTRINSICS,M16N16
# RUN: FileCheck %t-ptx60-sm_70.ll < %t-ptx60-sm_70.ll \
# RUN:           --check-prefixes=INTRINSICS,NOEXTGEOM,NOINT,NOSUBINT,NOMMA
# RUN: llc < %t-ptx60-sm_70.ll -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 \
# RUN:           | FileCheck %t-ptx60-sm_70.ll

# Check all variants of instructions supported by PTX61 on SM70
# RUN: python %s --ptx=61 --gpu-arch=70 > %t-ptx61-sm_70.ll
# RUN: FileCheck %t-ptx61-sm_70.ll < %t-ptx61-sm_70.ll \
# RUN:           --check-prefixes=INTRINSICS,M16N16,EXTGEOM
# RUN: FileCheck %t-ptx61-sm_70.ll < %t-ptx61-sm_70.ll \
# RUN:           --check-prefixes=INTRINSICS,NOINT,NOSUBINT,NOMMA
# RUN: llc < %t-ptx61-sm_70.ll -march=nvptx64 -mcpu=sm_70 -mattr=+ptx61 \
# RUN:           | FileCheck %t-ptx61-sm_70.ll

# Check all variants of instructions supported by PTX63 on SM72
# RUN: python %s --ptx=63 --gpu-arch=72 > %t-ptx63-sm_72.ll
# RUN: FileCheck %t-ptx63-sm_72.ll < %t-ptx63-sm_72.ll \
# RUN:           --check-prefixes=INTRINSICS,M16N16,EXTGEOM,INT
# RUN: FileCheck %t-ptx63-sm_72.ll < %t-ptx63-sm_72.ll \
# RUN:           --check-prefixes=INTRINSICS,NOSUBINT,NOMMA
# RUN: llc < %t-ptx63-sm_72.ll -march=nvptx64 -mcpu=sm_72 -mattr=+ptx63 \
# RUN:           | FileCheck %t-ptx63-sm_72.ll

# Check all variants of instructions supported by PTX63 on SM75
# RUN: python %s --ptx=63 --gpu-arch=75 > %t-ptx63-sm_75.ll
# RUN: FileCheck %t-ptx63-sm_75.ll < %t-ptx63-sm_75.ll \
# RUN:           --check-prefixes=INTRINSICS,M16N16,EXTGEOM,INT,SUBINT
# RUN: FileCheck %t-ptx63-sm_75.ll < %t-ptx63-sm_75.ll \
# RUN:           --check-prefixes=INTRINSICS,NOMMA
# RUN: llc < %t-ptx63-sm_75.ll -march=nvptx64 -mcpu=sm_75 -mattr=+ptx63 \
# RUN:           | FileCheck %t-ptx63-sm_75.ll

# Check all variants of instructions supported by PTX64 on SM70+
# RUN: python %s --ptx=64 --gpu-arch=70 > %t-ptx64-sm_70.ll
# RUN: FileCheck %t-ptx64-sm_70.ll < %t-ptx64-sm_70.ll \
# RUN:           --check-prefixes=INTRINSICS,M16N16,EXTGEOM,MMA
# RUN: FileCheck %t-ptx64-sm_70.ll < %t-ptx64-sm_70.ll \
# RUN:           --check-prefixes=INTRINSICS,NOINT,NOSUBINT
# RUN: llc < %t-ptx64-sm_70.ll -march=nvptx64 -mcpu=sm_70 -mattr=+ptx64 \
# RUN:           | FileCheck %t-ptx64-sm_70.ll

from __future__ import print_function

import argparse
from itertools import product
from string import Template

class MMAType:
  def __init__(self, ptx_type):
    self.ptx_type = ptx_type
    # LLVM IR type of a single fragment register for this PTX element type.
    self.llvm_type = {
        "f16" : "<2 x half>",
        "f32" : "float",
        "s32" : "i32",
        "s8"  : "i32",
        "u8"  : "i32",
        "s4"  : "i32",
        "u4"  : "i32",
        "b1"  : "i32",
    }[ptx_type]

    # Register name pattern expected in the PTX output for this element type.
    self.ptx_reg_pattern = {
        "f16" : "%hh[0-9]+",
        "f32" : "%f[0-9]+",
    }.get(ptx_type, "%r[0-9]+")

  def __repr__(self):
    return "%s/%s" % (self.ptx_type, self.llvm_type)

class MMAFrag:
  def __init__(self, geom, frag, ptx_elt_type):
    self.geom = geom
    self.frag = frag
    self.is_mma = (geom == "m8n8k4")
    self.mma_type = MMAType(ptx_elt_type)
    # Number of registers this fragment occupies.
    self.nregs = {
        # f16 a/b fragments use 8 registers for wmma geometries, 2 for m8n8k4 mma.
        "a:f16" : 2 if self.is_mma else 8,
        "b:f16" : 2 if self.is_mma else 8,
        "c:f16" : 4,
        "d:f16" : 4,
        "c:f32" : 8,
        "d:f32" : 8,
    }.get("%s:%s" % (frag, ptx_elt_type), {
        # u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
        "m16n16k16:a:u8" : 2,
        "m16n16k16:a:s8" : 2,
        "m16n16k16:b:u8" : 2,
        "m16n16k16:b:s8" : 2,
        "m16n16k16:c:s32" : 8,
        "m16n16k16:d:s32" : 8,

        "m8n32k16:a:u8" : 1,
        "m8n32k16:a:s8" : 1,
        "m8n32k16:b:u8" : 4,
        "m8n32k16:b:s8" : 4,
        "m8n32k16:c:s32" : 8,
        "m8n32k16:d:s32" : 8,

        "m32n8k16:a:u8" : 4,
        "m32n8k16:a:s8" : 4,
        "m32n8k16:b:u8" : 1,
        "m32n8k16:b:s8" : 1,
        "m32n8k16:c:s32" : 8,
        "m32n8k16:d:s32" : 8,

        # u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128 (b1)
        "m8n8k128:a:b1" : 1,
        "m8n8k32:a:u4" : 1,
        "m8n8k32:a:s4" : 1,
        "m8n8k128:b:b1" : 1,
        "m8n8k32:b:u4" : 1,
        "m8n8k32:b:s4" : 1,
        "m8n8k128:c:s32" : 2,
        "m8n8k128:d:s32" : 2,
        "m8n8k32:c:s32" : 2,
        "m8n8k32:d:s32" : 2,
    }.get("%s:%s:%s" % (geom, frag, ptx_elt_type), None))
    assert(self.nregs)

  def __repr__(self):
    return "%s:%s:%s%s" % (self.geom, self.frag, self.mma_type,
                           "" if self.nregs == 1 else ("*%d" % self.nregs))

class MMAOp:
  def __init__(self, a, b, c, d):
    self.a = a
    self.b = b
    self.c = c
    self.d = d

  def __repr__(self):
    return "{A:%s, B:%s, C:%s, D:%s}" % (self.a, self.b, self.c, self.d)

def make_mma_ops(geoms, types_a, types_b, types_c, types_d):
  ops = []
  for geom, type_a, type_c in product(geoms, types_a, types_c):
    for type_b, type_d in product(types_b if types_b else [type_a],
                                  types_d if types_d else [type_c]):
      ops.append(MMAOp(MMAFrag(geom, "a", type_a),
                       MMAFrag(geom, "b", type_b),
                       MMAFrag(geom, "c", type_c),
                       MMAFrag(geom, "d", type_d)))
  return ops

def make_ldst_ops(geoms, frags, types):
  return [MMAFrag(geom, frag, ptx_type) for (geom, frag, ptx_type)
          in product(geoms, frags, types)]

def get_mma_ops():
  return (make_mma_ops(["m8n8k4"],
                       ["f16"], [], ["f16", "f32"], ["f16", "f32"]) +
          make_mma_ops(["m16n16k16", "m32n8k16", "m8n32k16"],
                       ["f16"], [], ["f16", "f32"], ["f16", "f32"]) +
          make_mma_ops(["m16n16k16", "m32n8k16", "m8n32k16"],
                       ["s8", "u8"], [], ["s32"], []) +
          make_mma_ops(["m8n8k32"],
                       ["s4", "u4"], [], ["s32"], []) +
          make_mma_ops(["m8n8k128"],
                       ["b1"], [], ["s32"], []))

def get_ldst_ops(kind):
  ldst_ops = (make_ldst_ops(["m16n16k16", "m32n8k16", "m8n32k16"],
                            ["a", "b"], ["f16", "u8", "s8"]) +
              make_ldst_ops(["m16n16k16", "m32n8k16", "m8n32k16"],
                            ["c", "d"], ["f16", "f32", "s32"]) +
              make_ldst_ops(["m8n8k32"], ["a", "b"], ["s4", "u4"]) +
              make_ldst_ops(["m8n8k128"], ["a", "b"], ["b1"]) +
              make_ldst_ops(["m8n8k32", "m8n8k128"], ["c", "d"], ["s32"]))
  # "d" fragments are only used by stores; "a"/"b"/"c" only by loads.
  return [x for x in ldst_ops if (x.frag == "d") == (kind == "store")]

def is_geom_supported(geom):
  # geometries for FP and ints.
  if geom == "m8n8k4":
    return ptx_version >= 64
  if geom in ["m8n32k16", "m32n8k16"]:
    return ptx_version >= 61
  # geometries for sub-ints.
  if geom in ["m8n8k32", "m8n8k128"]:
    return ptx_version >= 63 and gpu_arch >= 75
  if geom == "m16n16k16":
    return ptx_version >= 60
  assert(False)  # Unexpected geometry.

def is_type_supported(ptx_type):
  if ptx_type in ["s8", "u8", "s32"]:
    return ptx_version >= 63 and gpu_arch >= 72
  if ptx_type in ["s4", "u4", "b1"]:
    return ptx_version >= 63 and gpu_arch >= 75
  return ptx_version >= 60 and gpu_arch >= 70

def is_mma_variant_supported(op, layout_a, layout_b, satf):
  if not (is_type_supported(op.a.mma_type.ptx_type)
          and is_geom_supported(op.a.geom)):
    return False
  if op.a.geom == "m8n8k4":
    if satf:
      return False
    if op.c.mma_type.ptx_type == "f32":
      # If C is f32, D must be, too.
      return op.d.mma_type.ptx_type == "f32"

  # sub-integer ops require row/col layout; b1 does not support satf.
  if op.a.mma_type.ptx_type in ["s4", "u4", "b1"]:
    if op.a.mma_type.ptx_type == "b1" and satf:
      return False
    return layout_a == "row" and layout_b == "col"
  return True

def is_ldst_variant_supported(frag, layout):
  if not (is_type_supported(frag.mma_type.ptx_type)
          and is_geom_supported(frag.geom)):
    return False
  if frag.mma_type.ptx_type in ["s4", "u4", "b1"]:
    # sub-integer ops require sm_75 and ptx63, and row/col layout for a/b.
    return ((frag.frag == "a" and layout == "row")
            or (frag.frag == "b" and layout == "col")
            or frag.frag in ["c", "d"])
  return True

def make_wmma_slice_ty(frag):
  return [frag.mma_type.llvm_type] * frag.nregs

def make_wmma_ld_ret_ty(frag):
  results = make_wmma_slice_ty(frag)
  if len(results) == 1:
    return "%s" % results[0]
  return "{%s}" % ", ".join(results)

# Returns the numeric address space for a PTX space qualifier.
def get_aspace(space):
  space_map = {
      ".global" : 1,
      ".shared" : 3,
      ".const"  : 4,
      ".local"  : 5,
      ".param"  : 101,
      ""        : 0,
      ".generic": 0
  }
  return space_map[space]

def get_pspace(space):
  return "p%di8" % get_aspace(space)

def check_pattern(frag):
  return "{{%s}}" % ", *".join([frag.mma_type.ptx_reg_pattern] * frag.nregs)

def gen_wmma_load_tests():
  load_template = """
declare ${ret_ty} @${intrinsic}(i8 ${as}* %src ${extra_args});

; CHECK-LABEL: .func {{.*}}test_${function}(
define ${ret_ty} @test_${function}(i8 ${as}* %src ${extra_args}) {
; CHECK: ${instruction}
; CHECK: {${check_result}}
; CHECK: [%rd{{[0-9]+}}]${stride_pattern}
  %v0 = call ${ret_ty} @${intrinsic}(i8 ${as}* %src ${extra_args});
  ret ${ret_ty} %v0;
}

; CHECK-LABEL: .func{{.*}}test_${function}_o(
define ${ret_ty} @test_${function}_o(i8 ${as}* %src ${extra_args}) {
; CHECK: ${instruction}
; CHECK: {${check_result}}
; CHECK: [%rd{{[0-9]+}}+128]${stride_pattern}
  %src1 = getelementptr i8, i8 ${as}* %src, i32 128;
  %v0 = call ${ret_ty} @${intrinsic}(i8 ${as}* %src1 ${extra_args});
  ret ${ret_ty} %v0;
}
"""
  intrinsic_template = "llvm.nvvm.wmma.${geom}.load.${abc}.${layout}${stride}.${itype}.${pspace}"
  instruction_template = "wmma.load.${abc}.sync${aligned}.${layout}.${geom}${space}.${itype}"

  generated_items = []

  for frag, layout, space, stride in product(
      get_ldst_ops("load"),
      ["row","col"],
      ["",".shared",".global"],
      ["", ".stride"],
      ):
    if not is_ldst_variant_supported(frag, layout):
      continue

    params = {
        "abc" : frag.frag,
        "aligned" : ".aligned" if ptx_version >= 63 else "",
        "layout" : layout,
        "space" : space,
        "stride" : stride,
        "itype" : frag.mma_type.ptx_type,
        "pspace" : get_pspace(space),
        "as" : "addrspace(%d)" % get_aspace(space),
        "geom" : frag.geom,
    }

    test_params = params
    test_params["intrinsic"] = Template(intrinsic_template).substitute(params)
    test_params["function"] = test_params["intrinsic"].replace(".","_")
    test_params["instruction"] = Template(instruction_template).substitute(params)
    test_params["ret_ty"] = make_wmma_ld_ret_ty(frag)
    test_params["check_result"] = check_pattern(frag)

    if stride:
      test_params["extra_args"] = ", i32 %stride"
      test_params["stride_pattern"] = ", %r{{[0-9]+}}"
    else:
      test_params["extra_args"] = ""
      test_params["stride_pattern"] = ""

    print(Template(load_template).substitute(test_params))

    generated_items.append((test_params["intrinsic"],
                            test_params["instruction"]))

  return generated_items

def make_wmma_slice_args(frag):
  return ", ".join(["%s %%%s%d" % (t, frag.frag, i) for i, t
                    in enumerate(make_wmma_slice_ty(frag))])

def gen_wmma_store_tests():
  store_template = """
declare void @${intrinsic}(i8 ${as}* %src, ${args}${extra_args});

; CHECK-LABEL: .func {{.*}}test_${function}(
define void @test_${function}(i8 ${as}* %src, ${args}${extra_args}) {
; CHECK: ${instruction} {{.*}}[%rd{{[0-9+]}}
; CHECK: {${check_args}}
; CHECK: ${stride_pattern}
  call void @${intrinsic}(i8 ${as}* %src, ${args} ${extra_args});
  ret void
}

; CHECK-LABEL: .func{{.*}}test_${function}_o(
define void @test_${function}_o(i8 ${as}* %src, ${args}${extra_args}) {
; CHECK: ${instruction} {{.*}}[%rd{{[0-9+]}}+128]
; CHECK: ${check_args}
; CHECK: ${stride_pattern}
  %src1 = getelementptr i8, i8 ${as}* %src, i32 128;
  call void @${intrinsic}(i8 ${as}* %src1, ${args}${extra_args});
  ret void
}
"""
  intrinsic_template = "llvm.nvvm.wmma.${geom}.store.${abc}.${layout}${stride}.${itype}.${pspace}"
  instruction_template = "wmma.store.${abc}.sync${aligned}.${layout}.${geom}${space}.${itype}"

  generated_items = []

  for frag, layout, space, stride in product(
      get_ldst_ops("store"),
      ["row","col"],
      ["",".shared",".global"],
      ["", ".stride"]):

    if not is_ldst_variant_supported(frag, layout):
      continue

    params = {
        "abc" : frag.frag,
        "aligned" : ".aligned" if ptx_version >= 63 else "",
        "layout" : layout,
        "space" : space,
        "stride" : stride,
        "itype" : frag.mma_type.ptx_type,
        "pspace" : get_pspace(space),
        "as" : "addrspace(%d)" % get_aspace(space),
        "geom" : frag.geom,
    }

    test_params = params
    test_params["intrinsic"] = Template(intrinsic_template).substitute(params)
    test_params["function"] = test_params["intrinsic"].replace(".","_")
    test_params["instruction"] = Template(instruction_template).substitute(params)
    test_params["ret_ty"] = make_wmma_ld_ret_ty(frag)
    test_params["check_args"] = check_pattern(frag)
    if stride:
      test_params["extra_args"] = ", i32 %stride"
      test_params["stride_pattern"] = ", %r{{[0-9]+}};"
    else:
      test_params["extra_args"] = ""
      test_params["stride_pattern"] = ";"
    test_params["args"] = make_wmma_slice_args(frag)

    print(Template(store_template).substitute(test_params))
    generated_items.append((test_params["intrinsic"],
                            test_params["instruction"]))

  return generated_items

def mma_signature(op):
  if op.a.mma_type.ptx_type in ["s8", "u8", "s4", "u4", "b1"]:
    # int and sub-int ops are identified by input type.
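    # For example, an s8 op maps to the bare input type, yielding names like
    # llvm.nvvm.wmma.m16n16k16.mma.row.col.s8 once substituted into the
    # intrinsic templates below.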
    return op.a.mma_type.ptx_type
  else:
    # the rest are FP ops identified by accumulator & result type.
    return "%s.%s" % (op.d.mma_type.ptx_type, op.c.mma_type.ptx_type)

def mma_ptx_signature(op):
  if op.a.mma_type.ptx_type in ["s8", "u8", "s4", "u4", "b1"]:
    # int and sub-int instructions encode all four types as D.A.B.C.
    return ".".join(x.mma_type.ptx_type for x in (op.d, op.a, op.b, op.c))
  if op.a.geom == "m8n8k4":
    return "%s.f16.f16.%s" % (op.d.mma_type.ptx_type, op.c.mma_type.ptx_type)
  else:
    # the rest are FP instructions identified by the D.C types only.
    return "%s.%s" % (op.d.mma_type.ptx_type, op.c.mma_type.ptx_type)

def gen_wmma_mma_tests():
  mma_template = """
declare ${ret_ty} @${intrinsic}(
        ${args});

; CHECK-LABEL: .func {{.*}}test_${function}(
define ${ret_ty} @test_${function}(
        ${args}) {
; CHECK: ${instruction}
; CHECK-NEXT: ${check_d}
; CHECK-NEXT: ${check_a}
; CHECK-NEXT: ${check_b}
; CHECK-NEXT: ${check_c}
  %r = call ${ret_ty} @${intrinsic}(
        ${args});
  ret ${ret_ty} %r;
}
"""
  wmma_intrinsic_template = "llvm.nvvm.wmma.${geom}.mma.${alayout}.${blayout}.${intrinsic_signature}${satf}"
  wmma_instruction_template = "wmma.mma${mma_variant}.sync${aligned}.${alayout}.${blayout}.${geom}.${ptx_signature}${satf}"
  mma_intrinsic_template = "llvm.nvvm.mma.${geom}.${alayout}.${blayout}.${intrinsic_signature}"
  mma_instruction_template = "mma.sync${aligned}.${geom}.${alayout}.${blayout}.${ptx_signature}"

  generated_items = []

  for op, alayout, blayout, satf in product(
      get_mma_ops(),
      ["row","col"],
      ["row","col"],
      [".satfinite", ""]):

    if not is_mma_variant_supported(op, alayout, blayout, satf):
      continue

    params = {
        "aligned" : ".aligned" if ptx_version >= 63 else "",
        "alayout" : alayout,
        "blayout" : blayout,
        "intrinsic_signature" : mma_signature(op),
        "ptx_signature" : mma_ptx_signature(op),
        "satf" : satf,
        "geom" : op.a.geom,
        "mma_variant" : ".xor.popc" if op.a.mma_type.ptx_type == "b1" else "",
    }

    if op.a.geom == "m8n8k4":
      intrinsic_template = mma_intrinsic_template
      instruction_template = mma_instruction_template
    else:
      intrinsic_template = wmma_intrinsic_template
      instruction_template = wmma_instruction_template

    test_params = params
    test_params["intrinsic"] = Template(intrinsic_template).substitute(params)
    test_params["function"] = test_params["intrinsic"].replace(".", "_")
    test_params["instruction"] = Template(instruction_template).substitute(params)
    test_params["ret_ty"] = make_wmma_ld_ret_ty(op.d)
    test_params["check_a"] = check_pattern(op.a)
    test_params["check_b"] = check_pattern(op.b)
    test_params["check_c"] = check_pattern(op.c)
    test_params["check_d"] = check_pattern(op.d)
    args = ",\n        ".join(make_wmma_slice_args(frag)
                              for frag in (op.a, op.b, op.c))
    test_params["args"] = args
    print(Template(mma_template).substitute(test_params))
    generated_items.append((test_params["intrinsic"],
                            test_params["instruction"]))

  return generated_items

# Append the complete list of intrinsics and instructions we've generated
# tests for, and generate a set of checks to verify that we did generate a
# sensible set of tests for the given combination of PTX and SM variants.
#
def gen_check_unsupported_ops(items):
  print("; Complete list of intrinsics supported by PTX%d on sm_%d"
        % (ptx_version, gpu_arch))
  print("; INTRINSICS: {{^; INTRINSICS_LIST_BEGIN}}")
  print("""

; NOEXTGEOM-NOT: {{m8n32|m32n8}}
; NOINT-NOT: .{{s32|s8}}
; NOSUBINT-NOT: {{s4|u4|b1}}
; NOMMA-NOT: .m8n8k4.

; M16N16-DAG: m16n16k16.load.{{[ab].*}}.f16.p
; M16N16-DAG: m16n16k16.{{load|store}}.{{[cd].*\.(f16|f32)}}.p
; M16N16-DAG: m16n16k16.mma.{{.*}}.f16.f32
; M16N16-DAG: m16n16k16.mma.{{.*}}.f32.f16
; M16N16-DAG: m16n16k16.mma.{{.*}}.f16.f16
; M16N16-DAG: m16n16k16.mma.{{.*}}.f32.f32

; PTX61 adds support for m32n8k16/m8n32k16 geometries.
; EXTGEOM-DAG: m32n8k16.load.{{[ab].*}}.f16.p
; EXTGEOM-DAG: m32n8k16.{{load|store}}.{{[cd].*\.(f16|f32)}}.p
; EXTGEOM-DAG: m32n8k16.mma.{{.*}}.f16.f32
; EXTGEOM-DAG: m32n8k16.mma.{{.*}}.f32.f16
; EXTGEOM-DAG: m32n8k16.mma.{{.*}}.f16.f16
; EXTGEOM-DAG: m32n8k16.mma.{{.*}}.f32.f32

; EXTGEOM-DAG: m8n32k16.load.{{[ab].*}}.f16.p
; EXTGEOM-DAG: m8n32k16.{{load|store}}.{{[cd].*\.(f16|f32)}}.p
; EXTGEOM-DAG: m8n32k16.mma.{{.*}}.f16.f32
; EXTGEOM-DAG: m8n32k16.mma.{{.*}}.f32.f16
; EXTGEOM-DAG: m8n32k16.mma.{{.*}}.f16.f16
; EXTGEOM-DAG: m8n32k16.mma.{{.*}}.f32.f32

; INT-DAG: m16n16k16.load.{{[ab].*}}.s8.p
; INT-DAG: m8n32k16.load.{{[ab].*}}.s8.p
; INT-DAG: m32n8k16.load.{{[ab].*}}.s8.p
; INT-DAG: m16n16k16.load.{{[ab].*}}.u8.p
; INT-DAG: m8n32k16.load.{{[ab].*}}.u8.p
; INT-DAG: m32n8k16.load.{{[ab].*}}.u8.p
; INT-DAG: m32n8k16.{{load|store}}.{{[cd].*\.s32}}.p
; INT-DAG: m16n16k16.mma.{{.*}}.u8
; INT-DAG: m16n16k16.mma.{{.*}}.s8
; INT-DAG: m8n32k16.mma.{{.*}}.u8
; INT-DAG: m8n32k16.mma.{{.*}}.s8
; INT-DAG: m32n8k16.mma.{{.*}}.u8
; INT-DAG: m32n8k16.mma.{{.*}}.s8

; SUBINT-DAG: m8n8k128.load.{{[ab].*}}.b1.p
; SUBINT-DAG: m8n8k32.load.{{[ab].*}}.s4.p
; SUBINT-DAG: m8n8k32.load.{{[ab].*}}.u4.p
; SUBINT-DAG: m8n8k128.{{load|store}}.{{[cd].*\.s32}}.p
; SUBINT-DAG: m8n8k32.{{load|store}}.{{[cd].*\.s32}}.p
; SUBINT-DAG: m8n8k32.mma.{{.*}}.u4
; SUBINT-DAG: m8n8k32.mma.{{.*}}.s4
; SUBINT-DAG: m8n8k128.mma.{{.*}}.b1

; MMA-DAG: mma.m8n8k4.{{.*}}.f16.f32
; MMA-DAG: mma.m8n8k4.{{.*}}.f32.f16
; MMA-DAG: mma.m8n8k4.{{.*}}.f16.f16
; MMA-DAG: mma.m8n8k4.{{.*}}.f32.f32
;

""")

  print("; INTRINSICS_LIST_BEGIN")
  for intrinsic, instruction in sorted(items):
    print("; ", intrinsic, " -> ", instruction, "")
  print("; INTRINSICS_LIST_END")
  print("; INTRINSICS: ; INTRINSICS_LIST_END")

def gen_tests():
  items = gen_wmma_load_tests()
  items += gen_wmma_store_tests()
  items += gen_wmma_mma_tests()
  gen_check_unsupported_ops(items)

parser = argparse.ArgumentParser()
parser.add_argument("--ptx", type=int, default=60)
parser.add_argument("--gpu-arch", type=int, default=70)
args = parser.parse_args()
ptx_version = args.ptx
gpu_arch = args.gpu_arch

gen_tests()
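
# For orientation, each generated test in the emitted .ll file follows the
# templates above. A heavily abbreviated, illustrative load example (exact
# names, types and the presence of ".aligned" depend on --ptx/--gpu-arch):
#
#   declare {<2 x half>, ..., <2 x half>}
#     @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p0i8(i8 addrspace(0)* %src);
#
#   ; CHECK-LABEL: .func {{.*}}test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p0i8(
#   define ... @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p0i8(...) {
#   ; CHECK: wmma.load.a.sync.aligned.row.m16n16k16.f16
#   ; CHECK: {{{%hh[0-9]+, *%hh[0-9]+, ...}}}
#   ; CHECK: [%rd{{[0-9]+}}]
#     %v0 = call ... @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p0i8(...);
#     ret ... %v0;
#   }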