# This test generates all variants of wmma intrinsics and verifies that LLVM
# generates correct instructions for them.

# Check all variants of instructions supported by PTX60 on SM70
# RUN: python %s --ptx=60 --gpu-arch=70 > %t-ptx60-sm_70.ll
# RUN: FileCheck %t-ptx60-sm_70.ll < %t-ptx60-sm_70.ll \
# RUN:           --check-prefixes=INTRINSICS,M16N16
# RUN: FileCheck %t-ptx60-sm_70.ll < %t-ptx60-sm_70.ll \
# RUN:           --check-prefixes=INTRINSICS,NOEXTGEOM,NOINT,NOSUBINT,NOMMA
# RUN: llc < %t-ptx60-sm_70.ll -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 \
# RUN:           | FileCheck %t-ptx60-sm_70.ll

# Check all variants of instructions supported by PTX61 on SM70
# RUN: python %s --ptx=61 --gpu-arch=70 > %t-ptx61-sm_70.ll
# RUN: FileCheck %t-ptx61-sm_70.ll < %t-ptx61-sm_70.ll \
# RUN:           --check-prefixes=INTRINSICS,M16N16,EXTGEOM
# RUN: FileCheck %t-ptx61-sm_70.ll < %t-ptx61-sm_70.ll \
# RUN:           --check-prefixes=INTRINSICS,NOINT,NOSUBINT,NOMMA
# RUN: llc < %t-ptx61-sm_70.ll -march=nvptx64 -mcpu=sm_70 -mattr=+ptx61 \
# RUN:           | FileCheck %t-ptx61-sm_70.ll

# Check all variants of instructions supported by PTX63 on SM72
# RUN: python %s --ptx=63 --gpu-arch=72 > %t-ptx63-sm_72.ll
# RUN: FileCheck %t-ptx63-sm_72.ll < %t-ptx63-sm_72.ll \
# RUN:           --check-prefixes=INTRINSICS,M16N16,EXTGEOM,INT
# RUN: FileCheck %t-ptx63-sm_72.ll < %t-ptx63-sm_72.ll \
# RUN:           --check-prefixes=INTRINSICS,NOSUBINT,NOMMA
# RUN: llc < %t-ptx63-sm_72.ll -march=nvptx64 -mcpu=sm_72 -mattr=+ptx63 \
# RUN:           | FileCheck %t-ptx63-sm_72.ll

# Check all variants of instructions supported by PTX63 on SM75
# RUN: python %s --ptx=63 --gpu-arch=75 > %t-ptx63-sm_75.ll
# RUN: FileCheck %t-ptx63-sm_75.ll < %t-ptx63-sm_75.ll \
# RUN:           --check-prefixes=INTRINSICS,M16N16,EXTGEOM,INT,SUBINT
# RUN: FileCheck %t-ptx63-sm_75.ll < %t-ptx63-sm_75.ll \
# RUN:           --check-prefixes=INTRINSICS,NOMMA
# RUN: llc < %t-ptx63-sm_75.ll -march=nvptx64 -mcpu=sm_75 -mattr=+ptx63 \
# RUN:           | FileCheck %t-ptx63-sm_75.ll

# Check all variants of instructions supported by PTX64 on SM70+
# RUN: python %s --ptx=64 --gpu-arch=70 > %t-ptx64-sm_70.ll
# RUN: FileCheck %t-ptx64-sm_70.ll < %t-ptx64-sm_70.ll \
# RUN:           --check-prefixes=INTRINSICS,M16N16,EXTGEOM,MMA
# RUN: FileCheck %t-ptx64-sm_70.ll < %t-ptx64-sm_70.ll \
# RUN:           --check-prefixes=INTRINSICS,NOINT,NOSUBINT
# RUN: llc < %t-ptx64-sm_70.ll -march=nvptx64 -mcpu=sm_70 -mattr=+ptx64 \
# RUN:           | FileCheck %t-ptx64-sm_70.ll

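# The script can also be run by hand to inspect the generated IR. A typical
# invocation, mirroring the RUN lines above with the %s/%t substitutions
# expanded (the file and output names here are illustrative):
#   python wmma.py --ptx=63 --gpu-arch=75 > wmma-ptx63-sm_75.ll
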
from __future__ import print_function

import argparse
from itertools import product
from string import Template

class MMAType:
  def __init__(self, ptx_type):
    self.ptx_type = ptx_type
    self.llvm_type = {
        "f16" : "<2 x half>",
        "f32" : "float",
        "s32" : "i32",
        "s8"  : "i32",
        "u8"  : "i32",
        "s4"  : "i32",
        "u4"  : "i32",
        "b1"  : "i32",
    }[ptx_type]

    self.ptx_reg_pattern = {
        "f16" : "%hh[0-9]+",
        "f32" : "%f[0-9]+",
    }.get(ptx_type, "%r[0-9]+")

  def __repr__(self):
    return "%s/%s" % (self.ptx_type, self.llvm_type)

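# Example (illustrative, derived from the tables above; not executed):
#   >>> MMAType("f16").llvm_type
#   '<2 x half>'
#   >>> repr(MMAType("s8"))
#   's8/i32'
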
class MMAFrag:
  def __init__(self, geom, frag, ptx_elt_type):
    self.geom = geom
    self.frag = frag
    self.is_mma = geom == "m8n8k4"
    self.mma_type = MMAType(ptx_elt_type)
    self.nregs = {
        "a:f16" : 2 if self.is_mma else 8,
        "b:f16" : 2 if self.is_mma else 8,
        "c:f16" : 4,
        "d:f16" : 4,
        "c:f32" : 8,
        "d:f32" : 8,
    }.get("%s:%s" % (frag, ptx_elt_type), {
        # u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
        "m16n16k16:a:u8" : 2,
        "m16n16k16:a:s8" : 2,
        "m16n16k16:b:u8" : 2,
        "m16n16k16:b:s8" : 2,
        "m16n16k16:c:s32" : 8,
        "m16n16k16:d:s32" : 8,

        "m8n32k16:a:u8" : 1,
        "m8n32k16:a:s8" : 1,
        "m8n32k16:b:u8" : 4,
        "m8n32k16:b:s8" : 4,
        "m8n32k16:c:s32" : 8,
        "m8n32k16:d:s32" : 8,

        "m32n8k16:a:u8" : 4,
        "m32n8k16:a:s8" : 4,
        "m32n8k16:b:u8" : 1,
        "m32n8k16:b:s8" : 1,
        "m32n8k16:c:s32" : 8,
        "m32n8k16:d:s32" : 8,

        # u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128 (b1)
        "m8n8k128:a:b1" : 1,
        "m8n8k32:a:u4" : 1,
        "m8n8k32:a:s4" : 1,
        "m8n8k128:b:b1" : 1,
        "m8n8k32:b:u4" : 1,
        "m8n8k32:b:s4" : 1,
        "m8n8k128:c:s32" : 2,
        "m8n8k128:d:s32" : 2,
        "m8n8k32:c:s32" : 2,
        "m8n8k32:d:s32" : 2,
    }.get("%s:%s:%s" % (geom, frag, ptx_elt_type), None))
    assert self.nregs

  def __repr__(self):
    return "%s:%s:%s%s" % (self.geom, self.frag, self.mma_type,
                           "" if self.nregs == 1 else ("*%d" % self.nregs))

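# Example (illustrative; not executed). An f16 "a" fragment of a WMMA
# geometry occupies 8 registers, while the m8n8k4 MMA variant uses only 2:
#   >>> repr(MMAFrag("m16n16k16", "a", "f16"))
#   'm16n16k16:a:f16/<2 x half>*8'
#   >>> repr(MMAFrag("m8n8k4", "a", "f16"))
#   'm8n8k4:a:f16/<2 x half>*2'
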
class MMAOp:
  def __init__(self, a, b, c, d):
    self.a = a
    self.b = b
    self.c = c
    self.d = d

  def __repr__(self):
    return ("{A:%s, B:%s, C:%s, D:%s}" % (self.a, self.b, self.c, self.d))

def make_mma_ops(geoms, types_a, types_b, types_c, types_d):
  ops = []
  for geom, type_a, type_c in product(geoms, types_a, types_c):
    for type_b, type_d in product(types_b if types_b else [type_a],
                                  types_d if types_d else [type_c]):
      ops.append(MMAOp(MMAFrag(geom, "a", type_a),
                       MMAFrag(geom, "b", type_b),
                       MMAFrag(geom, "c", type_c),
                       MMAFrag(geom, "d", type_d)))
  return ops

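# Note: empty types_b/types_d above make B default to A's type and D to C's.
# For example (illustrative; not executed):
#   make_mma_ops(["m16n16k16"], ["f16"], [], ["f16", "f32"], [])
# yields two ops, f16 = f16*f16 + f16 and f32 = f16*f16 + f32, with B's type
# taken from A and D's type taken from C.
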
def make_ldst_ops(geoms, frags, types):
  return [MMAFrag(geom, frag, ptx_type) for (geom, frag, ptx_type)
          in product(geoms, frags, types)]

def get_mma_ops():
  return (make_mma_ops(["m8n8k4"],
                       ["f16"], [], ["f16", "f32"], ["f16", "f32"]) +
          make_mma_ops(["m16n16k16", "m32n8k16", "m8n32k16"],
                       ["f16"], [], ["f16", "f32"], ["f16", "f32"]) +
          make_mma_ops(["m16n16k16", "m32n8k16", "m8n32k16"],
                       ["s8", "u8"], [], ["s32"], []) +
          make_mma_ops(["m8n8k32"],
                       ["s4", "u4"], [], ["s32"], []) +
          make_mma_ops(["m8n8k128"],
                       ["b1"], [], ["s32"], []))

def get_ldst_ops(kind):
  ldst_ops = (make_ldst_ops(["m16n16k16", "m32n8k16", "m8n32k16"],
                            ["a", "b"], ["f16", "u8", "s8"]) +
              make_ldst_ops(["m16n16k16", "m32n8k16", "m8n32k16"],
                            ["c", "d"], ["f16", "f32", "s32"]) +
              make_ldst_ops(["m8n8k32"], ["a", "b"], ["s4", "u4"]) +
              make_ldst_ops(["m8n8k128"], ["a", "b"], ["b1"]) +
              make_ldst_ops(["m8n8k32", "m8n8k128"], ["c", "d"], ["s32"]))
  return [x for x in ldst_ops if (x.frag == "d") == (kind == "store")]

def is_geom_supported(geom):
  # Geometries for FP and integer ops.
  if geom == "m8n8k4":
    return ptx_version >= 64
  if geom in ["m8n32k16", "m32n8k16"]:
    return ptx_version >= 61
  # Geometries for sub-integer ops.
  if geom in ["m8n8k32", "m8n8k128"]:
    return ptx_version >= 63 and gpu_arch >= 75
  if geom == "m16n16k16":
    return ptx_version >= 60
  assert False  # Unexpected geometry.

def is_type_supported(ptx_type):
  if ptx_type in ["s8", "u8", "s32"]:
    return ptx_version >= 63 and gpu_arch >= 72
  if ptx_type in ["s4", "u4", "b1"]:
    return ptx_version >= 63 and gpu_arch >= 75
  return ptx_version >= 60 and gpu_arch >= 70


def is_mma_variant_supported(op, layout_a, layout_b, satf):
  if not (is_type_supported(op.a.mma_type.ptx_type)
          and is_geom_supported(op.a.geom)):
    return False
  if op.a.geom == "m8n8k4":
    if satf:
      return False
    if op.c.mma_type.ptx_type == "f32":
      # If C is f32, D must be, too.
      return op.d.mma_type.ptx_type == "f32"

  # Sub-integer ops require row/col layout and do not support satf.
  if op.a.mma_type.ptx_type in ["s4", "u4", "b1"]:
    if op.a.mma_type.ptx_type == "b1" and satf:
      return False
    return layout_a == "row" and layout_b == "col"
  return True

def is_ldst_variant_supported(frag, layout):
  if not (is_type_supported(frag.mma_type.ptx_type)
          and is_geom_supported(frag.geom)):
    return False
  if frag.mma_type.ptx_type in ["s4", "u4", "b1"]:
    # Sub-integer ops require sm_75 and PTX 6.3, and row/col layout
    # for a/b fragments.
    return ((frag.frag == "a" and layout == "row")
            or (frag.frag == "b" and layout == "col")
            or frag.frag in ["c", "d"])
  return True

def make_wmma_slice_ty(frag):
  return [frag.mma_type.llvm_type] * frag.nregs

def make_wmma_ld_ret_ty(frag):
  results = make_wmma_slice_ty(frag)
  if len(results) == 1:
    return "%s" % results[0]
  return "{%s}" % ", ".join(results)

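# Example (illustrative; not executed):
#   >>> make_wmma_ld_ret_ty(MMAFrag("m8n8k32", "c", "s32"))
#   '{i32, i32}'
#   >>> make_wmma_ld_ret_ty(MMAFrag("m8n8k32", "a", "u4"))
#   'i32'
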
# Returns the LLVM address space corresponding to a PTX state space.
def get_aspace(space):
  space_map = {
      ".global" : 1,
      ".shared" : 3,
      ".const"  : 4,
      ".local"  : 5,
      ".param"  : 101,
      ""        : 0,
      ".generic": 0
  }
  return space_map[space]

def get_pspace(space):
  return "p%di8" % get_aspace(space)

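# Example (illustrative; not executed):
#   >>> get_aspace(".shared")
#   3
#   >>> get_pspace(".global")
#   'p1i8'
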
def check_pattern(frag):
  return "{{%s}}" % ", *".join([frag.mma_type.ptx_reg_pattern] * frag.nregs)

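# Example (illustrative; not executed): a 2-register s32 fragment expands to
#   >>> check_pattern(MMAFrag("m8n8k32", "c", "s32"))
#   '{{%r[0-9]+, *%r[0-9]+}}'
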
def gen_wmma_load_tests():
  load_template = """
declare ${ret_ty} @${intrinsic}(i8 ${as}* %src ${extra_args});

; CHECK-LABEL: .func {{.*}}test_${function}(
define ${ret_ty} @test_${function}(i8 ${as}* %src ${extra_args}) {
; CHECK: ${instruction}
; CHECK: {${check_result}}
; CHECK: [%rd{{[0-9]+}}]${stride_pattern}
  %v0 = call ${ret_ty} @${intrinsic}(i8 ${as}* %src ${extra_args});
  ret ${ret_ty} %v0;
}

; CHECK-LABEL: .func{{.*}}test_${function}_o(
define ${ret_ty} @test_${function}_o(i8 ${as}* %src ${extra_args}) {
; CHECK: ${instruction}
; CHECK: {${check_result}}
; CHECK: [%rd{{[0-9]+}}+128]${stride_pattern}
  %src1 = getelementptr i8, i8 ${as}* %src, i32 128;
  %v0 = call ${ret_ty} @${intrinsic}(i8 ${as}* %src1 ${extra_args});
  ret ${ret_ty} %v0;
}
"""
  intrinsic_template = "llvm.nvvm.wmma.${geom}.load.${abc}.${layout}${stride}.${itype}.${pspace}"
  instruction_template = "wmma.load.${abc}.sync${aligned}.${layout}.${geom}${space}.${itype}"
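  # For example, frag=m16n16k16:a:f16, layout=row, space=.shared,
  # stride=.stride expands (illustratively, for ptx_version >= 63) to:
  #   intrinsic:   llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3i8
  #   instruction: wmma.load.a.sync.aligned.row.m16n16k16.shared.f16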

  generated_items = []

  for frag, layout, space, stride in product(
      get_ldst_ops("load"),
      ["row", "col"],
      ["", ".shared", ".global"],
      ["", ".stride"],
      ):
    if not is_ldst_variant_supported(frag, layout):
      continue

    params = {
        "abc" : frag.frag,
        "aligned" : ".aligned" if ptx_version >= 63 else "",
        "layout" : layout,
        "space" : space,
        "stride" : stride,
        "itype" : frag.mma_type.ptx_type,
        "pspace" : get_pspace(space),
        "as"     : "addrspace(%d)" % get_aspace(space),
        "geom"   : frag.geom,
    }

    test_params = params
    test_params["intrinsic"] = Template(intrinsic_template).substitute(params)
    test_params["function"] = test_params["intrinsic"].replace(".", "_")
    test_params["instruction"] = Template(instruction_template).substitute(params)
    test_params["ret_ty"] = make_wmma_ld_ret_ty(frag)
    test_params["check_result"] = check_pattern(frag)

    if stride:
      test_params["extra_args"] = ", i32 %stride"
      test_params["stride_pattern"] = ", %r{{[0-9]+}}"
    else:
      test_params["extra_args"] = ""
      test_params["stride_pattern"] = ""

    print(Template(load_template).substitute(test_params))

    generated_items.append((test_params["intrinsic"],
                            test_params["instruction"]))

  return generated_items

def make_wmma_slice_args(frag):
  return ", ".join(["%s %%%s%d" % (t, frag.frag, i) for i, t
                    in enumerate(make_wmma_slice_ty(frag))])

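# Example (illustrative; not executed): the 2-register s32 "d" fragment
# becomes the argument list
#   >>> make_wmma_slice_args(MMAFrag("m8n8k32", "d", "s32"))
#   'i32 %d0, i32 %d1'
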
def gen_wmma_store_tests():
  store_template = """
declare void @${intrinsic}(i8 ${as}* %src, ${args}${extra_args});

; CHECK-LABEL: .func {{.*}}test_${function}(
define void @test_${function}(i8 ${as}* %src, ${args}${extra_args}) {
; CHECK: ${instruction} {{.*}}[%rd{{[0-9]+}}
; CHECK: {${check_args}}
; CHECK: ${stride_pattern}
  call void @${intrinsic}(i8 ${as}* %src, ${args} ${extra_args});
  ret void
}

; CHECK-LABEL: .func{{.*}}test_${function}_o(
define void @test_${function}_o(i8 ${as}* %src, ${args}${extra_args}) {
; CHECK: ${instruction} {{.*}}[%rd{{[0-9]+}}+128]
; CHECK: {${check_args}}
; CHECK: ${stride_pattern}
  %src1 = getelementptr i8, i8 ${as}* %src, i32 128;
  call void @${intrinsic}(i8 ${as}* %src1, ${args}${extra_args});
  ret void
}
"""
  intrinsic_template = "llvm.nvvm.wmma.${geom}.store.${abc}.${layout}${stride}.${itype}.${pspace}"
  instruction_template = "wmma.store.${abc}.sync${aligned}.${layout}.${geom}${space}.${itype}"

  generated_items = []

  for frag, layout, space, stride in product(
      get_ldst_ops("store"),
      ["row", "col"],
      ["", ".shared", ".global"],
      ["", ".stride"]):

    if not is_ldst_variant_supported(frag, layout):
      continue

    params = {
        "abc" : frag.frag,
        "aligned" : ".aligned" if ptx_version >= 63 else "",
        "layout" : layout,
        "space" : space,
        "stride" : stride,
        "itype" : frag.mma_type.ptx_type,
        "pspace" : get_pspace(space),
        "as"     : "addrspace(%d)" % get_aspace(space),
        "geom"   : frag.geom,
    }

    test_params = params
    test_params["intrinsic"] = Template(intrinsic_template).substitute(params)
    test_params["function"] = test_params["intrinsic"].replace(".", "_")
    test_params["instruction"] = Template(instruction_template).substitute(params)
    test_params["ret_ty"] = make_wmma_ld_ret_ty(frag)
    test_params["check_args"] = check_pattern(frag)
    if stride:
      test_params["extra_args"] = ", i32 %stride"
      test_params["stride_pattern"] = ", %r{{[0-9]+}};"
    else:
      test_params["extra_args"] = ""
      test_params["stride_pattern"] = ";"
    test_params["args"] = make_wmma_slice_args(frag)

    print(Template(store_template).substitute(test_params))
    generated_items.append((test_params["intrinsic"],
                            test_params["instruction"]))

  return generated_items

def mma_signature(op):
  if op.a.mma_type.ptx_type in ["s8", "u8", "s4", "u4", "b1"]:
    # Int and sub-int ops are identified by input type.
    return op.a.mma_type.ptx_type
  else:
    # The rest are FP ops identified by accumulator & result type.
    return "%s.%s" % (op.d.mma_type.ptx_type, op.c.mma_type.ptx_type)

def mma_ptx_signature(op):
  if op.a.mma_type.ptx_type in ["s8", "u8", "s4", "u4", "b1"]:
    # Int and sub-int instructions encode all four types as D.A.B.C.
    return ".".join(x.mma_type.ptx_type for x in (op.d, op.a, op.b, op.c))
  if op.a.geom == "m8n8k4":
    return "%s.f16.f16.%s" % (op.d.mma_type.ptx_type, op.c.mma_type.ptx_type)
  else:
    # The rest are FP instructions that use only the D.C types.
    return "%s.%s" % (op.d.mma_type.ptx_type, op.c.mma_type.ptx_type)

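# Examples (illustrative; derived from the rules above):
#   an s8 integer op              -> signature "s8",      PTX signature "s32.s8.s8.s32"
#   a WMMA op with D=f32, C=f16   -> signature "f32.f16", PTX signature "f32.f16"
#   an m8n8k4 op with D=C=f32     -> signature "f32.f32", PTX signature "f32.f16.f16.f32"
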
def gen_wmma_mma_tests():
  mma_template = """
declare ${ret_ty} @${intrinsic}(
        ${args});

; CHECK-LABEL: .func {{.*}}test_${function}(
define ${ret_ty} @test_${function}(
        ${args}) {
; CHECK: ${instruction}
; CHECK-NEXT: ${check_d}
; CHECK-NEXT: ${check_a}
; CHECK-NEXT: ${check_b}
; CHECK-NEXT: ${check_c}
  %r = call ${ret_ty} @${intrinsic}(
        ${args});
  ret ${ret_ty} %r;
}
"""
  wmma_intrinsic_template = "llvm.nvvm.wmma.${geom}.mma.${alayout}.${blayout}.${intrinsic_signature}${satf}"
  wmma_instruction_template = "wmma.mma${mma_variant}.sync${aligned}.${alayout}.${blayout}.${geom}.${ptx_signature}${satf}"
  mma_intrinsic_template = "llvm.nvvm.mma.${geom}.${alayout}.${blayout}.${intrinsic_signature}"
  mma_instruction_template = "mma.sync${aligned}.${geom}.${alayout}.${blayout}.${ptx_signature}"

  generated_items = []

  for op, alayout, blayout, satf in product(
      get_mma_ops(),
      ["row", "col"],
      ["row", "col"],
      [".satfinite", ""]):

    if not is_mma_variant_supported(op, alayout, blayout, satf):
      continue

    params = {
        "aligned" : ".aligned" if ptx_version >= 63 else "",
        "alayout" : alayout,
        "blayout" : blayout,
        "intrinsic_signature" : mma_signature(op),
        "ptx_signature" : mma_ptx_signature(op),
        "satf"  : satf,
        "geom"  : op.a.geom,
        "mma_variant" : ".xor.popc" if op.a.mma_type.ptx_type == "b1" else "",
    }

    if op.a.geom == "m8n8k4":
      intrinsic_template = mma_intrinsic_template
      instruction_template = mma_instruction_template
    else:
      intrinsic_template = wmma_intrinsic_template
      instruction_template = wmma_instruction_template

    test_params = params
    test_params["intrinsic"] = Template(intrinsic_template).substitute(params)
    test_params["function"] = test_params["intrinsic"].replace(".", "_")
    test_params["instruction"] = Template(instruction_template).substitute(params)
    test_params["ret_ty"] = make_wmma_ld_ret_ty(op.d)
    test_params["check_a"] = check_pattern(op.a)
    test_params["check_b"] = check_pattern(op.b)
    test_params["check_c"] = check_pattern(op.c)
    test_params["check_d"] = check_pattern(op.d)
    args = ",\n        ".join(make_wmma_slice_args(frag)
                              for frag in (op.a, op.b, op.c))
    test_params["args"] = args
    print(Template(mma_template).substitute(test_params))
    generated_items.append((test_params["intrinsic"],
                            test_params["instruction"]))

  return generated_items

# Append the complete list of intrinsics and instructions we've generated
# tests for. Generate a set of checks to verify that we did generate a
# sensible set of tests for the given combination of PTX and SM variants.
#
def gen_check_unsupported_ops(items):
  print("; Complete list of intrinsics supported by PTX%d on sm_%d"
        % (ptx_version, gpu_arch))
  print("; INTRINSICS: {{^; INTRINSICS_LIST_BEGIN}}")
  print("""

; NOEXTGEOM-NOT: {{m8n32|m32n8}}
; NOINT-NOT: .{{s32|s8}}
; NOSUBINT-NOT: {{s4|u4|b1}}
; NOMMA-NOT: .m8n8k4.

; M16N16-DAG: m16n16k16.load.{{[ab].*}}.f16.p
; M16N16-DAG: m16n16k16.{{load|store}}.{{[cd].*\.(f16|f32)}}.p
; M16N16-DAG: m16n16k16.mma.{{.*}}.f16.f32
; M16N16-DAG: m16n16k16.mma.{{.*}}.f32.f16
; M16N16-DAG: m16n16k16.mma.{{.*}}.f16.f16
; M16N16-DAG: m16n16k16.mma.{{.*}}.f32.f32

; PTX61 adds support for m32n8k16/m8n32k16 geometries.
; EXTGEOM-DAG: m32n8k16.load.{{[ab].*}}.f16.p
; EXTGEOM-DAG: m32n8k16.{{load|store}}.{{[cd].*\.(f16|f32)}}.p
; EXTGEOM-DAG: m32n8k16.mma.{{.*}}.f16.f32
; EXTGEOM-DAG: m32n8k16.mma.{{.*}}.f32.f16
; EXTGEOM-DAG: m32n8k16.mma.{{.*}}.f16.f16
; EXTGEOM-DAG: m32n8k16.mma.{{.*}}.f32.f32

; EXTGEOM-DAG: m8n32k16.load.{{[ab].*}}.f16.p
; EXTGEOM-DAG: m8n32k16.{{load|store}}.{{[cd].*\.(f16|f32)}}.p
; EXTGEOM-DAG: m8n32k16.mma.{{.*}}.f16.f32
; EXTGEOM-DAG: m8n32k16.mma.{{.*}}.f32.f16
; EXTGEOM-DAG: m8n32k16.mma.{{.*}}.f16.f16
; EXTGEOM-DAG: m8n32k16.mma.{{.*}}.f32.f32

; INT-DAG: m16n16k16.load.{{[ab].*}}.s8.p
; INT-DAG: m8n32k16.load.{{[ab].*}}.s8.p
; INT-DAG: m32n8k16.load.{{[ab].*}}.s8.p
; INT-DAG: m16n16k16.load.{{[ab].*}}.u8.p
; INT-DAG: m8n32k16.load.{{[ab].*}}.u8.p
; INT-DAG: m32n8k16.load.{{[ab].*}}.u8.p
; INT-DAG: m32n8k16.{{load|store}}.{{[cd].*\.s32}}.p
; INT-DAG: m16n16k16.mma.{{.*}}.u8
; INT-DAG: m16n16k16.mma.{{.*}}.s8
; INT-DAG: m8n32k16.mma.{{.*}}.u8
; INT-DAG: m8n32k16.mma.{{.*}}.s8
; INT-DAG: m32n8k16.mma.{{.*}}.u8
; INT-DAG: m32n8k16.mma.{{.*}}.s8

; SUBINT-DAG: m8n8k128.load.{{[ab].*}}.b1.p
; SUBINT-DAG: m8n8k32.load.{{[ab].*}}.s4.p
; SUBINT-DAG: m8n8k32.load.{{[ab].*}}.u4.p
; SUBINT-DAG: m8n8k128.{{load|store}}.{{[cd].*\.s32}}.p
; SUBINT-DAG: m8n8k32.{{load|store}}.{{[cd].*\.s32}}.p
; SUBINT-DAG: m8n8k32.mma.{{.*}}.u4
; SUBINT-DAG: m8n8k32.mma.{{.*}}.s4
; SUBINT-DAG: m8n8k128.mma.{{.*}}.b1

; MMA-DAG: mma.m8n8k4.{{.*}}.f16.f32
; MMA-DAG: mma.m8n8k4.{{.*}}.f32.f16
; MMA-DAG: mma.m8n8k4.{{.*}}.f16.f16
; MMA-DAG: mma.m8n8k4.{{.*}}.f32.f32
;

""")

  print("; INTRINSICS_LIST_BEGIN")
  for intrinsic, instruction in sorted(items):
    print("; ", intrinsic, " -> ", instruction, "")
  print("; INTRINSICS_LIST_END")
  print("; INTRINSICS: ; INTRINSICS_LIST_END")

def gen_tests():
  items = gen_wmma_load_tests()
  items += gen_wmma_store_tests()
  items += gen_wmma_mma_tests()
  gen_check_unsupported_ops(items)

parser = argparse.ArgumentParser()
parser.add_argument("--ptx", type=int, default=60)
parser.add_argument("--gpu-arch", type=int, default=70)
args = parser.parse_args()
ptx_version = args.ptx
gpu_arch = args.gpu_arch

gen_tests()