warp_gemm.hpp Source File

warp_gemm.hpp Source File

Composable Kernel: warp_gemm.hpp Source File
warp_gemm.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5
6#include "ck_tile/core.hpp"
9
12
13namespace ck_tile {
14
15// fp32
16
19
20template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
23 4,
24 AttrNumAccess>>;
25
26template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
30 4,
31 AttrNumAccess>>;
32
33// fp16
34
37
40
41#if defined(__gfx950__)
42template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
45 AttrNumAccess>>;
46#else
47template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
50 2,
51 AttrNumAccess>>;
52#endif
53
54#if defined(__gfx950__)
55template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
58 AttrNumAccess>>;
59#else
60template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
63 2,
64 AttrNumAccess>>;
65#endif
66
69 1>>;
70
73 2>>;
74
78
82
83#if defined(__gfx950__)
84template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
88 AttrNumAccess>>;
89#else
90template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
94 2,
95 AttrNumAccess>>;
96#endif
97
98#if defined(__gfx950__)
99template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
103 AttrNumAccess>>;
104#else
105template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
109 2,
110 AttrNumAccess>>;
111#endif
112
113#if defined(__gfx950__)
114using WarpGemmMfmaF16F16F32M16N16K32SwizzleBTransposedCDistribution =
117 1>>;
118
119using WarpGemmMfmaBf16Bf16F32M16N16K32SwizzleBTransposedCDistribution =
122 1>>;
123#endif
124
128
129#if defined(__gfx950__)
133#else
137 2>>;
138#endif
139
142 4>>;
143
146 4>>;
147
148// fp16 2:4 structured sparsity
151
154
155// bf16
158
161
162#if defined(__gfx950__)
163template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
166 AttrNumAccess>>;
167#else
168template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
171 2,
172 AttrNumAccess>>;
173#endif
174
175#if defined(__gfx950__)
176template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
179 AttrNumAccess>>;
180#else
181template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
184 2,
185 AttrNumAccess>>;
186#endif
187
190 1>>;
191
195 2>>;
196
200
204
205#if defined(__gfx950__)
206template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
210 AttrNumAccess>>;
211#else
212template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
216 2,
217 AttrNumAccess>>;
218#endif
219
220#if defined(__gfx950__)
221template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
225 AttrNumAccess>>;
226#else
227template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
231 2,
232 AttrNumAccess>>;
233#endif
234
238
239#if defined(__gfx950__)
243#else
247 2>>;
248#endif
249
252 4>>;
253
256 4>>;
257
258// fp8
259
262
265
268
271
274
277 2>>;
278
281 2>>;
282
285 2>>;
286
289
293
296
300
303 2>>;
304
307 2>>;
308
309template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
312 AttrNumAccess>>;
313template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
316 AttrNumAccess>>;
317
318template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
321 AttrNumAccess>>;
322
323template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
326 AttrNumAccess>>;
327
328template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
331 AttrNumAccess>>;
332
333template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
337 AttrNumAccess>>;
338
339template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
343 AttrNumAccess>>;
344
345template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
349 AttrNumAccess>>;
350
351template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
355 AttrNumAccess>>;
356
357template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
360 AttrNumAccess>>;
361
362template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
365 AttrNumAccess>>;
366
367template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
370 AttrNumAccess>>;
371
372template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
375 AttrNumAccess>>;
376
380
384
388
392
393template <index_t swizzle_factor = 2>
397 2,
398 swizzle_factor>>;
399
400// int8
403
407
410
414
415} // namespace ck_tile
Definition tile/core/algorithm/cluster_descriptor.hpp:13
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x128_bf8_fp8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_bf8_fp8
Definition warp_gemm.hpp:324
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_bf8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_bf8_bf8_CTransposed
Definition warp_gemm.hpp:389
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x32_bf8_bf8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_16x16x32_bf8_bf8
Definition warp_gemm.hpp:294
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_fp8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_fp8_fp8
Definition warp_gemm.hpp:260
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplBf16Bf16F32M16N16K16< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaBf16Bf16F32M16N16K16
Definition warp_gemm.hpp:159
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x128_fp8_fp8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_fp8_fp8
Definition warp_gemm.hpp:314
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x64_fp8_bf8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_32x32x64_fp8_bf8
Definition warp_gemm.hpp:363
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_fp8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_32x32x32_fp8_fp8
Definition warp_gemm.hpp:275
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_i32_16x16x32_i8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_i32_16x16x32_i8_i8
Definition warp_gemm.hpp:408
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplBf16Bf16F32M16N16K16< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaBf16Bf16F32M16N16K32
Definition warp_gemm.hpp:182
WarpGemmSmfmacImpl< WarpGemmAttributeSmfmac< WarpGemmAttributeSmfmacImplF16F16F32M16N16K32< WGAttrCtlEnum::Default_ > > > WarpGemmSmfmacF16F16F32M16N16K32
Definition warp_gemm.hpp:152
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplF16F16F32M4N64K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaF16F16F32M4N64K16
Definition warp_gemm.hpp:140
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_i32_16x16x32_i8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_i32_16x16x32_i8_i8_CTransposed
Definition warp_gemm.hpp:411
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_i32_32x32x16_i8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_i32_32x32x16_i8_i8_CTransposed
Definition warp_gemm.hpp:404
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_fp8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_16x16x32_fp8_fp8_CTransposed
Definition warp_gemm.hpp:290
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaBf16Bf16F32M32N32K8TransposedCDistribution
Definition warp_gemm.hpp:197
WarpGemmAttributeMfmaImpl_f32_32x32x16_f8_base< fp8_t, bf8_t, Ctrl_ > WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_bf8
Definition warp_gemm_attribute_mfma_impl.hpp:1511
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplBf16Bf16F32M4N64K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaBf16Bf16F32M4N64K16
Definition warp_gemm.hpp:250
WarpGemmAttributeMfmaImpl_f32_32x32x16_f8_base< bf8_t, bf8_t, Ctrl_ > WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_bf8
Definition warp_gemm_attribute_mfma_impl.hpp:1526
WarpGemmAttributeMfmaImpl_f32_32x32x16_f8_base< fp8_t, fp8_t, Ctrl_ > WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_fp8
Definition warp_gemm_attribute_mfma_impl.hpp:1505
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaScaleImpl_f32_16x16x128_fp4< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_fp4
Definition warp_gemm.hpp:310
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x32_bf8_bf8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_16x16x32_bf8_bf8_CTransposed
Definition warp_gemm.hpp:297
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplF16F16F32M16N16K16< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaF16F16F32M16N16K16
Definition warp_gemm.hpp:38
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleBTransposedCDistribution
Definition warp_gemm.hpp:235
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x64_fp8_fp8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_32x32x64_fp8_fp8
Definition warp_gemm.hpp:358
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_fp8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_fp8_fp8_CTransposed
Definition warp_gemm.hpp:377
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_bf8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_32x32x32_fp8_bf8
Definition warp_gemm.hpp:283
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplF16F16F32M64N4K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaF16F16F32M64N4K16
Definition warp_gemm.hpp:144
WarpGemmAttributeMfmaImpl_f32_16x16x32_f8_base< fp8_t, fp8_t, Ctrl_ > WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_fp8
Definition warp_gemm_attribute_mfma_impl.hpp:1508
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplBf16Bf16F32M64N4K4< WGAttrCtlEnum::Default_ >, 4 > > WarpGemmMfmaBf16Bf16F32M64N4K16
Definition warp_gemm.hpp:254
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_fp8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_16x16x32_fp8_fp8
Definition warp_gemm.hpp:287
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaF16F16F32M32N32K16
Definition warp_gemm.hpp:48
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplF16F16F32M16N16K16< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaF16F16F32M16N16K32
Definition warp_gemm.hpp:61
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaBf16Bf16F32M32N32K16
Definition warp_gemm.hpp:169
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution
Definition warp_gemm.hpp:134
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x128_bf8_bf8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_bf8_bf8
Definition warp_gemm.hpp:329
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplBf16Bf16F32M16N16K16< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaBf16Bf16F32M16N16K16TransposedCDistribution
Definition warp_gemm.hpp:201
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleA
Definition warp_gemm.hpp:192
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x128_fp8_fp8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_fp8_fp8_CTransposed
Definition warp_gemm.hpp:334
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x64_bf8_fp8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_32x32x64_bf8_fp8
Definition warp_gemm.hpp:368
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplF16F16F32M16N16K16< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaF16F16F32M16N16K16TransposedCDistribution
Definition warp_gemm.hpp:79
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaF16F16F32M32N32K16TransposedCDistribution
Definition warp_gemm.hpp:91
WarpGemmAttributeMfmaImpl_f32_16x16x128_f8_bf8_base< fp8_t, bf8_t, Ctrl_ > WarpGemmAttributeMfmaImpl_f32_16x16x128_fp8_bf8
Definition warp_gemm_attribute_mfma_impl.hpp:1616
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaBf16Bf16F32M32N32K8
Definition warp_gemm.hpp:156
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaF16F16F32M32N32K16SwizzleA
Definition warp_gemm.hpp:71
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_bf8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_32x32x32_bf8_bf8
Definition warp_gemm.hpp:279
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_16x16x32_bf8_bf8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_16x16x64_bf8_bf8
Definition warp_gemm.hpp:305
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleBTransposedCDistribution
Definition warp_gemm.hpp:244
WarpGemmAttributeMfmaImpl_f32_16x16x128_f8_bf8_base< bf8_t, bf8_t, Ctrl_ > WarpGemmAttributeMfmaImpl_f32_16x16x128_bf8_bf8
Definition warp_gemm_attribute_mfma_impl.hpp:1624
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ >, 1 > > WarpGemmMfmaF16F16F32M32N32K8SwizzleA
Definition warp_gemm.hpp:67
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution< WarpGemmAttributeMfmaImplBf16Bf16F32M16N16K16< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaBf16Bf16F32M16N16K32TransposedCDistribution
Definition warp_gemm.hpp:228
WarpGemmSmfmacImpl< WarpGemmAttributeSmfmac< WarpGemmAttributeSmfmacImplF16F16F32M32N32K16< WGAttrCtlEnum::Default_ > > > WarpGemmSmfmacF16F16F32M32N32K16
Definition warp_gemm.hpp:149
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x64_bf8_bf8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_32x32x64_bf8_bf8
Definition warp_gemm.hpp:373
WarpGemmAttributeMfmaImpl_f32_32x32x16_f8_base< bf8_t, fp8_t, Ctrl_ > WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_fp8
Definition warp_gemm_attribute_mfma_impl.hpp:1522
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_bf8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_16x16x32_fp8_bf8
Definition warp_gemm.hpp:266
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution< WarpGemmAttributeMfmaImplF32F32F32M16N16K4< WGAttrCtlEnum::Default_ >, 4, AttrNumAccess > > WarpGemmMfmaF32F32F32M16N16K16TransposedCDistribution
Definition warp_gemm.hpp:27
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaF16F16F32M32N32K8
Definition warp_gemm.hpp:35
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaBf16Bf16F32M32N32K16TransposedCDistribution
Definition warp_gemm.hpp:213
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaF16F16F32M32N32K8SwizzleBTransposedCDistribution
Definition warp_gemm.hpp:125
WarpGemmAttributeMfmaImpl_f32_16x16x128_f8_bf8_base< fp8_t, fp8_t, Ctrl_ > WarpGemmAttributeMfmaImpl_f32_16x16x128_fp8_fp8
Definition warp_gemm_attribute_mfma_impl.hpp:1612
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution< WarpGemmAttributeMfmaImplF16F16F32M16N16K16< WGAttrCtlEnum::Default_ >, 2, AttrNumAccess > > WarpGemmMfmaF16F16F32M16N16K32TransposedCDistribution
Definition warp_gemm.hpp:106
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImplF32F32F32M16N16K4< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaF32F32F32M16N16K4
Definition warp_gemm.hpp:17
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_16x16x128_fp8_bf8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_fp8_bf8
Definition warp_gemm.hpp:319
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x128_bf8_fp8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_bf8_fp8_CTransposed
Definition warp_gemm.hpp:346
WarpGemmAttributeMfmaImpl_f32_16x16x128_f8_bf8_base< bf8_t, fp8_t, Ctrl_ > WarpGemmAttributeMfmaImpl_f32_16x16x128_bf8_fp8
Definition warp_gemm_attribute_mfma_impl.hpp:1620
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x128_fp8_bf8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_fp8_bf8_CTransposed
Definition warp_gemm.hpp:340
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_bf8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_fp8_bf8_CTransposed
Definition warp_gemm.hpp:381
WarpGemmImpl< WarpGemmAttributeMfmaIterateK_SwizzleA< WarpGemmAttributeMfmaImplBf16Bf16F32M32N32K8< WGAttrCtlEnum::Default_ >, 1 > > WarpGemmMfmaBf16Bf16F32M32N32K8SwizzleA
Definition warp_gemm.hpp:188
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImplF16F16F32M32N32K8< WGAttrCtlEnum::Default_ > > > WarpGemmMfmaF16F16F32M32N32K8TransposedCDistribution
Definition warp_gemm.hpp:75
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImplF32F32F32M16N16K4< WGAttrCtlEnum::Default_ >, 4, AttrNumAccess > > WarpGemmMfmaF32F32F32M16N16K16
Definition warp_gemm.hpp:21
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_fp8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_bf8_fp8_CTransposed
Definition warp_gemm.hpp:385
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_i32_32x32x16_i8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_i32_32x32x16_i8_i8
Definition warp_gemm.hpp:401
WarpGemmAttributeMfmaImpl_f32_16x16x32_f8_base< bf8_t, bf8_t, Ctrl_ > WarpGemmAttributeMfmaImpl_f32_16x16x32_bf8_bf8
Definition warp_gemm_attribute_mfma_impl.hpp:1518
WarpGemmImpl< WarpGemmAttributeMfmaIterateK< WarpGemmAttributeMfmaImpl_f32_16x16x32_fp8_fp8< WGAttrCtlEnum::Default_ >, 2 > > WarpGemmMfma_f32_16x16x64_fp8_fp8
Definition warp_gemm.hpp:301
WarpGemmImpl< WarpGemmAttributeMfmaTransposedCDistribution< WarpGemmAttributeMfmaImpl_f32_16x16x128_bf8_bf8< WGAttrCtlEnum::Default_ >, AttrNumAccess > > WarpGemmMfma_f32_16x16x128_bf8_bf8_CTransposed
Definition warp_gemm.hpp:352
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_fp8< WGAttrCtlEnum::Default_ >, 2, swizzle_factor > > WarpGemmMfmaFp8Fp8F32M32N32K32SwizzleBTransposedCDistribution
Definition warp_gemm.hpp:394
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_bf8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_fp8_bf8
Definition warp_gemm.hpp:263
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_bf8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_bf8_bf8
Definition warp_gemm.hpp:272
WarpGemmImpl< WarpGemmAttributeMfma< WarpGemmAttributeMfmaImpl_f32_32x32x16_bf8_fp8< WGAttrCtlEnum::Default_ > > > WarpGemmMfma_f32_32x32x16_bf8_fp8
Definition warp_gemm.hpp:269
Definition warp_gemm_attribute_mfma.hpp:23
Definition warp_gemm_attribute_mfma_impl.hpp:1890
Definition warp_gemm_attribute_mfma_impl.hpp:1820
Definition warp_gemm_attribute_mfma_impl.hpp:666
Definition warp_gemm_attribute_mfma_impl.hpp:196
Definition warp_gemm_attribute_mfma_impl.hpp:1049
Definition warp_gemm_attribute_mfma_impl.hpp:577
Definition warp_gemm_attribute_mfma_impl.hpp:754
Definition warp_gemm_attribute_mfma_impl.hpp:844
Definition warp_gemm_attribute_mfma_impl.hpp:322
Definition warp_gemm_attribute_mfma_impl.hpp:385
Definition warp_gemm_attribute_mfma_impl.hpp:935
Definition warp_gemm_attribute_mfma_impl.hpp:259
Definition warp_gemm_attribute_mfma_impl.hpp:448
Definition warp_gemm_attribute_mfma_impl.hpp:512
Definition warp_gemm_attribute_mfma_impl.hpp:67
Definition warp_gemm_attribute_mfma.hpp:869
Definition warp_gemm_attribute_mfma.hpp:575
Definition warp_gemm_attribute_mfma.hpp:130
Definition warp_gemm_attribute_mfma.hpp:479
Definition warp_gemm_attribute_mfma.hpp:395
Class describing structured-sparsity MFMA (SMFMAC) instructions.
Definition warp_gemm_attribute_smfmac.hpp:26
Definition warp_gemm_attribute_smfmac_impl.hpp:65
Definition warp_gemm_attribute_smfmac_impl.hpp:14
Definition warp_gemm_impl.hpp:11
Definition warp_gemm_smfmac_impl.hpp:11