fused_moegemm_traits.hpp Source File

fused_moegemm_traits.hpp Source File#

Composable Kernel: fused_moegemm_traits.hpp Source File
fused_moegemm_traits.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5
6#include "ck_tile/core.hpp"
7
8namespace ck_tile {
9
// Enumerator list selecting a weight-permutation layout.
// NOTE(review): listing line 10 (the declaration that opens this brace) is
// missing from this extract; the symbol index at the end of the page attributes
// listing line 11 to FusedMoeGemmWeightPermuteEnum. Listing lines 14 and 16 are
// missing as well; the index places `no_permute` and `b_nr_kr_waveflatten`
// there, but their values are not visible here -- confirm against the header.
11{
12 // permute_b_n0_k0_n1_k1_n2_k2 = 0, // 0,1,4,2,5,3,6
13 // permute_b_n0_n1_k0_k1_n2_k2 = 1, // 0,1,2,4,5,3,6
// The two entries above are commented out in the original, i.e. not part of the enum.
// The trailing digit lists presumably give a dimension order -- TODO confirm.
15 b_nr_kr_kw_nw_kv = 1, // 0,1,3,4,2,5
17};
18
// Compile-time trait bundle: every template parameter (name ending in `_`) is
// re-exported below as a `static constexpr` member with the same name minus the
// trailing underscore. The struct holds no other state or logic in the visible lines.
// NOTE(review): listing lines 22-23 and 27 are missing from this extract. The
// body reads `PermuteEnum_`, which none of the visible parameters declares, so
// it is presumably declared on the missing lines 22-23 (default value not
// visible). Line 27, the struct declaration itself, is also missing, so the
// struct's name cannot be read from this listing -- confirm against the header.
19template <bool IsGateOnly_,
20 bool UseSmoothQuant_,
21 index_t OAtomic_, // 0-no atomic, 1-atomic-pk-f16/bf16, 2-atomic-f32
24 bool PadHiddenSize_ = false,
25 bool PadIntermediateSize_ = false,
26 bool PipeInterleave_ = true>
28{
29 // Gate+Up or Gate only
30 static constexpr bool IsGateOnly = IsGateOnly_;
31 static constexpr bool UseSmoothQuant = UseSmoothQuant_;
// Output atomic mode; encoding as given on the template parameter:
// 0 = no atomic, 1 = atomic-pk-f16/bf16, 2 = atomic-f32.
32 static constexpr index_t OAtomic = OAtomic_;
33 static constexpr FusedMoeGemmWeightPermuteEnum PermuteEnum = PermuteEnum_;
// The two padding flags default to false and the interleave flag to true.
34 static constexpr bool PadHiddenSize = PadHiddenSize_;
35 static constexpr bool PadIntermediateSize = PadIntermediateSize_;
36 static constexpr bool PipeInterleave = PipeInterleave_;
37};
38
39// Note: these enumerators need to form a bit mask.
// Each value below is a distinct single bit (1 << k), so values can be combined
// with bitwise OR without colliding.
// NOTE(review): listing line 40 (the declaration that opens this brace) is
// missing from this extract; the symbol index at the end of the page attributes
// listing line 41 to FusedMoeGemmPipelineSequencerEnum.
41{
42 SLD_A = 1 << 0, // shared load a
43 SLD_B = 1 << 1, // presumably shared load b, by analogy with SLD_A
44 GLD_A = 1 << 2, // global load a
45 GLD_B = 1 << 3, // presumably global load b, by analogy with GLD_A
46 SST_A = 1 << 4, // shared store a
47 SST_B = 1 << 5, // presumably shared store b, by analogy with SST_A
48 GST_O = 1 << 6, // global store out
49};
50} // namespace ck_tile
Definition tile/core/algorithm/cluster_descriptor.hpp:13
FusedMoeGemmPipelineSequencerEnum
Definition fused_moegemm_traits.hpp:41
@ GST_O
Definition fused_moegemm_traits.hpp:48
@ SST_A
Definition fused_moegemm_traits.hpp:46
@ SST_B
Definition fused_moegemm_traits.hpp:47
@ GLD_B
Definition fused_moegemm_traits.hpp:45
@ SLD_A
Definition fused_moegemm_traits.hpp:42
@ SLD_B
Definition fused_moegemm_traits.hpp:43
@ GLD_A
Definition fused_moegemm_traits.hpp:44
int32_t index_t
Definition integer.hpp:9
FusedMoeGemmWeightPermuteEnum
Definition fused_moegemm_traits.hpp:11
@ no_permute
Definition fused_moegemm_traits.hpp:14
@ b_nr_kr_kw_nw_kv
Definition fused_moegemm_traits.hpp:15
@ b_nr_kr_waveflatten
Definition fused_moegemm_traits.hpp:16
Definition fused_moegemm_traits.hpp:28
static constexpr FusedMoeGemmWeightPermuteEnum PermuteEnum
Definition fused_moegemm_traits.hpp:33
static constexpr bool PadHiddenSize
Definition fused_moegemm_traits.hpp:34
static constexpr bool PipeInterleave
Definition fused_moegemm_traits.hpp:36
static constexpr bool PadIntermediateSize
Definition fused_moegemm_traits.hpp:35
static constexpr bool UseSmoothQuant
Definition fused_moegemm_traits.hpp:31
static constexpr index_t OAtomic
Definition fused_moegemm_traits.hpp:32
static constexpr bool IsGateOnly
Definition fused_moegemm_traits.hpp:30