#include <moe_flatmm_kernel.hpp>
|
| template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, class ExpertBias = FlatmmScalePointer<-1>> |
| static CK_TILE_HOST constexpr auto | MakeKernelArgs (const MoeFlatmmHostArgs< ScaleM, ScaleN, ExpertBias > &hostArgs) |
| static CK_TILE_HOST const std::string | GetName () |
| static constexpr auto | BlockSize () -> dim3 |
| static constexpr auto | GridSize (index_t M, index_t N, index_t KBatch) |
| template<class MoeFlatmmKernelArgs> |
| static constexpr auto | GridSize (const MoeFlatmmKernelArgs &kargs) |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemPingSize () |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemPongSize () |
| template<typename KernelArgs> |
| static CK_TILE_HOST bool | IsSupportedArgument (const KernelArgs &kargs) |
| template<memory_operation_enum DstInMemOp = IsInputGemm ? memory_operation_enum::set : memory_operation_enum::atomic_add, typename KernelArgs> |
| static CK_TILE_DEVICE auto | MakeGemmTensorViews (const ADataType *a_ptr, const BDataType *b_flat_ptr, EDataType *e_ptr, const AccDataType *exp_weight_ptr, const int expert_id, const KernelArgs &kargs, const SplitKBatchOffset &splitk_batch_offset) |
| template<typename TensorView> |
| static CK_TILE_DEVICE auto | MakeGemmPadViews (const TensorView &views) |
| template<typename PadView> |
| static CK_TILE_DEVICE auto | MakeGemmTileWindows (const PadView &views, const index_t coord_m, const index_t coord_n) |
◆ AccDataType
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
| using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::AccDataType = float |
◆ ActivationOp
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
| using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::ActivationOp = FusedActivation |
◆ ADataType
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ ALayout
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ BDataType
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ BLayout
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ BlockGemmShape
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
| using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::BlockGemmShape |
Initial value:
References: remove_cvref_t = remove_cv_t< std::remove_reference_t< T > > (Definition type_traits.hpp:21)
◆ DsDataType
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ DsLayout
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ EDataType
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ ELayout
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ EpiloguePipeline
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ FlatmmPipeline
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ TilePartitioner
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ BlockSize()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
| constexpr auto ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::BlockSize |
( |
| ) |
->dim3 |
|
inline static constexpr |
◆ GetName()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ GetSmemPingSize()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ GetSmemPongSize()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ GridSize() [1/2]
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ GridSize() [2/2]
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ IsSupportedArgument()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
template<typename KernelArgs>
◆ MakeGemmPadViews()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
template<typename TensorView>
◆ MakeGemmTensorViews()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
template<
memory_operation_enum DstInMemOp = IsInputGemm ? memory_operation_enum::set : memory_operation_enum::atomic_add, typename KernelArgs>
| CK_TILE_DEVICE auto ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::MakeGemmTensorViews |
( |
const ADataType * | a_ptr, |
|
|
const BDataType * | b_flat_ptr, |
|
|
EDataType * | e_ptr, |
|
|
const AccDataType * | exp_weight_ptr, |
|
|
const int | expert_id, |
|
|
const KernelArgs & | kargs, |
|
|
const SplitKBatchOffset & | splitk_batch_offset ) |
|
inline static |
◆ MakeGemmTileWindows()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
template<typename PadView>
◆ MakeKernelArgs()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, class ExpertBias = FlatmmScalePointer<-1>>
◆ operator()() [1/2]
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ operator()() [2/2]
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ I0
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ I1
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ I2
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ I3
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ isCTransposed
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
| index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::isCTransposed = EpiloguePipeline::isCTransposed |
|
static constexpr |
◆ IsGateUp
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ IsInputGemm
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ K_Pack
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ kBlockSize
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ kMPerBlock
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ kMPerIteration
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ kNPerBlock
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ kNPerIteration
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ kNRepeat
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ KPerXdl
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ MPerXdl
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ MWave
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ MXFP4_Pipeline
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ MXFP4K_Pack
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
| int ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::MXFP4K_Pack = 2 |
|
static constexpr |
◆ MXFP4N_Pack
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
| int ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::MXFP4N_Pack = 2 |
|
static constexpr |
◆ N_Pack
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ NPerXdl
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ NumDTensor
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ NWave
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
◆ OutputNPerBlock
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
| int ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::OutputNPerBlock |
|
static constexpr |
Initial value:
= IsGateUp ? TilePartitioner::NPerBlock / 2 : TilePartitioner::NPerBlock
References: static constexpr bool IsGateUp (Definition moe_flatmm_kernel.hpp:225)
◆ UsePersistentKernel
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
| bool ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::UsePersistentKernel = FlatmmPipeline::UsePersistentKernel |
|
static constexpr |
◆ WeightPackedSize
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_,
MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
The documentation for this struct was generated from the following file: