RawArg Struct Reference#
ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg Struct Reference
#include <device_batched_gemm_gemm_wmma_cshuffle_v3.hpp>
Inheritance diagram for ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg:
Public Types | |
| using | arr3 = std::array<ck::index_t, 3> |
Public Member Functions | |
| RawArg (const ADataType *p_a_grid_, const B0DataType *p_b0_grid_, const B1DataType *p_b1_grid_, CDataType *p_c_grid_, index_t M_, index_t N_, index_t K_, index_t O_, index_t Batch, index_t StrideA, index_t StrideB0, index_t StrideB1, index_t StrideC, index_t BatchStrideA, index_t BatchStrideB0, index_t BatchStrideB1, index_t BatchStrideC, AElementwiseOperation a_element_op_, B0ElementwiseOperation b0_element_op_, AccElementwiseOperation acc_element_op_, B1ElementwiseOperation b1_element_op_, CElementwiseOperation c_element_op_) | |
| Public Member Functions inherited from ck::tensor_operation::device::BaseArgument | |
| BaseArgument ()=default | |
| BaseArgument (const BaseArgument &)=default | |
| BaseArgument & | operator= (const BaseArgument &)=default |
| virtual | ~BaseArgument () |
Public Attributes | |
| const ADataType * | p_a_grid |
| const B0DataType * | p_b0_grid |
| const B1DataType * | p_b1_grid |
| CDataType * | p_c_grid |
| index_t | M |
| index_t | N |
| index_t | K |
| index_t | O |
| index_t | batch_count |
| arr3 | a_g_m_k_lengths |
| arr3 | a_g_m_k_strides |
| arr3 | b0_g_n_k_lengths |
| arr3 | b0_g_n_k_strides |
| arr3 | b1_g_o_n_lengths |
| arr3 | b1_g_o_n_strides |
| arr3 | c_g_m_o_lengths |
| arr3 | c_g_m_o_strides |
| AElementwiseOperation | a_element_op |
| B0ElementwiseOperation | b0_element_op |
| AccElementwiseOperation | acc_element_op |
| B1ElementwiseOperation | b1_element_op |
| CElementwiseOperation | c_element_op |
| AGridDesc | a_grid_desc |
| B0GridDesc | b0_grid_desc |
| B1GridDesc | b1_grid_desc |
| CGridDesc_M_N | c_grid_desc_m_n |
| GridwiseOp::CGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock | c_grid_desc_mblock_mperblock_nblock_nperblock |
| GridwiseOp::DefaultBlock2CTileMap | block_2_ctile_map |
| ComputeBasePtrOfStridedBatch | compute_base_ptr_of_batch |
| Public Attributes inherited from ck::tensor_operation::device::BaseArgument | |
| void * | p_workspace_ = nullptr |
Member Typedef Documentation
◆ arr3
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| using ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::arr3 = std::array<ck::index_t, 3> |
Constructor & Destructor Documentation
◆ RawArg()
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
inline |
Member Data Documentation
◆ a_element_op
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| AElementwiseOperation ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::a_element_op |
◆ a_g_m_k_lengths
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| arr3 ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::a_g_m_k_lengths |
◆ a_g_m_k_strides
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| arr3 ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::a_g_m_k_strides |
◆ a_grid_desc
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| AGridDesc ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::a_grid_desc |
◆ acc_element_op
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| AccElementwiseOperation ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::acc_element_op |
◆ b0_element_op
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| B0ElementwiseOperation ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::b0_element_op |
◆ b0_g_n_k_lengths
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| arr3 ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::b0_g_n_k_lengths |
◆ b0_g_n_k_strides
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| arr3 ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::b0_g_n_k_strides |
◆ b0_grid_desc
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| B0GridDesc ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::b0_grid_desc |
◆ b1_element_op
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| B1ElementwiseOperation ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::b1_element_op |
◆ b1_g_o_n_lengths
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| arr3 ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::b1_g_o_n_lengths |
◆ b1_g_o_n_strides
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| arr3 ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::b1_g_o_n_strides |
◆ b1_grid_desc
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| B1GridDesc ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::b1_grid_desc |
◆ batch_count
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| index_t ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::batch_count |
◆ block_2_ctile_map
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| GridwiseOp::DefaultBlock2CTileMap ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::block_2_ctile_map |
◆ c_element_op
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| CElementwiseOperation ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::c_element_op |
◆ c_g_m_o_lengths
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| arr3 ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::c_g_m_o_lengths |
◆ c_g_m_o_strides
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| arr3 ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::c_g_m_o_strides |
◆ c_grid_desc_m_n
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| CGridDesc_M_N ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::c_grid_desc_m_n |
◆ c_grid_desc_mblock_mperblock_nblock_nperblock
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| GridwiseOp::CGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::c_grid_desc_mblock_mperblock_nblock_nperblock |
◆ compute_base_ptr_of_batch
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| ComputeBasePtrOfStridedBatch ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::compute_base_ptr_of_batch |
◆ K
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| index_t ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::K |
◆ M
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| index_t ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::M |
◆ N
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| index_t ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::N |
◆ O
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| index_t ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::O |
◆ p_a_grid
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| const ADataType* ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::p_a_grid |
◆ p_b0_grid
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| const B0DataType* ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::p_b0_grid |
◆ p_b1_grid
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| const B1DataType* ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::p_b1_grid |
◆ p_c_grid
template<typename ALayout, typename B0layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AccDataType, typename CShuffleDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t LPerBlock, ck::index_t KPerBlock, ck::index_t NPerBlock, ck::index_t LTilePerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t L1, ck::index_t MPerWmma, ck::index_t LPerWmma, ck::index_t MRepeat, ck::index_t LRepeat, ck::index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, ck::index_t B0BlockTransferSrcVectorDim, ck::index_t B0BlockTransferSrcScalarPerVector, ck::index_t B0BlockTransferDstScalarPerVector_K1, bool B0BlockLdsAddExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, ck::index_t B1BlockTransferSrcVectorDim, ck::index_t B1BlockTransferSrcScalarPerVector, ck::index_t B1BlockTransferDstScalarPerVector_L1, bool B1BlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| CDataType* ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >::RawArg::p_c_grid |
The documentation for this struct was generated from the following file: