DeviceBatchedGemmGemm< ALayout, B0Layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AElementwiseOperation, B0ElementwiseOperation, Acc0ElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation > Struct Template Reference

DeviceBatchedGemmGemm&lt; ALayout, B0Layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AElementwiseOperation, B0ElementwiseOperation, Acc0ElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation &gt; Struct Template Reference#

Composable Kernel: ck::tensor_operation::device::DeviceBatchedGemmGemm< ALayout, B0Layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AElementwiseOperation, B0ElementwiseOperation, Acc0ElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation > Struct Template Reference
ck::tensor_operation::device::DeviceBatchedGemmGemm< ALayout, B0Layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AElementwiseOperation, B0ElementwiseOperation, Acc0ElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation > Struct Template Referenceabstract

#include <device_batched_gemm_gemm.hpp>

Inheritance diagram for ck::tensor_operation::device::DeviceBatchedGemmGemm< ALayout, B0Layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AElementwiseOperation, B0ElementwiseOperation, Acc0ElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation >:
ck::tensor_operation::device::BaseOperator

Public Member Functions

virtual std::unique_ptr< BaseArgumentMakeArgumentPointer (const void *p_a, const void *p_b0, const void *p_b1, void *p_c, ck::index_t M, ck::index_t N, ck::index_t K, ck::index_t O, ck::index_t Batch, ck::index_t StrideA, ck::index_t StrideB0, ck::index_t StrideB1, ck::index_t StrideC, ck::index_t BatchStrideA, ck::index_t BatchStrideB0, ck::index_t BatchStrideB1, ck::index_t BatchStrideC, AElementwiseOperation a_element_op, B0ElementwiseOperation b0_element_op, Acc0ElementwiseOperation acc0_element_op, B1ElementwiseOperation b1_element_op, CElementwiseOperation c_element_op)=0
virtual std::unique_ptr< BaseInvokerMakeInvokerPointer ()=0
Public Member Functions inherited from ck::tensor_operation::device::BaseOperator
 BaseOperator ()=default
 BaseOperator (const BaseOperator &)=default
BaseOperatoroperator= (const BaseOperator &)=default
virtual bool IsSupportedArgument (const BaseArgument *)
virtual std::string GetTypeString () const
virtual std::string GetInstanceString () const
virtual std::string GetTypeIdName () const
virtual std::optional< std::string > GetObjectName () const
virtual std::optional< std::string > GetTemplateInfo () const
virtual std::string GetTypeIdHashCode () const
virtual size_t GetWorkSpaceSize (const BaseArgument *) const
virtual void SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const
virtual ~BaseOperator ()

Member Function Documentation

◆ MakeArgumentPointer()

template<typename ALayout, typename B0Layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename Acc0ElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation>
virtual std::unique_ptr< BaseArgument > ck::tensor_operation::device::DeviceBatchedGemmGemm< ALayout, B0Layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AElementwiseOperation, B0ElementwiseOperation, Acc0ElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation >::MakeArgumentPointer ( const void * p_a,
const void * p_b0,
const void * p_b1,
void * p_c,
ck::index_t M,
ck::index_t N,
ck::index_t K,
ck::index_t O,
ck::index_t Batch,
ck::index_t StrideA,
ck::index_t StrideB0,
ck::index_t StrideB1,
ck::index_t StrideC,
ck::index_t BatchStrideA,
ck::index_t BatchStrideB0,
ck::index_t BatchStrideB1,
ck::index_t BatchStrideC,
AElementwiseOperation a_element_op,
B0ElementwiseOperation b0_element_op,
Acc0ElementwiseOperation acc0_element_op,
B1ElementwiseOperation b1_element_op,
CElementwiseOperation c_element_op )
pure virtual

Implemented in ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >, and ck::tensor_operation::device::DeviceBatchedGemmGemm_Xdl_CShuffle< ALayout, BLayout, B1Layout, CLayout, ADataType, BDataType, B1DataType, CDataType, GemmAccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, AK1, BK1, B1K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, Gemm1NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >.

◆ MakeInvokerPointer()

template<typename ALayout, typename B0Layout, typename B1Layout, typename CLayout, typename ADataType, typename B0DataType, typename B1DataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename Acc0ElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation>
virtual std::unique_ptr< BaseInvoker > ck::tensor_operation::device::DeviceBatchedGemmGemm< ALayout, B0Layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AElementwiseOperation, B0ElementwiseOperation, Acc0ElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation >::MakeInvokerPointer ( )
pure virtual

Implemented in ck::tensor_operation::device::DeviceBatchedGemmGemm_Wmma_CShuffleV3< ALayout, B0layout, B1Layout, CLayout, ADataType, B0DataType, B1DataType, CDataType, AccDataType, CShuffleDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, LPerBlock, KPerBlock, NPerBlock, LTilePerBlock, AK1, BK1, L1, MPerWmma, LPerWmma, MRepeat, LRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0BlockLdsAddExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1BlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, BlkGemmPipeSched, BlkGemmPipelineVer >, and ck::tensor_operation::device::DeviceBatchedGemmGemm_Xdl_CShuffle< ALayout, BLayout, B1Layout, CLayout, ADataType, BDataType, B1DataType, CDataType, GemmAccDataType, CShuffleDataType, AElementwiseOperation, BElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, AK1, BK1, B1K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, Gemm1NXdlPerWave, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >.


The documentation for this struct was generated from the following file: