epilogue_cshuffle_v3_wmma.hpp Source File
epilogue_cshuffle_v3_wmma.hpp
Go to the documentation of this file.
Definition ck.hpp:268
__host__ __device__ constexpr auto generate_tuple(F &&f, Number< N >)
Definition tuple_helper.hpp:21
__host__ __device__ constexpr auto make_dynamic_buffer(T *p, ElementSpaceSize element_space_size)
Definition dynamic_buffer.hpp:472
__host__ __device__ constexpr auto generate_tie(F &&f, Number< N >)
Definition tuple_helper.hpp:34
__host__ __device__ constexpr auto concat_tuple_of_reference(const Tuple< X &... > &tx, const Tuple< Y &... > &ty)
Definition tuple_helper.hpp:42
Definition epilogue_cshuffle_v3_wmma_base.hpp:29
static constexpr index_t NumDTensor
Definition epilogue_cshuffle_v3_wmma_base.hpp:38
static __device__ auto GetLDSToVmemEpilogueDescriptor(CDsDescRefs &c_ds_desc_refs, EGridDesc &e_grid_desc_mblock_mperblock_nblock_nperblock, CDEElementwiseOperation &cde_element_op, const index_t &block_m_id, const index_t &block_n_id)
Definition epilogue_cshuffle_v3_wmma_base.hpp:204
SpaceFillingCurve< Sequence< MRepeat, 1, 1, NRepeat, 1, 1, BlockwiseGemmPipe::MAccVgprs >, Sequence< 0, 1, 2, 3, 4, 5, 6 >, Sequence< CShuffleMRepeatPerShuffle, 1, 1, CShuffleNRepeatPerShuffle, 1, 1, BlockwiseGemmPipe::MAccVgprs > > SpaceFillingCurveVgpr
Definition epilogue_cshuffle_v3_wmma_base.hpp:42
static __device__ constexpr auto GetCShuffleBlockDescriptor_MShRepeat_MPerShRepeat_NShRepeat_NPerShRepeat()
Definition epilogue_cshuffle_v3_wmma_base.hpp:63
static constexpr auto I1
Definition epilogue_cshuffle_v3_wmma_base.hpp:31
static __device__ auto GetVgprToLDSEpilogueDescriptor()
Definition epilogue_cshuffle_v3_wmma_base.hpp:118
SpaceFillingCurve< Sequence< 1, MPerBlock, 1, NPerBlock >, Sequence< 0, 2, 1, 3 >, Sequence< 1, CShuffleMRepeatPerShuffle * BlockwiseGemmPipe::MWaves * MPerWmma, 1, CShuffleNRepeatPerShuffle * BlockwiseGemmPipe::NWaves * NPerWmma > > SpaceFillingCurveVmem
Definition epilogue_cshuffle_v3_wmma_base.hpp:53
static __device__ constexpr auto GetCShuffleLDSDescriptor()
Definition epilogue_cshuffle_v3_wmma_base.hpp:78
Definition epilogue_cshuffle_v3_wmma.hpp:45
static __device__ constexpr auto GetCShuffleBlockDescriptor_MShRepeat_MPerShRepeat_NShRepeat_NPerShRepeat()
Definition epilogue_cshuffle_v3_wmma_base.hpp:63
static __device__ auto GetVgprToLDSEpilogueDescriptor()
Definition epilogue_cshuffle_v3_wmma_base.hpp:118
EpilogueCShuffleBase< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe > Base
Definition epilogue_cshuffle_v3_wmma.hpp:46
static __device__ constexpr auto GetCShuffleLDSDescriptor()
Definition epilogue_cshuffle_v3_wmma_base.hpp:78
static __device__ void Run(CThreadBuf &c_thread_buf, DsGridPointer p_ds_grid, EDataType *p_e_grid, void *p_shared, const DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock &ds_grid_desc_mblock_mperblock_nblock_nperblock, const EGridDesc_MBlock_MPerBlock_NBlock_NPerBlock &e_grid_desc_mblock_mperblock_nblock_nperblock, CDEElementwiseOperation &cde_element_op, const index_t &block_m_id, const index_t &block_n_id)
Definition epilogue_cshuffle_v3_wmma.hpp:76
Definition thread_group.hpp:12
Definition functional2.hpp:33