BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB > Struct Template Reference
Public Types |
Public Member Functions |
Static Public Member Functions |
Public Attributes |
Static Public Attributes |
Protected Types |
Protected Attributes |
Static Protected Attributes |
List of all members
ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB > Struct Template Reference
#include <blockwise_gemm_smfmac_xdlops.hpp>
Public Types | |
| using | ThisThreadBlock = ThisThreadBlock<BlockSize> |
| using | ThisThreadBlock = ThisThreadBlock<BlockSize> |
| using | ElementDataTypeA |
| using | ElementDataTypeB |
Public Member Functions | |
| __host__ __device__ constexpr auto & | GetCThreadBuffer () |
| __host__ __device__ | BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1 () |
| template<typename AThreadBuf, typename IdxBuf, int32_t num_elems> | |
| __device__ void | SetIdxSqueezeA (AThreadBuf &a_thread_buf, IdxBuf &idx_buf) |
| template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer> | |
| __device__ void | Run (const ABlockBuffer &a_block_buf, const BBlockBuffer &b_block_buf, CThreadBuffer &c_thread_buf) const |
| __host__ __device__ constexpr auto & | GetCThreadBuffer () |
| __host__ __device__ | BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1 () |
| template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer> | |
| __device__ void | Run (const ABlockBuffer &a_block_buf, const BBlockBuffer &b_block_buf, CThreadBuffer &c_thread_buf) const |
Static Public Member Functions | |
| static __device__ auto | GetWaveIdx () |
| static __device__ auto | CalculateAThreadOriginDataIndex () |
| static __device__ auto | CalculateBThreadOriginDataIndex () |
| template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i> | |
| static __device__ auto | CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >) |
| template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i> | |
| static __device__ auto | CalculateCThreadOriginDataIndex8D (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >) |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| template<typename CGridDesc_M_N> | |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_M_N &c_grid_desc_m_n) |
| template<typename CGridDesc_G_M_N> | |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_G_M_N &c_grid_desc_g_m_n) |
| __host__ static __device__ constexpr auto | MakeABlockDescriptor_M0_M1_M2_K () |
| __host__ static __device__ constexpr auto | MakeBBlockDescriptor_N0_N1_N2_K () |
| static __device__ auto | GetWaveIdx () |
| static __device__ auto | CalculateAThreadOriginDataIndex () |
| static __device__ auto | CalculateBThreadOriginDataIndex () |
| template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i> | |
| static __device__ auto | CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >) |
| template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i> | |
| static __device__ auto | CalculateCThreadOriginDataIndex8D (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >) |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| template<typename CGridDesc_M_N> | |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_M_N &c_grid_desc_m_n) |
| template<typename CGridDesc_G_M_N> | |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_G_M_N &c_grid_desc_g_m_n) |
| __host__ static __device__ constexpr auto | MakeABlockDescriptor_M0_M1_M2_K () |
| __host__ static __device__ constexpr auto | MakeBBlockDescriptor_N0_N1_N2_K () |
Public Attributes | |
| StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, FloatAcc, MRepeat *NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true > | c_thread_buf_ |
Static Public Attributes | |
| static constexpr auto | I0 = Number<0>{} |
| static constexpr auto | I1 = Number<1>{} |
| static constexpr auto | I2 = Number<2>{} |
| static constexpr auto | I3 = Number<3>{} |
| static constexpr index_t | MWaves = MPerBlock / (MRepeat * MPerXDL) |
| static constexpr index_t | NWaves = NPerBlock / (NRepeat * NPerXDL) |
| static constexpr index_t | WaveSize = BlockSize / MWaves / NWaves |
| static constexpr index_t | MPerBlock = AK0MK1BlockDesc{}.GetLength(I1) |
| static constexpr index_t | NPerBlock = BK0NK1BlockDesc{}.GetLength(I1) |
| static constexpr index_t | KPerBlock |
| static constexpr index_t | A_K0 = AK0MK1BlockDesc{}.GetLength(I0) |
| static constexpr index_t | B_K0 = BK0NK1BlockDesc{}.GetLength(I0) |
| static constexpr index_t | A_K1 = AK0MK1BlockDesc{}.GetLength(I2) |
| static constexpr index_t | B_K1 = BK0NK1BlockDesc{}.GetLength(I2) |
| static constexpr auto | xdlops_gemm |
| static constexpr index_t | KPerThread = KPerBlock / xdlops_gemm.K0PerXdlops |
| static constexpr auto | a_block_desc_m0_m1_m2_k = MakeABlockDescriptor_M0_M1_M2_K() |
| static constexpr auto | b_block_desc_n0_n1_n2_k = MakeBBlockDescriptor_N0_N1_N2_K() |
Protected Types | |
| using | AThreadCopy |
| using | BThreadCopy |
| using | AThreadCopy |
| using | BThreadCopy |
Protected Attributes | |
| AThreadCopy | a_thread_copy_ {CalculateAThreadOriginDataIndex()} |
| BThreadCopy | b_thread_copy_ {CalculateBThreadOriginDataIndex()} |
Static Protected Attributes | |
| static constexpr auto | a_thread_desc_ |
| static constexpr auto | b_thread_desc_ |
| static constexpr auto | c_thread_desc_ |
Member Typedef Documentation
◆ AThreadCopy [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
protected |
Initial value:
ThreadwiseTensorSliceTransfer_v4<FloatA,
decltype(a_block_desc_m0_m1_m2_k),
decltype(a_thread_desc_),
3,
A_K1,
A_K1>
static constexpr auto a_block_desc_m0_m1_m2_k
Definition blockwise_gemm_dpp.hpp:254
static constexpr auto a_thread_desc_
Definition blockwise_gemm_dpp.hpp:312
static constexpr index_t A_K1
Definition blockwise_gemm_dpp.hpp:52
ADataType ComputeTypeA
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:34
Definition utility/sequence.hpp:43
Definition threadwise_tensor_slice_transfer.hpp:1260
◆ AThreadCopy [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
protected |
Initial value:
ThreadwiseTensorSliceTransfer_v4<FloatA,
decltype(a_block_desc_m0_m1_m2_k),
decltype(a_thread_desc_),
3,
A_K1,
A_K1>
conditional_t< is_same_v< ComputeTypeA, ck::tf32_t >, float, ComputeTypeA > ElementDataTypeA
Definition blockwise_gemm_xdlops.hpp:52
◆ BThreadCopy [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
protected |
Initial value:
ThreadwiseTensorSliceTransfer_v4<FloatB,
decltype(b_block_desc_n0_n1_n2_k),
decltype(b_thread_desc_),
3,
B_K1,
B_K1>
static constexpr index_t B_K1
Definition blockwise_gemm_dpp.hpp:53
static constexpr auto b_thread_desc_
Definition blockwise_gemm_dpp.hpp:316
static constexpr auto b_block_desc_n0_n1_n2_k
Definition blockwise_gemm_dpp.hpp:255
BDataType ComputeTypeB
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:35
◆ BThreadCopy [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
protected |
Initial value:
ThreadwiseTensorSliceTransfer_v4<FloatB,
decltype(b_block_desc_n0_n1_n2_k),
decltype(b_thread_desc_),
3,
B_K1,
B_K1>
conditional_t< is_same_v< ComputeTypeB, ck::tf32_t >, float, ComputeTypeB > ElementDataTypeB
Definition blockwise_gemm_xdlops.hpp:54
◆ ElementDataTypeA
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
| using ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::ElementDataTypeA |
Initial value:
conditional_t<is_same_v<ComputeTypeA, ck::tf32_t>, float, ComputeTypeA>
typename conditional< predicate, X, Y >::type conditional_t
Definition utility/functional.hpp:115
◆ ElementDataTypeB
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
| using ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::ElementDataTypeB |
Initial value:
conditional_t<is_same_v<ComputeTypeB, ck::tf32_t>, float, ComputeTypeB>
◆ ThisThreadBlock [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
| using ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::ThisThreadBlock = ThisThreadBlock<BlockSize> |
◆ ThisThreadBlock [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
| using ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::ThisThreadBlock = ThisThreadBlock<BlockSize> |
Constructor & Destructor Documentation
◆ BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline |
◆ BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline |
Member Function Documentation
◆ CalculateAThreadOriginDataIndex() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static |
◆ CalculateAThreadOriginDataIndex() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static |
◆ CalculateBThreadOriginDataIndex() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static |
◆ CalculateBThreadOriginDataIndex() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static |
◆ CalculateCThreadOriginDataIndex() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static |
◆ CalculateCThreadOriginDataIndex() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static |
◆ CalculateCThreadOriginDataIndex8D() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static |
◆ CalculateCThreadOriginDataIndex8D() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static |
◆ GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static constexpr |
◆ GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static constexpr |
◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static constexpr |
◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static constexpr |
◆ GetCThreadBuffer() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline constexpr |
◆ GetCThreadBuffer() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline constexpr |
◆ GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static constexpr |
◆ GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static constexpr |
◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static constexpr |
◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static constexpr |
◆ GetWaveIdx() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static |
◆ GetWaveIdx() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static |
◆ MakeABlockDescriptor_M0_M1_M2_K() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static constexpr |
◆ MakeABlockDescriptor_M0_M1_M2_K() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static constexpr |
◆ MakeBBlockDescriptor_N0_N1_N2_K() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static constexpr |
◆ MakeBBlockDescriptor_N0_N1_N2_K() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
inline static constexpr |
◆ MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<typename CGridDesc_G_M_N>
|
inline static constexpr |
◆ MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<typename CGridDesc_G_M_N>
|
inline static constexpr |
◆ MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<typename CGridDesc_M_N>
|
inline static constexpr |
◆ MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<typename CGridDesc_M_N>
|
inline static constexpr |
◆ Run() [1/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
|
inline |
◆ Run() [2/2]
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
|
inline |
◆ SetIdxSqueezeA()
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
template<typename AThreadBuf, typename IdxBuf, int32_t num_elems>
|
inline |
Member Data Documentation
◆ a_block_desc_m0_m1_m2_k
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ A_K0
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ A_K1
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ a_thread_copy_
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
protected |
◆ a_thread_desc_
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr protected |
Initial value:
=
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
static constexpr auto I1
Definition blockwise_gemm_dpp.hpp:35
◆ b_block_desc_n0_n1_n2_k
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ B_K0
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ B_K1
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ b_thread_copy_
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
protected |
◆ b_thread_desc_
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr protected |
Initial value:
◆ c_thread_buf_
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
| StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, FloatAcc, MRepeat *NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true > ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB >::c_thread_buf_ |
◆ c_thread_desc_
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr protected |
Initial value:
static constexpr auto xdlops_gemm
Definition blockwise_gemm_smfmac_xdlops.hpp:66
◆ I0
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ I1
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ I2
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ I3
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ KPerBlock
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
Initial value:
=
static constexpr auto I2
Definition blockwise_gemm_smfmac_xdlops.hpp:47
static constexpr auto I0
Definition blockwise_gemm_smfmac_xdlops.hpp:45
◆ KPerThread
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ MPerBlock
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ MWaves
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ NPerBlock
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ NWaves
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ WaveSize
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
◆ xdlops_gemm
template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB>
|
static constexpr |
Initial value:
=
Definition smfmac_xdlops_gemm.hpp:215
The documentation for this struct was generated from the following files: