SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type > Struct Template Reference

SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type > Struct Template Reference

Composable Kernel: ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type > Struct Template Reference
ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type > Struct Template Reference

#include <smfmac_xdlops_gemm.hpp>

Public Types

using CIndex = MultiIndex<2>
using CIndex4D = MultiIndex<4>

Public Member Functions

__host__ __device__ constexpr SparseXdlopsGemm ()
template<class FloatA, class FloatB, class Idx, class FloatC>
__device__ void Run (const FloatA &p_a_wave, const FloatB &p_b_wave, const Idx &idx, FloatC &p_c_thread) const

Static Public Member Functions

static __device__ constexpr index_t GetNumBlks ()
static __device__ constexpr index_t GetNumXdlops ()
template<typename CDesc_M0_N0_M1_N1_M2_N2>
__host__ static __device__ constexpr auto MakeCDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CDesc_M0_N0_M1_N1_M2_N2 &c_desc_m0_n0_m1_n1_m2_n2)
template<typename CDesc_G_M0_N0_M1_N1_M2_N2>
__host__ static __device__ constexpr auto MakeCDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CDesc_G_M0_N0_M1_N1_M2_N2 &c_desc_g_m0_n0_m1_n1_m2_n2)
static __device__ constexpr index_t GetRegSizePerXdlops ()
static __device__ constexpr index_t GetWaveSize ()
static __device__ auto GetLaneId ()
static __device__ auto GetBlkIdx ()
__host__ static __device__ auto CalculateAThreadOriginDataIndex ()
__host__ static __device__ auto CalculateBThreadOriginDataIndex ()
static __device__ CIndex GetBeginOfThreadBlk (index_t xdlops_i, index_t blk_i)
static __device__ CIndex4D GetBeginOfThreadBlk4D (index_t, index_t)
__host__ static __device__ constexpr auto GetCM0M1M2NThreadBlkLengths ()

Static Public Attributes

static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr auto I3 = Number<3>{}
static constexpr auto I4 = Number<4>{}
static constexpr auto I5 = Number<5>{}
static constexpr auto smfmac
static constexpr auto smfmac_instr = smfmac.selected_smfmac
static constexpr auto KPerXdlops = smfmac.GetKPerXdlops()
static constexpr auto K1PerXdlops = smfmac.GetK1PerXdlops()
static constexpr auto K0PerXdlops = KPerXdlops / K1PerXdlops

Member Typedef Documentation

◆ CIndex

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
using ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::CIndex = MultiIndex<2>

◆ CIndex4D

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
using ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::CIndex4D = MultiIndex<4>

Constructor & Destructor Documentation

◆ SparseXdlopsGemm()

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__host__ __device__ constexpr ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::SparseXdlopsGemm ( )
inline constexpr

Member Function Documentation

◆ CalculateAThreadOriginDataIndex()

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__host__ static __device__ auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::CalculateAThreadOriginDataIndex ( )
inline static

◆ CalculateBThreadOriginDataIndex()

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__host__ static __device__ auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::CalculateBThreadOriginDataIndex ( )
inline static

◆ GetBeginOfThreadBlk()

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__device__ CIndex ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetBeginOfThreadBlk ( index_t xdlops_i,
index_t blk_i )
inline static

◆ GetBeginOfThreadBlk4D()

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__device__ CIndex4D ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetBeginOfThreadBlk4D ( index_t ,
index_t  )
inline static

◆ GetBlkIdx()

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__device__ auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetBlkIdx ( )
inline static

◆ GetCM0M1M2NThreadBlkLengths()

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__host__ static __device__ constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetCM0M1M2NThreadBlkLengths ( )
inline static constexpr

◆ GetLaneId()

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__device__ auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetLaneId ( )
inline static

◆ GetNumBlks()

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__device__ constexpr index_t ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetNumBlks ( )
inline static constexpr

◆ GetNumXdlops()

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__device__ constexpr index_t ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetNumXdlops ( )
inline static constexpr

◆ GetRegSizePerXdlops()

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__device__ constexpr index_t ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetRegSizePerXdlops ( )
inline static constexpr

◆ GetWaveSize()

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
__device__ constexpr index_t ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::GetWaveSize ( )
inline static constexpr

◆ MakeCDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
template<typename CDesc_G_M0_N0_M1_N1_M2_N2>
__host__ static __device__ constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::MakeCDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ( const CDesc_G_M0_N0_M1_N1_M2_N2 & c_desc_g_m0_n0_m1_n1_m2_n2)
inline static constexpr

◆ MakeCDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
template<typename CDesc_M0_N0_M1_N1_M2_N2>
__host__ static __device__ constexpr auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::MakeCDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ( const CDesc_M0_N0_M1_N1_M2_N2 & c_desc_m0_n0_m1_n1_m2_n2)
inline static constexpr

◆ Run()

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
template<class FloatA, class FloatB, class Idx, class FloatC>
__device__ void ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::Run ( const FloatA & p_a_wave,
const FloatB & p_b_wave,
const Idx & idx,
FloatC & p_c_thread ) const
inline

Member Data Documentation

◆ I0

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::I0 = Number<0>{}
static constexpr

◆ I1

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::I1 = Number<1>{}
static constexpr

◆ I2

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::I2 = Number<2>{}
static constexpr

◆ I3

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::I3 = Number<3>{}
static constexpr

◆ I4

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::I4 = Number<4>{}
static constexpr

◆ I5

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::I5 = Number<5>{}
static constexpr

◆ K0PerXdlops

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::K0PerXdlops = KPerXdlops / K1PerXdlops
static constexpr

◆ K1PerXdlops

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::K1PerXdlops = smfmac.GetK1PerXdlops()
static constexpr

◆ KPerXdlops

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::KPerXdlops = smfmac.GetKPerXdlops()
static constexpr

◆ smfmac

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::smfmac
static constexpr
Initial value:

◆ smfmac_instr

template<typename base_type, index_t MPerXdlops, index_t NPerXdlops, index_t KPack, typename additional_type = base_type>
auto ck::SparseXdlopsGemm< base_type, MPerXdlops, NPerXdlops, KPack, additional_type >::smfmac_instr = smfmac.selected_smfmac
static constexpr

The documentation for this struct was generated from the following file: