BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc > Struct Template Reference

BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc > Struct Template Reference

Composable Kernel: ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc > Struct Template Reference
ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc > Struct Template Reference

#include <blockwise_gemm_pipeline_wmmaops_base.hpp>

Public Member Functions

__device__ BScale (GridDesc b_scale_grid_desc_, ThreadCopy b_scale_thread_copy_, GridBuffer b_scale_grid_buf_)
template<index_t NBuffer>
__device__ void GlobalLoad (bool cond)

Public Attributes

ThreadCopy b_scale_thread_copy
GridDesc b_scale_grid_desc
GridBuffer b_scale_grid_buf
StaticallyIndexedArray< ThreadStaticBuffer, Number< NumberOfBuffers >{}> b_scale_thread_bufs

Static Public Attributes

static constexpr index_t num_scale_k_block = BScaleThreadDesc{}.GetLength(Number<1>{})
static constexpr index_t num_scale_krepeat = KRepeat / num_scale_k_block
static constexpr auto b_scale_thread_desc = BScaleThreadDesc{}
static constexpr auto b_scale_thread_copy_step

Constructor & Destructor Documentation

◆ BScale()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
__device__ ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc >::BScale ( GridDesc b_scale_grid_desc_,
ThreadCopy b_scale_thread_copy_,
GridBuffer b_scale_grid_buf_ )
inline

Member Function Documentation

◆ GlobalLoad()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
template<index_t NBuffer>
__device__ void ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc >::GlobalLoad ( bool cond)
inline

Member Data Documentation

◆ b_scale_grid_buf

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
GridBuffer ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc >::b_scale_grid_buf

◆ b_scale_grid_desc

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
GridDesc ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc >::b_scale_grid_desc

◆ b_scale_thread_bufs

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
StaticallyIndexedArray<ThreadStaticBuffer, Number<NumberOfBuffers>{}> ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc >::b_scale_thread_bufs

◆ b_scale_thread_copy

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
ThreadCopy ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc >::b_scale_thread_copy

◆ b_scale_thread_copy_step

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
auto ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc >::b_scale_thread_copy_step
static constexpr
Initial value:
= make_tuple(make_multi_index(NWaves * NPerWmma, 0),
make_multi_index(-NPerBlock, 0),
make_multi_index(-NPerBlock, (KPerBlock + ScaleBlockK - 1) / ScaleBlockK))
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
static constexpr index_t NWaves
Definition blockwise_gemm_pipeline_wmmaops_base.hpp:47

◆ b_scale_thread_desc

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
auto ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc >::b_scale_thread_desc = BScaleThreadDesc{}
static constexpr

◆ num_scale_k_block

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
index_t ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc >::num_scale_k_block = BScaleThreadDesc{}.GetLength(Number<1>{})
static constexpr

◆ num_scale_krepeat

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
index_t ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc >::num_scale_krepeat = KRepeat / num_scale_k_block
static constexpr

The documentation for this struct was generated from the following file: