GemmPipelineAgBgCrCompAsync< Problem, Policy > Struct Template Reference

GemmPipelineAgBgCrCompAsync< Problem, Policy > Struct Template Reference

Composable Kernel: ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy > Struct Template Reference
ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy > Struct Template Reference

Compute-optimized asynchronous pipeline version, which is based on V4. More...

#include <gemm_pipeline_ag_bg_cr_comp_async.hpp>

Inheritance diagram for ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >:
ck_tile::BaseGemmPipelineAgBgCrCompAsync< Problem >

Classes

struct  PipelineImpl
struct  PipelineImpl< GemmPipelineScheduler::Intrawave >

Public Types

using Base = BaseGemmPipelineAgBgCrCompAsync<Problem>
using PipelineImplBase = GemmPipelineAgBgCrImplBase<Problem, Policy>
using AsDataType = remove_cvref_t<typename Problem::AsDataTypeTuple>
using BsDataType = remove_cvref_t<typename Problem::BsDataTypeTuple>
using CDataType = remove_cvref_t<typename Problem::CDataType>
using BlockGemmShape = remove_cvref_t<typename Problem::BlockGemmShape>
using AsLayout = remove_cvref_t<typename Problem::AsLayoutTuple>
using BsLayout = remove_cvref_t<typename Problem::BsLayoutTuple>
using CLayout = remove_cvref_t<typename Problem::CLayout>
using AElementWise = remove_cvref_t<typename Problem::AElementWise>
using BElementWise = remove_cvref_t<typename Problem::BElementWise>
using ALayout = remove_cvref_t<std::tuple_element_t<0, AsLayout>>
using BLayout = remove_cvref_t<std::tuple_element_t<0, BsLayout>>
using ADataType = remove_cvref_t<std::tuple_element_t<0, AsDataType>>
using BDataType = remove_cvref_t<std::tuple_element_t<0, BsDataType>>
using BlockGemm = remove_cvref_t<decltype(Policy::template GetBlockGemm<Problem>())>
using I0 = number<0>
using I1 = number<1>
using I2 = number<2>

Public Member Functions

template<typename ADramBlockWindowTmp, typename BDramBlockWindowTmp, typename AElementFunction, typename BElementFunction>
CK_TILE_DEVICE auto operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BDramBlockWindowTmp &b_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *p_smem_0, void *p_smem_1) const
template<typename ADramBlockWindowTmp, typename BDramBlockWindowTmp>
CK_TILE_DEVICE auto operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const BDramBlockWindowTmp &b_dram_block_window_tmp, const index_t num_loop, void *__restrict__ p_smem_0, void *__restrict__ p_smem_1) const

Static Public Member Functions

template<bool IsWave32Host = false>
static constexpr index_t GetVectorSizeA ()
template<bool IsWave32Host = false>
static constexpr index_t GetVectorSizeB ()
static constexpr index_t GetVectorSizeC ()
static constexpr index_t GetSmemPackA ()
static constexpr index_t GetSmemPackB ()
static CK_TILE_HOST_DEVICE constexpr index_t GetSmemSize ()
static CK_TILE_HOST_DEVICE constexpr auto IsTransposeC ()
Static Public Member Functions inherited from ck_tile::BaseGemmPipelineAgBgCrCompAsync< Problem >
static CK_TILE_HOST constexpr bool BlockHasHotloop (index_t num_loop)
static CK_TILE_HOST constexpr TailNumber GetBlockLoopTailNum (index_t num_loop)
template<typename RunFunction>
static CK_TILE_HOST_DEVICE auto TailHandler (const RunFunction &run_func, bool has_hot_loop, TailNumber tail_number)

Static Public Attributes

static constexpr index_t APackedSize
static constexpr index_t BPackedSize
static constexpr index_t BlockSize = Problem::kBlockSize
static constexpr index_t MPerBlock = BlockGemmShape::kM
static constexpr index_t NPerBlock = BlockGemmShape::kN
static constexpr index_t KPerBlock = BlockGemmShape::kK
static constexpr index_t NumWaveGroups = Problem::NumWaveGroups
static constexpr index_t Preshuffle = Problem::Preshuffle
static constexpr bool kPadM = Problem::kPadM
static constexpr bool kPadN = Problem::kPadN
static constexpr bool kPadK = Problem::kPadK
static constexpr bool DoubleSmemBuffer = Problem::DoubleSmemBuffer
static constexpr bool HasHotLoop = Problem::HasHotLoop
static constexpr auto TailNum = Problem::TailNum
static constexpr auto Scheduler = Problem::Scheduler
static constexpr auto is_a_load_tr_v = bool_constant<PipelineImplBase::is_a_load_tr>{}
static constexpr auto is_b_load_tr_v = bool_constant<PipelineImplBase::is_b_load_tr>{}
Static Public Attributes inherited from ck_tile::BaseGemmPipelineAgBgCrCompAsync< Problem >
static constexpr index_t PrefetchStages = 2
static constexpr index_t PrefillStages = 1
static constexpr index_t GlobalBufferNum = 1

Detailed Description

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
struct ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >

Compute-optimized asynchronous pipeline version, which is based on V4.

This pipeline introduces asynchronous loads from global memory to LDS, skipping the intermediate load into pipeline registers.

Member Typedef Documentation

◆ ADataType

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::ADataType = remove_cvref_t<std::tuple_element_t<0, AsDataType>>

◆ AElementWise

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::AElementWise = remove_cvref_t<typename Problem::AElementWise>

◆ ALayout

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::ALayout = remove_cvref_t<std::tuple_element_t<0, AsLayout>>

◆ AsDataType

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::AsDataType = remove_cvref_t<typename Problem::AsDataTypeTuple>

◆ AsLayout

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::AsLayout = remove_cvref_t<typename Problem::AsLayoutTuple>

◆ Base

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::Base = BaseGemmPipelineAgBgCrCompAsync<Problem>

◆ BDataType

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::BDataType = remove_cvref_t<std::tuple_element_t<0, BsDataType>>

◆ BElementWise

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::BElementWise = remove_cvref_t<typename Problem::BElementWise>

◆ BLayout

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::BLayout = remove_cvref_t<std::tuple_element_t<0, BsLayout>>

◆ BlockGemm

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::BlockGemm = remove_cvref_t<decltype(Policy::template GetBlockGemm<Problem>())>

◆ BlockGemmShape

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::BlockGemmShape = remove_cvref_t<typename Problem::BlockGemmShape>

◆ BsDataType

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::BsDataType = remove_cvref_t<typename Problem::BsDataTypeTuple>

◆ BsLayout

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::BsLayout = remove_cvref_t<typename Problem::BsLayoutTuple>

◆ CDataType

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::CDataType = remove_cvref_t<typename Problem::CDataType>

◆ CLayout

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::CLayout = remove_cvref_t<typename Problem::CLayout>

◆ I0

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::I0 = number<0>

◆ I1

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::I1 = number<1>

◆ I2

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::I2 = number<2>

◆ PipelineImplBase

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
using ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::PipelineImplBase = GemmPipelineAgBgCrImplBase<Problem, Policy>

Member Function Documentation

◆ GetSmemPackA()

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
constexpr index_t ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::GetSmemPackA ( )
inline static constexpr

◆ GetSmemPackB()

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
constexpr index_t ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::GetSmemPackB ( )
inline static constexpr

◆ GetSmemSize()

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
CK_TILE_HOST_DEVICE constexpr index_t ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::GetSmemSize ( )
inline static constexpr

◆ GetVectorSizeA()

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
template<bool IsWave32Host = false>
constexpr index_t ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::GetVectorSizeA ( )
inline static constexpr

◆ GetVectorSizeB()

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
template<bool IsWave32Host = false>
constexpr index_t ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::GetVectorSizeB ( )
inline static constexpr

◆ GetVectorSizeC()

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
constexpr index_t ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::GetVectorSizeC ( )
inline static constexpr

◆ IsTransposeC()

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::IsTransposeC ( )
inline static constexpr

◆ operator()() [1/2]

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
template<typename ADramBlockWindowTmp, typename BDramBlockWindowTmp, typename AElementFunction, typename BElementFunction>
CK_TILE_DEVICE auto ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::operator() ( const ADramBlockWindowTmp & a_dram_block_window_tmp,
const AElementFunction & a_element_func,
const BDramBlockWindowTmp & b_dram_block_window_tmp,
const BElementFunction & b_element_func,
index_t num_loop,
void * p_smem_0,
void * p_smem_1 ) const
inline

◆ operator()() [2/2]

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
template<typename ADramBlockWindowTmp, typename BDramBlockWindowTmp>
CK_TILE_DEVICE auto ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::operator() ( const ADramBlockWindowTmp & a_dram_block_window_tmp,
const BDramBlockWindowTmp & b_dram_block_window_tmp,
const index_t num_loop,
void *__restrict__ p_smem_0,
void *__restrict__ p_smem_1 ) const
inline

Member Data Documentation

◆ APackedSize

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
index_t ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::APackedSize
static constexpr
Initial value:
=
Definition tile/core/numeric/numeric.hpp:81

◆ BlockSize

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
index_t ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::BlockSize = Problem::kBlockSize
static constexpr

◆ BPackedSize

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
index_t ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::BPackedSize
static constexpr

◆ DoubleSmemBuffer

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
bool ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::DoubleSmemBuffer = Problem::DoubleSmemBuffer
static constexpr

◆ HasHotLoop

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
bool ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::HasHotLoop = Problem::HasHotLoop
static constexpr

◆ is_a_load_tr_v

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
auto ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::is_a_load_tr_v = bool_constant<PipelineImplBase::is_a_load_tr>{}
static constexpr

◆ is_b_load_tr_v

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
auto ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::is_b_load_tr_v = bool_constant<PipelineImplBase::is_b_load_tr>{}
static constexpr

◆ kPadK

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
bool ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::kPadK = Problem::kPadK
static constexpr

◆ kPadM

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
bool ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::kPadM = Problem::kPadM
static constexpr

◆ kPadN

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
bool ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::kPadN = Problem::kPadN
static constexpr

◆ KPerBlock

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
index_t ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::KPerBlock = BlockGemmShape::kK
static constexpr

◆ MPerBlock

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
index_t ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::MPerBlock = BlockGemmShape::kM
static constexpr

◆ NPerBlock

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
index_t ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::NPerBlock = BlockGemmShape::kN
static constexpr

◆ NumWaveGroups

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
index_t ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::NumWaveGroups = Problem::NumWaveGroups
static constexpr

◆ Preshuffle

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
index_t ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::Preshuffle = Problem::Preshuffle
static constexpr

◆ Scheduler

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
auto ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::Scheduler = Problem::Scheduler
static constexpr

◆ TailNum

template<typename Problem, typename Policy = GemmPipelineAgBgCrCompAsyncDefaultPolicy>
auto ck_tile::GemmPipelineAgBgCrCompAsync< Problem, Policy >::TailNum = Problem::TailNum
static constexpr

The documentation for this struct was generated from the following file: