GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce > Struct Template Reference

GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce > Struct Template Reference

Composable Kernel: ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce > Struct Template Reference
ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce > Struct Template Reference

#include <gridwise_normalization_bwd_data.hpp>

Public Types

using ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize>
using DYThreadBufferDimAccessOrder
using XThreadBufferDimAccessOrder
using GammaThreadBufferDimAccessOrder
using MeanInvStdThreadBufferDimAccessOrder
using DXThreadBufferDimAccessOrder
using ThreadClusterArrangeOrder = DYThreadBufferDimAccessOrder
using ThreadBufferLengths_M_K = Sequence<MThreadSliceSize, KThreadSliceSize>
using PassThroughOp = tensor_operation::element_wise::PassThrough
using BlockwiseSumReduce

Static Public Member Functions

static __device__ void Run (const GridDesc_M_K &dy_grid_desc_m_k, const GridDesc_M_K &x_grid_desc_m_k, const GridDesc_M_K &gamma_grid_desc_m_k, const GridDesc_M_K &mean_grid_desc_m_k, const GridDesc_M_K &inv_std_grid_desc_m_k, const GridDesc_M_K &dx_grid_desc_m_k, index_t num_k_block_tile_iteration, const DYDataType *const __restrict__ p_dy_global, const XDataType *const __restrict__ p_x_global, const GammaDataType *const __restrict__ p_gamma_global, const MeanInvStdDataType *const __restrict__ p_mean_global, const MeanInvStdDataType *const __restrict__ p_inv_std_global, DXDataType *const __restrict__ p_dx_global)

Static Public Attributes

static constexpr auto thread_cluster_desc
static constexpr auto thread_buffer_desc_m_k
static constexpr auto thread_buffer_desc_m
static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr index_t M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr index_t K_BlockTileSize = KThreadClusterSize * KThreadSliceSize

Member Typedef Documentation

◆ BlockwiseSumReduce

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::BlockwiseSumReduce
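Initial value:
PartitionedBlockwiseReduction<ComputeDataType,
BlockSize,
ThreadClusterLengths_M_K,
ThreadClusterArrangeOrder,
reduce::Add,
true>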
decltype(ThreadClusterDesc_M_K{}.GetLengths()) ThreadClusterLengths_M_K
Definition blockwise_softmax.hpp:69
DYThreadBufferDimAccessOrder ThreadClusterArrangeOrder
Definition gridwise_normalization_bwd_data.hpp:86
Definition reduction_functions_blockwise.hpp:28
Definition reduction_operator.hpp:37

◆ DXThreadBufferDimAccessOrder

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::DXThreadBufferDimAccessOrder
Initial value:
typename conditional<DXDstVectorDim == 0, Sequence<1, 0>, Sequence<0, 1>>::type
Definition utility/sequence.hpp:43
Definition utility/functional.hpp:100

◆ DYThreadBufferDimAccessOrder

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::DYThreadBufferDimAccessOrder

◆ GammaThreadBufferDimAccessOrder

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::GammaThreadBufferDimAccessOrder

◆ MeanInvStdThreadBufferDimAccessOrder

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::MeanInvStdThreadBufferDimAccessOrder

◆ PassThroughOp

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::PassThroughOp = tensor_operation::element_wise::PassThrough

◆ ThreadBufferLengths_M_K

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::ThreadBufferLengths_M_K = Sequence<MThreadSliceSize, KThreadSliceSize>

◆ ThreadClusterArrangeOrder

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::ThreadClusterArrangeOrder = DYThreadBufferDimAccessOrder

◆ ThreadClusterLengths_M_K

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize>

◆ XThreadBufferDimAccessOrder

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::XThreadBufferDimAccessOrder

Member Function Documentation

◆ Run()

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
__device__ void ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::Run ( const GridDesc_M_K & dy_grid_desc_m_k,
const GridDesc_M_K & x_grid_desc_m_k,
const GridDesc_M_K & gamma_grid_desc_m_k,
const GridDesc_M_K & mean_grid_desc_m_k,
const GridDesc_M_K & inv_std_grid_desc_m_k,
const GridDesc_M_K & dx_grid_desc_m_k,
index_t num_k_block_tile_iteration,
const DYDataType *const __restrict__ p_dy_global,
const XDataType *const __restrict__ p_x_global,
const GammaDataType *const __restrict__ p_gamma_global,
const MeanInvStdDataType *const __restrict__ p_mean_global,
const MeanInvStdDataType *const __restrict__ p_inv_std_global,
DXDataType *const __restrict__ p_dx_global )
inline static

Member Data Documentation

◆ I0

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
auto ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::I0 = Number<0>{}
static constexpr

◆ I1

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
auto ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::I1 = Number<1>{}
static constexpr

◆ I2

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
auto ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::I2 = Number<2>{}
static constexpr

◆ K_BlockTileSize

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
index_t ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::K_BlockTileSize = KThreadClusterSize * KThreadSliceSize
static constexpr

◆ M_BlockTileSize

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
index_t ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr

◆ thread_buffer_desc_m

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
auto ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::thread_buffer_desc_m
static constexpr
Initial value:
= make_naive_tensor_descriptor_packed(make_tuple(Number<MThreadSliceSize>{}))
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211

◆ thread_buffer_desc_m_k

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
auto ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::thread_buffer_desc_m_k
static constexpr

◆ thread_cluster_desc

template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
auto ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::thread_cluster_desc
static constexpr
Initial value:
= make_cluster_descriptor(ThreadClusterLengths_M_K{}, ThreadClusterArrangeOrder{})
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
Sequence< MThreadClusterSize, KThreadClusterSize > ThreadClusterLengths_M_K
Definition gridwise_normalization_bwd_data.hpp:73

The documentation for this struct was generated from the following file: