GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize > Struct Template Reference

GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize > Struct Template Reference

Composable Kernel: ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize > Struct Template Reference
ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize > Struct Template Reference

#include <gridwise_normalization_bwd_gamma_beta.hpp>

Public Types

using ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize>
using DYThreadBufferDimAccessOrder
using XThreadBufferDimAccessOrder
using MeanInvStdThreadBufferDimAccessOrder
using ThreadClusterArrangeOrder = DYThreadBufferDimAccessOrder
using ThreadBufferLengths_M_K = Sequence<MThreadSliceSize, KThreadSliceSize>
using ThreadBufferLengths_M = Sequence<MThreadSliceSize>
using PassThroughOp = tensor_operation::element_wise::PassThrough
using BlockwiseSumReduce

Static Public Member Functions

static __device__ void Run (const GridDesc_M_K &dy_grid_desc_m_k, const GridDesc_M_K &x_grid_desc_m_k, const GridDesc_M_K &mean_grid_desc_m_k, const GridDesc_M_K &inv_std_grid_desc_m_k, const GridDesc_M &dgamma_grid_desc_m, const GridDesc_M &dbeta_grid_desc_m, index_t num_k_block_tile_iteration, const DYDataType *const __restrict__ p_dy_global, const XDataType *const __restrict__ p_x_global, const MeanInvStdDataType *const __restrict__ p_mean_global, const MeanInvStdDataType *const __restrict__ p_inv_std_global, DGammaDataType *const __restrict__ p_dgamma_global, DBetaDataType *const __restrict__ p_dbeta_global)

Static Public Attributes

static constexpr auto thread_cluster_desc
static constexpr auto thread_buffer_desc_m_k
static constexpr auto thread_buffer_desc_m
static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr index_t M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr index_t K_BlockTileSize = KThreadClusterSize * KThreadSliceSize

Member Typedef Documentation

◆ BlockwiseSumReduce

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
using ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::BlockwiseSumReduce
Initial value:
BlockSize,
true>
decltype(ThreadClusterDesc_M_K{}.GetLengths()) ThreadClusterLengths_M_K
Definition blockwise_softmax.hpp:69
DYThreadBufferDimAccessOrder ThreadClusterArrangeOrder
Definition gridwise_normalization_bwd_data.hpp:86
Definition reduction_functions_blockwise.hpp:28
Definition reduction_operator.hpp:37

◆ DYThreadBufferDimAccessOrder

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
using ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::DYThreadBufferDimAccessOrder
Initial value:
Definition utility/sequence.hpp:43
Definition utility/functional.hpp:100

◆ MeanInvStdThreadBufferDimAccessOrder

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
using ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::MeanInvStdThreadBufferDimAccessOrder

◆ PassThroughOp

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
using ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::PassThroughOp = tensor_operation::element_wise::PassThrough

◆ ThreadBufferLengths_M

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
using ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::ThreadBufferLengths_M = Sequence<MThreadSliceSize>

◆ ThreadBufferLengths_M_K

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
using ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::ThreadBufferLengths_M_K = Sequence<MThreadSliceSize, KThreadSliceSize>

◆ ThreadClusterArrangeOrder

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
using ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::ThreadClusterArrangeOrder = DYThreadBufferDimAccessOrder

◆ ThreadClusterLengths_M_K

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
using ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize>

◆ XThreadBufferDimAccessOrder

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
using ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::XThreadBufferDimAccessOrder

Member Function Documentation

◆ Run()

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
__device__ void ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::Run ( const GridDesc_M_K & dy_grid_desc_m_k,
const GridDesc_M_K & x_grid_desc_m_k,
const GridDesc_M_K & mean_grid_desc_m_k,
const GridDesc_M_K & inv_std_grid_desc_m_k,
const GridDesc_M & dgamma_grid_desc_m,
const GridDesc_M & dbeta_grid_desc_m,
index_t num_k_block_tile_iteration,
const DYDataType *const __restrict__ p_dy_global,
const XDataType *const __restrict__ p_x_global,
const MeanInvStdDataType *const __restrict__ p_mean_global,
const MeanInvStdDataType *const __restrict__ p_inv_std_global,
DGammaDataType *const __restrict__ p_dgamma_global,
DBetaDataType *const __restrict__ p_dbeta_global )
inline static

Member Data Documentation

◆ I0

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
auto ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::I0 = Number<0>{}
static constexpr

◆ I1

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
auto ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::I1 = Number<1>{}
static constexpr

◆ K_BlockTileSize

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
index_t ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::K_BlockTileSize = KThreadClusterSize * KThreadSliceSize
static constexpr

◆ M_BlockTileSize

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
index_t ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr

◆ thread_buffer_desc_m

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
auto ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::thread_buffer_desc_m
static constexpr
Initial value:
=
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211

◆ thread_buffer_desc_m_k

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
auto ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::thread_buffer_desc_m_k
static constexpr

◆ thread_cluster_desc

template<typename DYDataType, typename XDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DGammaDataType, typename DBetaDataType, typename GridDesc_M_K, typename GridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DGammaDstVectorSize, index_t DBetaDstVectorSize>
auto ck::GridwiseNormalizationBwdGammaBeta_mk_to_k< DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType, GridDesc_M_K, GridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DGammaDstVectorSize, DBetaDstVectorSize >::thread_cluster_desc
static constexpr
Initial value:
=
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
DYThreadBufferDimAccessOrder ThreadClusterArrangeOrder
Definition gridwise_normalization_bwd_gamma_beta.hpp:67
Sequence< MThreadClusterSize, KThreadClusterSize > ThreadClusterLengths_M_K
Definition gridwise_normalization_bwd_gamma_beta.hpp:56

The documentation for this struct was generated from the following file: