GridwiseMultipleReduction_mk_to_m_threadwise< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq > Struct Template Reference
ck::GridwiseMultipleReduction_mk_to_m_threadwise< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq > Struct Template Reference
#include <gridwise_2d_multiple_reduction_threadwise.hpp>
Public Types | |
| using | ThreadBufferDimAccessOrder |
| using | ThreadReduceSrcDesc_M_K |
| using | ThreadReduceDstDesc_M |
| using | ThreadwiseReduce |
| using | PassThroughOp = tensor_operation::element_wise::PassThrough |
| using | Accumulation = detail::AccumulateWithNanCheck<PropagateNan, ReduceOperation, AccDataType> |
Static Public Member Functions | |
| static __device__ void | Run (const InGridDesc_M_K &in_grid_desc_m_k, const OutGridDesc_M_Tuple &out_grid_desc_m_tuple, const InElementwiseOperationTuple &in_elementwise_op_tuple, const AccElementwiseOperationTuple &acc_elementwise_op_tuple, Array< AccDataType, NumReduction > alpha_values, const InDataType *const __restrict__ p_in_value_global, Array< AccDataType, NumReduction > beta_values, OutDataTypePointerTuple p_out_value_global_tuple) |
Static Public Attributes | |
| static constexpr bool | reorder_thread_cluster = (InSrcVectorDim == 0) |
| static constexpr auto | I0 = Number<0>{} |
Member Typedef Documentation
◆ Accumulation
template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
| using ck::GridwiseMultipleReduction_mk_to_m_threadwise< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::Accumulation = detail::AccumulateWithNanCheck<PropagateNan, ReduceOperation, AccDataType> |
◆ PassThroughOp
template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
| using ck::GridwiseMultipleReduction_mk_to_m_threadwise< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::PassThroughOp = tensor_operation::element_wise::PassThrough |
◆ ThreadBufferDimAccessOrder
template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
| using ck::GridwiseMultipleReduction_mk_to_m_threadwise< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::ThreadBufferDimAccessOrder |
Initial value:
Definition utility/sequence.hpp:43
Definition utility/functional.hpp:100
◆ ThreadReduceDstDesc_M
template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
| using ck::GridwiseMultipleReduction_mk_to_m_threadwise< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::ThreadReduceDstDesc_M |
Initial value: decltype(make_naive_tensor_descriptor_packed(make_tuple(Number< MThreadSliceSize >{})))
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
◆ ThreadReduceSrcDesc_M_K
template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
| using ck::GridwiseMultipleReduction_mk_to_m_threadwise< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::ThreadReduceSrcDesc_M_K |
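Initial value: decltype(make_naive_tensor_descriptor_packed(make_tuple(Number< MThreadSliceSize >{}, Number< KThreadSliceSize >{})))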
◆ ThreadwiseReduce
template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
| using ck::GridwiseMultipleReduction_mk_to_m_threadwise< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::ThreadwiseReduce |
Initial value:
ThreadwiseReduction<AccDataType,
ThreadReduceSrcDesc_M_K,
ThreadReduceDstDesc_M,
ReduceOperation,
PropagateNan>
decltype(make_naive_tensor_descriptor_packed(make_tuple(Number< MThreadSliceSize >{}))) ThreadReduceDstDesc_M
Definition gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp:123
decltype(make_naive_tensor_descriptor_packed( make_tuple(Number< MThreadSliceSize >{}, Number< KThreadSliceSize >{}))) ThreadReduceSrcDesc_M_K
Definition gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp:116
Definition reduction_functions_threadwise.hpp:23
Member Function Documentation
◆ Run()
template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
inline static |
Member Data Documentation
◆ I0
template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
static constexpr |
◆ reorder_thread_cluster
template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
static constexpr |
The documentation for this struct was generated from the following file: