WarpGemmSmfmacImpl< WarpGemmAttribute_ > Struct Template Reference#
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ > Struct Template Reference
#include <warp_gemm_smfmac_impl.hpp>
Public Types | |
| using | WarpGemmAttribute = remove_cvref_t<WarpGemmAttribute_> |
| using | ADataType = typename WarpGemmAttribute::ADataType |
| using | BDataType = typename WarpGemmAttribute::BDataType |
| using | CDataType = typename WarpGemmAttribute::CDataType |
| using | AWarpDstrEncoding = typename WarpGemmAttribute::AWarpDstrEncoding |
| using | BWarpDstrEncoding = typename WarpGemmAttribute::BWarpDstrEncoding |
| using | CWarpDstrEncoding = typename WarpGemmAttribute::CWarpDstrEncoding |
| using | AWarpDstr = remove_cvref_t<decltype(make_static_tile_distribution(AWarpDstrEncoding{}))> |
| using | BWarpDstr = remove_cvref_t<decltype(make_static_tile_distribution(BWarpDstrEncoding{}))> |
| using | CWarpDstr = remove_cvref_t<decltype(make_static_tile_distribution(CWarpDstrEncoding{}))> |
| using | AWarpTensor = static_distributed_tensor<ADataType, AWarpDstr> |
| using | BWarpTensor = static_distributed_tensor<BDataType, BWarpDstr> |
| using | CWarpTensor = static_distributed_tensor<CDataType, CWarpDstr> |
Public Member Functions | |
| template<typename AVec> | |
| CK_TILE_DEVICE int32_t | compress_a (AVec &a_vec) const |
| Compress the A vector for the 2:4 structured-sparsity instruction by moving all non-zero elements into the lower part of a_vec, halving its effective size. | |
| template<typename CTensor, typename ATensor, typename BTensor, bool post_nop_ = false> | |
| CK_TILE_DEVICE void | operator() (CTensor &c, const ATensor &a, const BTensor &b, bool_constant< post_nop_ >={}) const |
Static Public Member Functions | |
| static CK_TILE_HOST_DEVICE constexpr auto | get_num_of_access () |
Static Public Attributes | |
| static constexpr index_t | kM = WarpGemmAttribute::kM |
| static constexpr index_t | kN = WarpGemmAttribute::kN |
| static constexpr index_t | kK = WarpGemmAttribute::kK |
| static constexpr index_t | kKPerThread = WarpGemmAttribute::kKPerThread |
| The number of elements in the K dimension processed by a single thread in a wavefront. | |
Member Typedef Documentation
◆ ADataType
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::ADataType = typename WarpGemmAttribute::ADataType |
◆ AWarpDstr
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::AWarpDstr = remove_cvref_t<decltype(make_static_tile_distribution(AWarpDstrEncoding{}))> |
◆ AWarpDstrEncoding
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::AWarpDstrEncoding = typename WarpGemmAttribute::AWarpDstrEncoding |
◆ AWarpTensor
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::AWarpTensor = static_distributed_tensor<ADataType, AWarpDstr> |
◆ BDataType
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::BDataType = typename WarpGemmAttribute::BDataType |
◆ BWarpDstr
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::BWarpDstr = remove_cvref_t<decltype(make_static_tile_distribution(BWarpDstrEncoding{}))> |
◆ BWarpDstrEncoding
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::BWarpDstrEncoding = typename WarpGemmAttribute::BWarpDstrEncoding |
◆ BWarpTensor
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::BWarpTensor = static_distributed_tensor<BDataType, BWarpDstr> |
◆ CDataType
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::CDataType = typename WarpGemmAttribute::CDataType |
◆ CWarpDstr
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::CWarpDstr = remove_cvref_t<decltype(make_static_tile_distribution(CWarpDstrEncoding{}))> |
◆ CWarpDstrEncoding
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::CWarpDstrEncoding = typename WarpGemmAttribute::CWarpDstrEncoding |
◆ CWarpTensor
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::CWarpTensor = static_distributed_tensor<CDataType, CWarpDstr> |
◆ WarpGemmAttribute
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::WarpGemmAttribute = remove_cvref_t<WarpGemmAttribute_> |
Member Function Documentation
◆ compress_a()
template<typename WarpGemmAttribute_>
template<typename AVec>
|
inline |
Compress the A vector for the 2:4 structured-sparsity instruction by moving all non-zero elements into the lower part of a_vec, halving its effective size.
- Parameters
-
a_vec Vector to be compressed.
- Returns
- Four 2-bit indices giving the locations of the non-zero elements
◆ get_num_of_access()
template<typename WarpGemmAttribute_>
|
inline static constexpr |
◆ operator()()
template<typename WarpGemmAttribute_>
template<typename CTensor, typename ATensor, typename BTensor, bool post_nop_ = false>
|
inline |
Member Data Documentation
◆ kK
template<typename WarpGemmAttribute_>
|
static constexpr |
◆ kKPerThread
template<typename WarpGemmAttribute_>
|
static constexpr |
The number of elements in the K dimension processed by a single thread in a wavefront.
- Note
- WarpGemm may run the MFMA instruction multiple times (on different K). In that case, this value takes the repeated runs into account.
◆ kM
template<typename WarpGemmAttribute_>
|
static constexpr |
◆ kN
template<typename WarpGemmAttribute_>
|
static constexpr |
The documentation for this struct was generated from the following file: