|
| template<bool HasHotLoop, TailNumber TailNum, typename AsDramBlockWindowTmp, typename BsDramBlockWindowTmp, typename AElementFunction, typename BElementFunction, typename std::enable_if_t< is_detected< is_tuple, AsDramBlockWindowTmp >::value && is_detected< is_tuple, BsDramBlockWindowTmp >::value, bool > * = nullptr> |
| CK_TILE_DEVICE auto | operator() (const AsDramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BsDramBlockWindowTmp &b_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *p_smem) const |
| template<typename DstBlockTile, typename SrcTileWindow, typename DramTileWindowStep> |
| CK_TILE_DEVICE void | GlobalPrefetch (DstBlockTile &dst_block_tile, SrcTileWindow &dram_tile_window, const DramTileWindowStep &dram_tile_window_step) const |
| template<typename DstBlockWindow, typename SrcTileWindow, typename DramTileWindowStep> |
| CK_TILE_DEVICE void | GlobalPrefetchAsync (DstBlockWindow &dst_block_window, SrcTileWindow &dram_tile_window, const DramTileWindowStep &dram_tile_window_step) const |
| template<typename DstTileWindow, typename SrcBlockTile, typename ElementFunction> |
| CK_TILE_DEVICE void | LocalPrefill (DstTileWindow &lds_tile_window, const SrcBlockTile &src_block_tile, const ElementFunction &element_func) const |
| template<typename DstTileWindow, typename SrcBlockTile> |
| CK_TILE_DEVICE void | LocalPrefill (DstTileWindow &lds_tile_window, const SrcBlockTile &src_block_tile) const |
| template<typename DstBlockTile, typename SrcTileWindow, bool LoadTranspose = false> |
| CK_TILE_DEVICE void | LocalPrefetch (DstBlockTile &dst_block_tile, const SrcTileWindow &lds_tile_window, bool_constant< LoadTranspose >={}) const |
| CK_TILE_DEVICE auto | GetABLdsTensorViews (void *p_smem) const |
| template<typename DramBlockWindowTmp, typename std::enable_if_t< is_detected< is_tuple, DramBlockWindowTmp >::value, bool > * = nullptr> |
| CK_TILE_DEVICE constexpr auto | CopyADramWindow (const DramBlockWindowTmp &dram_block_window_tmp, const array< index_t, 2 > &offset={0, 0}) const |
| template<typename DramBlockWindowTmp, typename std::enable_if_t<!is_detected< is_tuple, DramBlockWindowTmp >::value, bool > * = nullptr> |
| CK_TILE_DEVICE constexpr auto | CopyADramWindow (const DramBlockWindowTmp &dram_block_window_tmp, const array< index_t, 2 > &offset={0, 0}) const |
| template<typename DramBlockWindowTmp, typename std::enable_if_t< is_detected< is_tuple, DramBlockWindowTmp >::value, bool > * = nullptr> |
| CK_TILE_DEVICE constexpr auto | CopyBDramWindow (const DramBlockWindowTmp &dram_block_window_tmp, const array< index_t, 2 > &offset={0, 0}) const |
| template<typename DramBlockWindowTmp, typename std::enable_if_t<!is_detected< is_tuple, DramBlockWindowTmp >::value, bool > * = nullptr> |
| CK_TILE_DEVICE constexpr auto | CopyBDramWindow (const DramBlockWindowTmp &dram_block_window_tmp, const array< index_t, 2 > &offset={0, 0}) const |
| template<typename ADramBlockWindowTmp, typename ALdsTensorView, typename ALdsLoadTileDistr> |
| CK_TILE_DEVICE constexpr auto | GetAWindows (const ADramBlockWindowTmp &a_dram_block_window_tmp, const ALdsTensorView &a_lds_block_view, const ALdsLoadTileDistr &, const array< index_t, 2 > &offset={0, 0}) const |
| template<typename BDramBlockWindowTmp, typename BLdsTensorView, typename BLdsLoadTileDistr> |
| CK_TILE_DEVICE constexpr auto | GetBWindows (const BDramBlockWindowTmp &b_dram_block_window_tmp, const BLdsTensorView &b_lds_block_view, const BLdsLoadTileDistr &, const array< index_t, 2 > &offset={0, 0}) const |