mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp Source File
mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp
Go to the documentation of this file.
50 struct MXF4FlatmmPipelineAGmemBGmemCRegV1 : FlatmmPipelineAGmemBGmemCRegV1<Problem, PipelinePolicy>
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
CK_TILE_DEVICE auto tile_elementwise_in(const InElementFunc &in_element_func, const InTensor &... in_dstr_tensors)
Definition tile_elementwise.hpp:40
CK_TILE_HOST_DEVICE constexpr auto make_tensor_view(DataType *__restrict__ p, const tensor_descriptor< Ts... > &desc)
Definition tensor_view.hpp:452
CK_TILE_DEVICE index_t get_warp_id(bool_constant< ReturnSgpr >={})
Definition arch.hpp:104
CK_TILE_DEVICE void tile_elementwise_inout(const InOutElementFunc &inout_element_func, InOutDstrTensors &... inout_dstr_tensors)
Definition tile_elementwise.hpp:23
CK_TILE_HOST_DEVICE constexpr auto merge_sequences(Seqs...)
Definition tile/core/container/sequence.hpp:826
CK_TILE_DEVICE constexpr auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition null_tile_window.hpp:75
CK_TILE_HOST_DEVICE constexpr auto to_sequence(tuple< number< Is >... >)
Definition tile/core/container/sequence.hpp:1055
CK_TILE_DEVICE void move_tile_window(null_tile_window< WindowLengths > &, const typename null_tile_window< WindowLengths >::BottomTensorIndex &)
Definition null_tile_window.hpp:95
typename uniform_sequence_gen< NSize, I >::type uniform_sequence_gen_t
Definition tile/core/container/sequence.hpp:1026
CK_TILE_DEVICE void store_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition store_tile.hpp:23
GemmPipelineScheduler
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:14
@ Intrawave
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:16
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition load_tile.hpp:22
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
tuple_array< T, N > statically_indexed_array
Definition tile/core/container/statically_indexed_array.hpp:16
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:47
Definition gemm_pipeline_problem.hpp:323
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:51
static constexpr index_t kMPerBlock
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:80
static constexpr index_t BlockSize
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:77
static constexpr auto idxK
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:104
FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy > Underlying
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:52
static constexpr index_t dsread_per_wg
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:141
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, const ScaleADramBlockWindowTmp &scale_a_flat_window_tmp, const ScaleBDramBlockWindowTmp &scale_b_flat_window_tmp, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:1310
static constexpr index_t mfma_perM_perK
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:161
static constexpr index_t BK1
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:130
remove_cvref_t< decltype(PipelinePolicy::template GetBlockFlatmm< Problem >())> BlockFlatmm
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:66
static constexpr index_t DsWritePreIssue
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:74
ADataType ComputeType
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:59
static constexpr auto idxM
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:102
static constexpr index_t KPerBlockPerIter
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:120
static constexpr index_t APackedSize
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:122
static CK_TILE_HOST_DEVICE constexpr auto SchedulerPerM(index_t dsread_perM, index_t dswrite_perM, index_t load_perM)
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:169
static constexpr auto config
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:69
static constexpr index_t KPerScaleLoad
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:157
static constexpr index_t GetVectorSizeC()
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:89
remove_cvref_t< typename BlockGemmShape::BlockWarps > BlockWarps
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:106
static constexpr index_t KIterPerWarp
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:114
static constexpr index_t GetVectorSizeB()
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:88
static constexpr index_t dswrite_kIter
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:163
remove_cvref_t< typename Problem::BDataType > BDataType
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:55
static constexpr index_t dsread_num_perK
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:147
static constexpr index_t Bload_num_perK
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:153
remove_cvref_t< typename BlockGemmShape::WarpTile > WarpTile
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:107
static CK_TILE_HOST_DEVICE constexpr auto HotLoopScheduler()
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:252
remove_cvref_t< decltype(config.template at< 0 >())> WG
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:72
static constexpr index_t HalfMIter
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:158
static constexpr index_t NXdlPack
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:126
static CK_TILE_HOST_DEVICE constexpr auto Last2ndHotLoopScheduler()
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:395
static constexpr index_t GetVectorSizeA()
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:87
static constexpr index_t BPackedSize
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:123
static constexpr index_t MWarp
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:109
static CK_TILE_HOST_DEVICE constexpr auto LastHotLoopScheduler()
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:447
static constexpr index_t ScaleBload_num
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:155
static constexpr bool kPadN
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:92
static constexpr index_t dswrite_rep
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:149
static constexpr index_t NumWaveGroups
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:96
static constexpr index_t NFlatPerBlockPerIter
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:117
remove_cvref_t< typename Problem::CLayout > CLayout
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:64
static constexpr index_t mfma_per_wg
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:139
static constexpr bool kPadK
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:93
static constexpr auto I1
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:100
static constexpr index_t WaveSize
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:78
static constexpr index_t flatKPerWarp
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:84
static constexpr index_t NWarp
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:110
static constexpr index_t MIterPerWarp
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:112
static constexpr index_t NIterPerWarp
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:113
static constexpr auto I0
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:99
static constexpr index_t MPerBlockPerIter
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:119
CK_TILE_HOST_DEVICE auto operator()(ADramBlockWindowTmp a_copy_dram_window, const AElementFunction &a_element_func, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, const ScaleADramBlockWindowTmp &scale_a_window, const ScaleBDramBlockWindowTmp &scale_b_window, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:477
static constexpr index_t KXdlPack
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:127
remove_cvref_t< typename Problem::ADataType > ADataType
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:54
static constexpr index_t ScaleBload_K1
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:154
static constexpr bool kPadM
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:91
static constexpr index_t MXdlPack
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:125
static constexpr index_t KFlatPerBlockPerIter
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:116
static CK_TILE_HOST_DEVICE constexpr auto GetADramTileDistribution()
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:467
static constexpr index_t kKPerBlock
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:82
remove_cvref_t< typename Problem::CDataType > CDataType
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:56
static constexpr auto I2
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:101
static constexpr bool HasHotLoop
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:136
static constexpr index_t kNPerBlock
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:81
remove_cvref_t< typename Problem::BLayout > BLayout
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:63
remove_cvref_t< typename BlockGemmShape::BlockTile > BlockTile
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:105
remove_cvref_t< typename Problem::BlockGemmShape > BlockGemmShape
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:57
static constexpr index_t AK1
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:129
static constexpr index_t m_preload
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:132
static constexpr auto TailNum
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:137
remove_cvref_t< typename Problem::ALayout > ALayout
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:62
static constexpr bool UsePersistentKernel
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:97
static constexpr index_t dswrite_mIter
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:162
static constexpr index_t Aload_rep
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:151
static constexpr index_t flatNPerWarp
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:85
static constexpr index_t DsReadPreload
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:75
static constexpr index_t Bload_rep
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:159
static constexpr index_t Aload_num_perK
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:150
static constexpr bool DoubleSmemBuffer
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:166
static constexpr auto idxN
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:103
static constexpr index_t dswrite_num_perK
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:148
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:32
BlockGemmShape_ BlockGemmShape
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:33
static constexpr int ScaleGranularityK
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:39
static constexpr int KXdlPack
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:44
static constexpr index_t flatNPerWarp
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:37
static constexpr int MXdlPack
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:42
static constexpr int NXdlPack
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:43
static constexpr index_t flatKPerWarp
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:46
static constexpr int ContinuousKPerThread
Definition mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:41
static constexpr int PackedSize
Definition tile/core/numeric/numeric.hpp:82
Definition tile/core/container/sequence.hpp:49
Definition tile/core/utility/functional.hpp:43
Definition tile/core/utility/debug.hpp:67