intrin_mfma_scale_f32_16x16x128f8f6f4< 16, 16, OpselA, OpselB > Struct Template Reference

intrin_mfma_scale_f32_16x16x128f8f6f4< 16, 16, OpselA, OpselB > Struct Template Reference

Composable Kernel: ck::intrin_mfma_scale_f32_16x16x128f8f6f4< 16, 16, OpselA, OpselB > Struct Template Reference
ck::intrin_mfma_scale_f32_16x16x128f8f6f4< 16, 16, OpselA, OpselB > Struct Template Reference

#include <amd_xdlops.hpp>

Static Public Member Functions

template<class FloatC>
static __device__ void Run (const f8x32_t &reg_a, const int32_t &scale_a, const f8x32_t &reg_b, const int32_t &scale_b, FloatC &reg_c)
template<class FloatC>
static __device__ void Run (const bf8x32_t &reg_a, const int32_t &scale_a, const bf8x32_t &reg_b, const int32_t &scale_b, FloatC &reg_c)
template<class FloatC>
static __device__ void Run (const f8x32_t &reg_a, const int32_t &scale_a, const bf8x32_t &reg_b, const int32_t &scale_b, FloatC &reg_c)
template<class FloatC>
static __device__ void Run (const bf8x32_t &reg_a, const int32_t &scale_a, const f8x32_t &reg_b, const int32_t &scale_b, FloatC &reg_c)
template<class FloatC>
static __device__ void Run (const f6x32_t &reg_a, const int32_t scale_a, const f6x32_t &reg_b, const int32_t scale_b, FloatC &reg_c)
template<class FloatC>
static __device__ void Run (const f6x16x2_t &reg_a, const int32_t scale_a, const f6x16x2_t &reg_b, const int32_t scale_b, FloatC &reg_c)
template<class FloatC>
static __device__ void Run (const bf6x32_t &reg_a, const int32_t scale_a, const bf6x32_t &reg_b, const int32_t scale_b, FloatC &reg_c)
template<class FloatC>
static __device__ void Run (const bf6x16x2_t &reg_a, const int32_t scale_a, const bf6x16x2_t &reg_b, const int32_t scale_b, FloatC &reg_c)
template<class FloatC>
static __device__ void Run (const f4x32_t &reg_a, const int32_t scale_a, const f4x32_t &reg_b, const int32_t scale_b, FloatC &reg_c)

Member Function Documentation

◆ Run() [1/9]

template<index_t OpselA, index_t OpselB>
template<class FloatC>
__device__ void ck::intrin_mfma_scale_f32_16x16x128f8f6f4< 16, 16, OpselA, OpselB >::Run ( const bf6x16x2_t & reg_a,
const int32_t scale_a,
const bf6x16x2_t & reg_b,
const int32_t scale_b,
FloatC & reg_c )
inline static

◆ Run() [2/9]

template<index_t OpselA, index_t OpselB>
template<class FloatC>
__device__ void ck::intrin_mfma_scale_f32_16x16x128f8f6f4< 16, 16, OpselA, OpselB >::Run ( const bf6x32_t & reg_a,
const int32_t scale_a,
const bf6x32_t & reg_b,
const int32_t scale_b,
FloatC & reg_c )
inline static

◆ Run() [3/9]

template<index_t OpselA, index_t OpselB>
template<class FloatC>
__device__ void ck::intrin_mfma_scale_f32_16x16x128f8f6f4< 16, 16, OpselA, OpselB >::Run ( const bf8x32_t & reg_a,
const int32_t & scale_a,
const bf8x32_t & reg_b,
const int32_t & scale_b,
FloatC & reg_c )
inline static

◆ Run() [4/9]

template<index_t OpselA, index_t OpselB>
template<class FloatC>
__device__ void ck::intrin_mfma_scale_f32_16x16x128f8f6f4< 16, 16, OpselA, OpselB >::Run ( const bf8x32_t & reg_a,
const int32_t & scale_a,
const f8x32_t & reg_b,
const int32_t & scale_b,
FloatC & reg_c )
inline static

◆ Run() [5/9]

template<index_t OpselA, index_t OpselB>
template<class FloatC>
__device__ void ck::intrin_mfma_scale_f32_16x16x128f8f6f4< 16, 16, OpselA, OpselB >::Run ( const f4x32_t & reg_a,
const int32_t scale_a,
const f4x32_t & reg_b,
const int32_t scale_b,
FloatC & reg_c )
inline static

◆ Run() [6/9]

template<index_t OpselA, index_t OpselB>
template<class FloatC>
__device__ void ck::intrin_mfma_scale_f32_16x16x128f8f6f4< 16, 16, OpselA, OpselB >::Run ( const f6x16x2_t & reg_a,
const int32_t scale_a,
const f6x16x2_t & reg_b,
const int32_t scale_b,
FloatC & reg_c )
inline static

◆ Run() [7/9]

template<index_t OpselA, index_t OpselB>
template<class FloatC>
__device__ void ck::intrin_mfma_scale_f32_16x16x128f8f6f4< 16, 16, OpselA, OpselB >::Run ( const f6x32_t & reg_a,
const int32_t scale_a,
const f6x32_t & reg_b,
const int32_t scale_b,
FloatC & reg_c )
inline static

◆ Run() [8/9]

template<index_t OpselA, index_t OpselB>
template<class FloatC>
__device__ void ck::intrin_mfma_scale_f32_16x16x128f8f6f4< 16, 16, OpselA, OpselB >::Run ( const f8x32_t & reg_a,
const int32_t & scale_a,
const bf8x32_t & reg_b,
const int32_t & scale_b,
FloatC & reg_c )
inline static

◆ Run() [9/9]

template<index_t OpselA, index_t OpselB>
template<class FloatC>
__device__ void ck::intrin_mfma_scale_f32_16x16x128f8f6f4< 16, 16, OpselA, OpselB >::Run ( const f8x32_t & reg_a,
const int32_t & scale_a,
const f8x32_t & reg_b,
const int32_t & scale_b,
FloatC & reg_c )
inline static

The documentation for this struct was generated from the following file: