#include <amd_xdlops.hpp>
|
| template<class FloatC> |
| static __device__ void | Run (const f8x32_t &reg_a, const int32_t &scale_a, const f8x32_t &reg_b, const int32_t &scale_b, FloatC &reg_c) |
| template<class FloatC> |
| static __device__ void | Run (const bf8x32_t &reg_a, const int32_t &scale_a, const bf8x32_t &reg_b, const int32_t &scale_b, FloatC &reg_c) |
| template<class FloatC> |
| static __device__ void | Run (const bf8x32_t &reg_a, const int32_t &scale_a, const f8x32_t &reg_b, const int32_t &scale_b, FloatC &reg_c) |
| template<class FloatC> |
| static __device__ void | Run (const f6x32_t &reg_a, const int32_t scale_a, const f6x32_t &reg_b, const int32_t scale_b, FloatC &reg_c) |
| template<class FloatC> |
| static __device__ void | Run (const bf6x32_t &reg_a, const int32_t scale_a, const bf6x32_t &reg_b, const int32_t scale_b, FloatC &reg_c) |
| template<class FloatC> |
| static __device__ void | Run (const f4x32_t &reg_a, const int32_t scale_a, const f4x32_t &reg_b, const int32_t scale_b, FloatC &reg_c) |
◆ Run() [1/6]
◆ Run() [2/6]
◆ Run() [3/6]
◆ Run() [4/6]
◆ Run() [5/6]
◆ Run() [6/6]
The documentation for this struct was generated from the following file: