naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits > Struct Template Reference

naive_attention_fwd_kernel&lt; QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits &gt; Struct Template Reference#

Composable Kernel: ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits > Struct Template Reference
ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits > Struct Template Reference

#include <naive_attention.hpp>

Classes

struct  scale_max
struct  scale_max< int8_t >
struct  scale_max< fp8_t >
struct  addresser
struct  page_addresser
struct  kvscale_addresser

Public Types

using SoftmaxType = float
using QuantComputeType = float
using QCompute = KType
using PType = VType
using OAccType = float
using p_vec_type = ext_vector_t<PType, 16 / sizeof(PType)>

Public Member Functions

__host__ __device__ naive_attention_fwd_kernel ()
template<typename T, typename F>
__device__ constexpr T wave_reduce (T local, F reduce_f)
template<typename T, typename F>
__device__ constexpr T cross_wave_reduce (T local, F reduce_f, T *smem)
__device__ void operator() (naive_attention_fwd_args args)

Static Public Member Functions

__device__ static __host__ constexpr int get_block_size ()
static __host__ dim3 get_grid_size (naive_attention_fwd_args args)

Static Public Attributes

static constexpr bool is_kvcache_i8
static constexpr bool is_kvcache_fp8
static constexpr int v_per_token_quant_group_size = 64
static constexpr int kBlockSize = 256
static constexpr int p_vec_elem = vector_traits<p_vec_type>::vector_size

Member Typedef Documentation

◆ OAccType

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::OAccType = float

◆ p_vec_type

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::p_vec_type = ext_vector_t<PType, 16 / sizeof(PType)>

◆ PType

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::PType = VType

◆ QCompute

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::QCompute = KType

◆ QuantComputeType

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::QuantComputeType = float

◆ SoftmaxType

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::SoftmaxType = float

Constructor & Destructor Documentation

◆ naive_attention_fwd_kernel()

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
__host__ __device__ ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::naive_attention_fwd_kernel ( )
inline

Member Function Documentation

◆ cross_wave_reduce()

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
template<typename T, typename F>
__device__ constexpr T ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::cross_wave_reduce ( T local,
F reduce_f,
T * smem )
inlineconstexpr

◆ get_block_size()

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
__device__ static __host__ constexpr int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::get_block_size ( )
inlinestaticconstexpr

◆ get_grid_size()

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
__host__ dim3 ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::get_grid_size ( naive_attention_fwd_args args)
inlinestatic

◆ operator()()

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
__device__ void ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::operator() ( naive_attention_fwd_args args)
inline

◆ wave_reduce()

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
template<typename T, typename F>
__device__ constexpr T ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::wave_reduce ( T local,
F reduce_f )
inlineconstexpr

Member Data Documentation

◆ is_kvcache_fp8

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
bool ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::is_kvcache_fp8
staticconstexpr
Initial value:
=
std::is_same_v<KType, fp8_t> && std::is_same_v<VType, fp8_t>

◆ is_kvcache_i8

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
bool ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::is_kvcache_i8
staticconstexpr
Initial value:
=
std::is_same_v<KType, int8_t> && std::is_same_v<VType, int8_t>

◆ kBlockSize

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::kBlockSize = 256
staticconstexpr

◆ p_vec_elem

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::p_vec_elem = vector_traits<p_vec_type>::vector_size
staticconstexpr

◆ v_per_token_quant_group_size

template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType, naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits>
int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::v_per_token_quant_group_size = 64
staticconstexpr

The documentation for this struct was generated from the following file: