block_reduce.hpp File Reference

block_reduce.hpp File Reference

Composable Kernel: block_reduce.hpp File Reference
block_reduce.hpp File Reference
#include "ck_tile/core.hpp"
#include <tuple>

Go to the source code of this file.

Classes

struct ck_tile::BlockReduce2D< InDistributedTensor_ >

Namespaces

namespace ck_tile

Functions

template<typename AccDistributedTensor_, typename ReduceFunc, bool WithBroadcast = true, bool CrossWarp = true>
CK_TILE_DEVICE void ck_tile::block_tile_reduce_sync (AccDistributedTensor_ &acc_tensor, const ReduceFunc &reduce_func, bool_constant< WithBroadcast > = {}, bool_constant< CrossWarp > = {})
template<typename AccDistributedTensor_, typename ReduceFunc>
CK_TILE_DEVICE void ck_tile::block_tile_reduce_xor_sync (AccDistributedTensor_ &acc_tensor, const ReduceFunc &reduce_func)
template<typename AccDistributedTensor_, typename InDistributedTensor_, index_t... InReduceDims, typename ReduceFunc>
CK_TILE_DEVICE void ck_tile::block_tile_reduce (AccDistributedTensor_ &acc_tensor, const InDistributedTensor_ &in_tensor, sequence< InReduceDims... >, const ReduceFunc &reduce_func)
template<typename AccDataType_, typename InDistributedTensor_, index_t... InReduceDims, typename ReduceFunc, typename InDataType_>
CK_TILE_DEVICE auto ck_tile::block_tile_reduce (const InDistributedTensor_ &in_tensor, sequence< InReduceDims... > in_reduce_dims, const ReduceFunc &reduce_func, const InDataType_ &reduce_init)
template<typename T>
CK_TILE_HOST_DEVICE_EXTERN ck_tile::BlockReduce2D (const T &, const typename T::DataType &) -> BlockReduce2D< T >