GridwiseTensorRearrange< InputGridDesc, InputDataType, OutputGridDesc, OutputDataType, BlockSize, MPerBlock, KPerBlock, ThreadClusterLengths, ScalarPerVector, DstInMemOp, Block2ETileMap, ComputePtrOffsetOfStridedBatch > Struct Template Reference

GridwiseTensorRearrange< InputGridDesc, InputDataType, OutputGridDesc, OutputDataType, BlockSize, MPerBlock, KPerBlock, ThreadClusterLengths, ScalarPerVector, DstInMemOp, Block2ETileMap, ComputePtrOffsetOfStridedBatch > Struct Template Reference

Composable Kernel: ck::GridwiseTensorRearrange< InputGridDesc, InputDataType, OutputGridDesc, OutputDataType, BlockSize, MPerBlock, KPerBlock, ThreadClusterLengths, ScalarPerVector, DstInMemOp, Block2ETileMap, ComputePtrOffsetOfStridedBatch > Struct Template Reference
ck::GridwiseTensorRearrange< InputGridDesc, InputDataType, OutputGridDesc, OutputDataType, BlockSize, MPerBlock, KPerBlock, ThreadClusterLengths, ScalarPerVector, DstInMemOp, Block2ETileMap, ComputePtrOffsetOfStridedBatch > Struct Template Reference

#include <gridwise_tensor_rearrange.hpp>

Public Types

using ThisThreadBlock = ThisThreadBlock<BlockSize>

Static Public Member Functions

static __device__ void Run (const InputGridDesc &in_grid_desc, const InputDataType *__restrict__ p_in_global, const OutputGridDesc &out_grid_desc, OutputDataType *__restrict__ p_out_global, const index_t batch_count, const Block2ETileMap &block_2_tile_map, const ComputePtrOffsetOfStridedBatch &compute_ptr_offset_of_batch)
static __host__ constexpr bool CheckValidity (const InputGridDesc &in_grid_desc, const OutputGridDesc &out_grid_desc)

Static Public Attributes

static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}

Member Typedef Documentation

◆ ThisThreadBlock

template<typename InputGridDesc, typename InputDataType, typename OutputGridDesc, typename OutputDataType, index_t BlockSize, index_t MPerBlock, index_t KPerBlock, typename ThreadClusterLengths, index_t ScalarPerVector, InMemoryDataOperationEnum DstInMemOp, typename Block2ETileMap, typename ComputePtrOffsetOfStridedBatch>
using ck::GridwiseTensorRearrange< InputGridDesc, InputDataType, OutputGridDesc, OutputDataType, BlockSize, MPerBlock, KPerBlock, ThreadClusterLengths, ScalarPerVector, DstInMemOp, Block2ETileMap, ComputePtrOffsetOfStridedBatch >::ThisThreadBlock = ThisThreadBlock<BlockSize>

Member Function Documentation

◆ CheckValidity()

template<typename InputGridDesc, typename InputDataType, typename OutputGridDesc, typename OutputDataType, index_t BlockSize, index_t MPerBlock, index_t KPerBlock, typename ThreadClusterLengths, index_t ScalarPerVector, InMemoryDataOperationEnum DstInMemOp, typename Block2ETileMap, typename ComputePtrOffsetOfStridedBatch>
__host__ constexpr bool ck::GridwiseTensorRearrange< InputGridDesc, InputDataType, OutputGridDesc, OutputDataType, BlockSize, MPerBlock, KPerBlock, ThreadClusterLengths, ScalarPerVector, DstInMemOp, Block2ETileMap, ComputePtrOffsetOfStridedBatch >::CheckValidity ( const InputGridDesc & in_grid_desc,
const OutputGridDesc & out_grid_desc )
inline static constexpr

◆ Run()

template<typename InputGridDesc, typename InputDataType, typename OutputGridDesc, typename OutputDataType, index_t BlockSize, index_t MPerBlock, index_t KPerBlock, typename ThreadClusterLengths, index_t ScalarPerVector, InMemoryDataOperationEnum DstInMemOp, typename Block2ETileMap, typename ComputePtrOffsetOfStridedBatch>
__device__ void ck::GridwiseTensorRearrange< InputGridDesc, InputDataType, OutputGridDesc, OutputDataType, BlockSize, MPerBlock, KPerBlock, ThreadClusterLengths, ScalarPerVector, DstInMemOp, Block2ETileMap, ComputePtrOffsetOfStridedBatch >::Run ( const InputGridDesc & in_grid_desc,
const InputDataType *__restrict__ p_in_global,
const OutputGridDesc & out_grid_desc,
OutputDataType *__restrict__ p_out_global,
const index_t batch_count,
const Block2ETileMap & block_2_tile_map,
const ComputePtrOffsetOfStridedBatch & compute_ptr_offset_of_batch )
inline static

Member Data Documentation

◆ I0

template<typename InputGridDesc, typename InputDataType, typename OutputGridDesc, typename OutputDataType, index_t BlockSize, index_t MPerBlock, index_t KPerBlock, typename ThreadClusterLengths, index_t ScalarPerVector, InMemoryDataOperationEnum DstInMemOp, typename Block2ETileMap, typename ComputePtrOffsetOfStridedBatch>
auto ck::GridwiseTensorRearrange< InputGridDesc, InputDataType, OutputGridDesc, OutputDataType, BlockSize, MPerBlock, KPerBlock, ThreadClusterLengths, ScalarPerVector, DstInMemOp, Block2ETileMap, ComputePtrOffsetOfStridedBatch >::I0 = Number<0>{}
static constexpr

◆ I1

template<typename InputGridDesc, typename InputDataType, typename OutputGridDesc, typename OutputDataType, index_t BlockSize, index_t MPerBlock, index_t KPerBlock, typename ThreadClusterLengths, index_t ScalarPerVector, InMemoryDataOperationEnum DstInMemOp, typename Block2ETileMap, typename ComputePtrOffsetOfStridedBatch>
auto ck::GridwiseTensorRearrange< InputGridDesc, InputDataType, OutputGridDesc, OutputDataType, BlockSize, MPerBlock, KPerBlock, ThreadClusterLengths, ScalarPerVector, DstInMemOp, Block2ETileMap, ComputePtrOffsetOfStridedBatch >::I1 = Number<1>{}
static constexpr

The documentation for this struct was generated from the following file: