tensor_partition.hpp File Reference

tensor_partition.hpp File Reference

Composable Kernel: tensor_partition.hpp File Reference
tensor_partition.hpp File Reference

Go to the source code of this file.

Functions

template<typename TensorType, typename ThreadShape, typename ThreadUnrolledDesc, typename ProjectionTuple>
__host__ __device__ constexpr auto make_local_partition (TensorType &tensor, const Layout< ThreadShape, ThreadUnrolledDesc > &thread_layout, const index_t thread_id, const ProjectionTuple &projection)
 Create a local partition for a thread (currently only packed partitions are supported).
template<typename TensorType, typename ThreadShape, typename ThreadUnrolledDesc>
__host__ __device__ constexpr auto make_local_partition (TensorType &tensor, const Layout< ThreadShape, ThreadUnrolledDesc > &thread_lengths, const index_t thread_id)
 Create a local partition for a thread (currently only packed partitions are supported).
template<typename TensorType, typename BlockShapeTuple, typename BlockIdxs, typename ProjectionTuple>
__host__ __device__ constexpr auto make_local_tile (const TensorType &tensor, const BlockShapeTuple &tile_shape, const BlockIdxs &block_idxs, const ProjectionTuple &projection)
 Create a local tile for a thread block (currently only packed tiles are supported).
template<typename TensorType, typename BlockShapeTuple, typename BlockIdxs>
__host__ __device__ constexpr auto make_local_tile (const TensorType &tensor, const BlockShapeTuple &tile_shape, const BlockIdxs &block_idxs)
 Create a local tile for a thread block (currently only packed tiles are supported).

Function Documentation

◆ make_local_partition() [1/2]

template<typename TensorType, typename ThreadShape, typename ThreadUnrolledDesc, typename ProjectionTuple>
__host__ __device__ constexpr auto make_local_partition ( TensorType & tensor,
const Layout< ThreadShape, ThreadUnrolledDesc > & thread_layout,
const index_t thread_id,
const ProjectionTuple & projection )
constexpr

Create a local partition for a thread (currently only packed partitions are supported).

Parameters
tensor: Tensor for partition.
thread_layout: Layout of threads (must not be a transformed layout).
thread_id: Thread index represented as an integer.
projection: Projection used to remove selected dimensions from partitioning. Use slice(X) to remove a dimension, where X is the dimension size; use Number<1>{} to keep it.
Returns
Partition tensor.

◆ make_local_partition() [2/2]

template<typename TensorType, typename ThreadShape, typename ThreadUnrolledDesc>
__host__ __device__ constexpr auto make_local_partition ( TensorType & tensor,
const Layout< ThreadShape, ThreadUnrolledDesc > & thread_lengths,
const index_t thread_id )
constexpr

Create a local partition for a thread (currently only packed partitions are supported).

Parameters
tensor: Tensor for partition.
thread_lengths: Layout of threads (must not be nested).
thread_id: Thread index represented as an integer.
Returns
Partition tensor.

◆ make_local_tile() [1/2]

template<typename TensorType, typename BlockShapeTuple, typename BlockIdxs>
__host__ __device__ constexpr auto make_local_tile ( const TensorType & tensor,
const BlockShapeTuple & tile_shape,
const BlockIdxs & block_idxs )
constexpr

Create a local tile for a thread block (currently only packed tiles are supported).

Note
Currently, to get the best performance, please use a 2D tile_shape.
Parameters
tensor: Tensor for partition.
tile_shape: Shape of the requested tile.
block_idxs: Tuple of block indices represented as integers. If an entry is a slice, the whole dimension is taken.
Returns
Tile tensor.

◆ make_local_tile() [2/2]

template<typename TensorType, typename BlockShapeTuple, typename BlockIdxs, typename ProjectionTuple>
__host__ __device__ constexpr auto make_local_tile ( const TensorType & tensor,
const BlockShapeTuple & tile_shape,
const BlockIdxs & block_idxs,
const ProjectionTuple & projection )
constexpr

Create a local tile for a thread block (currently only packed tiles are supported).

Note
Currently, to get the best performance, please use a 2D tile_shape.
Parameters
tensor: Tensor for partition.
tile_shape: Shape of the requested tile.
block_idxs: Tuple of block indices represented as integers. If an entry is a slice, the whole dimension is taken.
projection: Projection used to remove selected dimensions from partitioning. Use slice(X) to remove a dimension, where X is the dimension size; use Number<1>{} to keep it.
Returns
Tile tensor.