ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch > Struct Template Reference

ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch > Struct Template Reference

Composable Kernel: ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch > Struct Template Reference
ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch > Struct Template Reference

#include <threadwise_tensor_slice_transfer_v3r1_dequant.hpp>

Public Types

using Index = MultiIndex<nDim>
using SrcCoord = decltype(make_tensor_coordinate(SrcDesc{}, Index{}))
using ScaleCoord = decltype(make_tensor_coordinate(SrcDesc{}, Index{}))
using DstCoord = decltype(make_tensor_coordinate(DstDesc{}, Index{}))

Public Member Functions

__device__ constexpr ThreadwiseTensorSliceTransfer_v3r1_dequant (const SrcDesc &src_desc, const Index &src_slice_origin, const SrcElementwiseOperation &src_element_op, const ScaleDesc &scale_desc, const Index &scale_slice_origin, const ScaleElementwiseOperation &scale_element_op, const DstDesc &dst_desc, const Index &dst_slice_origin, const DstElementwiseOperation &dst_element_op)
__device__ void SetSrcSliceOrigin (const SrcDesc &src_desc, const Index &src_slice_origin_idx)
__device__ void SetScaleSliceOrigin (const ScaleDesc &scale_desc, const Index &scale_slice_origin_idx)
__device__ void SetDstSliceOrigin (const DstDesc &dst_desc, const Index &dst_slice_origin_idx)
template<typename SrcBuffer, index_t ThreadScratchId = 0>
__device__ void RunRead (const SrcDesc &src_desc, const SrcBuffer &src_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
template<typename ScaleBuffer>
__device__ void RunScaleRead (const ScaleDesc &scale_desc, const ScaleBuffer &scale_buf)
template<index_t ThreadScratchId>
__device__ void TransferDataFromSrcThreadScratchToDstThreadScratch (Number< ThreadScratchId > thread_scratch_id)
template<typename DstBuffer, index_t ThreadScratchId = 0>
__device__ void RunWrite (const DstDesc &dst_desc, DstBuffer &dst_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
__device__ void MoveSrcSliceWindow (const SrcDesc &src_desc, const Index &src_slice_origin_step_idx)
__device__ void MoveDstSliceWindow (const DstDesc &dst_desc, const Index &dst_slice_origin_step_idx)

Static Public Member Functions

static __device__ constexpr auto GetSrcCoordinateResetStep ()
static __device__ constexpr auto GetDstCoordinateResetStep ()
static __device__ constexpr auto GetSrcThreadScratchDescriptor ()
static __device__ constexpr auto GetScaleThreadScratchDescriptor ()
static __device__ constexpr auto GetDstThreadScratchDescriptor ()

Static Public Attributes

static constexpr index_t nDim = SliceLengths::Size()
static constexpr auto I0 = Number<0>{}

Member Typedef Documentation

◆ DstCoord

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
using ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::DstCoord = decltype(make_tensor_coordinate(DstDesc{}, Index{}))

◆ Index

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
using ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::Index = MultiIndex<nDim>

◆ ScaleCoord

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
using ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::ScaleCoord = decltype(make_tensor_coordinate(SrcDesc{}, Index{}))

◆ SrcCoord

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
using ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::SrcCoord = decltype(make_tensor_coordinate(SrcDesc{}, Index{}))

Constructor & Destructor Documentation

◆ ThreadwiseTensorSliceTransfer_v3r1_dequant()

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
__device__ constexpr ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::ThreadwiseTensorSliceTransfer_v3r1_dequant ( const SrcDesc & src_desc,
const Index & src_slice_origin,
const SrcElementwiseOperation & src_element_op,
const ScaleDesc & scale_desc,
const Index & scale_slice_origin,
const ScaleElementwiseOperation & scale_element_op,
const DstDesc & dst_desc,
const Index & dst_slice_origin,
const DstElementwiseOperation & dst_element_op )
inline constexpr

Member Function Documentation

◆ GetDstCoordinateResetStep()

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
__device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::GetDstCoordinateResetStep ( )
inline static constexpr

◆ GetDstThreadScratchDescriptor()

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
__device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::GetDstThreadScratchDescriptor ( )
inline static constexpr

◆ GetScaleThreadScratchDescriptor()

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
__device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::GetScaleThreadScratchDescriptor ( )
inline static constexpr

◆ GetSrcCoordinateResetStep()

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
__device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::GetSrcCoordinateResetStep ( )
inline static constexpr

◆ GetSrcThreadScratchDescriptor()

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
__device__ constexpr auto ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::GetSrcThreadScratchDescriptor ( )
inline static constexpr

◆ MoveDstSliceWindow()

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
__device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::MoveDstSliceWindow ( const DstDesc & dst_desc,
const Index & dst_slice_origin_step_idx )
inline

◆ MoveSrcSliceWindow()

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
__device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::MoveSrcSliceWindow ( const SrcDesc & src_desc,
const Index & src_slice_origin_step_idx )
inline

◆ RunRead()

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
template<typename SrcBuffer, index_t ThreadScratchId = 0>
__device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::RunRead ( const SrcDesc & src_desc,
const SrcBuffer & src_buf,
Number< ThreadScratchId > thread_scratch_id = Number<ThreadScratchId>{} )
inline

◆ RunScaleRead()

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
template<typename ScaleBuffer>
__device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::RunScaleRead ( const ScaleDesc & scale_desc,
const ScaleBuffer & scale_buf )
inline

◆ RunWrite()

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
template<typename DstBuffer, index_t ThreadScratchId = 0>
__device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::RunWrite ( const DstDesc & dst_desc,
DstBuffer & dst_buf,
Number< ThreadScratchId > thread_scratch_id = Number<ThreadScratchId>{} )
inline

◆ SetDstSliceOrigin()

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
__device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::SetDstSliceOrigin ( const DstDesc & dst_desc,
const Index & dst_slice_origin_idx )
inline

◆ SetScaleSliceOrigin()

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
__device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::SetScaleSliceOrigin ( const ScaleDesc & scale_desc,
const Index & scale_slice_origin_idx )
inline

◆ SetSrcSliceOrigin()

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
__device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::SetSrcSliceOrigin ( const SrcDesc & src_desc,
const Index & src_slice_origin_idx )
inline

◆ TransferDataFromSrcThreadScratchToDstThreadScratch()

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
template<index_t ThreadScratchId>
__device__ void ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::TransferDataFromSrcThreadScratchToDstThreadScratch ( Number< ThreadScratchId > thread_scratch_id)
inline

Member Data Documentation

◆ I0

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
auto ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::I0 = Number<0>{}
static constexpr

◆ nDim

template<typename SliceLengths, typename ScaleSliceLengths, typename SrcElementwiseOperation, typename ScaleElementwiseOperation, typename DstElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SrcData, typename ScaleData, typename DstData, typename SrcDesc, typename ScaleDesc, typename DstDesc, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, index_t SrcScalarPerVector, index_t ScaleScalarPerVector, index_t DstScalarPerVector, index_t SrcScalarStrideInVector, index_t ScaleScalarStrideInVector, index_t DstScalarStrideInVector, bool SrcResetCoordinateAfterRun, bool DstResetCoordinateAfterRun, index_t NumThreadScratch = 1>
index_t ck::ThreadwiseTensorSliceTransfer_v3r1_dequant< SliceLengths, ScaleSliceLengths, SrcElementwiseOperation, ScaleElementwiseOperation, DstElementwiseOperation, DstInMemOp, SrcData, ScaleData, DstData, SrcDesc, ScaleDesc, DstDesc, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVector, ScaleScalarPerVector, DstScalarPerVector, SrcScalarStrideInVector, ScaleScalarStrideInVector, DstScalarStrideInVector, SrcResetCoordinateAfterRun, DstResetCoordinateAfterRun, NumThreadScratch >::nDim = SliceLengths::Size()
static constexpr

The documentation for this struct was generated from the following file: