device_image_to_column_impl.hpp Source File
device_image_to_column_impl.hpp
Go to the documentation of this file.
__device__ void copy(const SrcTensorType &src_tensor, DstTensorType &dst_tensor)
Perform optimized copy between two tensor partitions (threadwise copy). Tensors must have the same size.
Definition copy.hpp:36
float launch_and_time_kernel(const StreamConfig &stream_config, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition host_utility/kernel_launch.hpp:14
Definition convolution_backward_data_specialization.hpp:8
Definition convolution_backward_data_specialization.hpp:7
Definition ck.hpp:268
__host__ __device__ constexpr auto make_naive_tensor_descriptor(const Tuple< Lengths... > &lengths, const Tuple< Strides... > &strides)
Definition tensor_descriptor_helper.hpp:49
auto accumulate_n(ForwardIterator first, Size count, T init, BinaryOperation op) -> decltype(std::accumulate(first, std::next(first, count), init, op))
Definition library/utility/numeric.hpp:11
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__global__ void kernel_tensor_rearrange(const InputGridDesc in_grid_desc, const InputDataType *__restrict__ p_in_global, const OutputGridDesc out_grid_desc, OutputDataType *__restrict__ p_out_global, const index_t batch_count, const Block2ETileMap block_2_tile_map, const ComputePtrOffsetOfStridedBatch compute_ptr_offset_of_batch)
Definition gridwise_tensor_rearrange.hpp:30
Definition ck/stream_config.hpp:10
Definition block_to_ctile_map.hpp:261
Definition gridwise_tensor_rearrange.hpp:71
static __host__ constexpr bool CheckValidity(const InputGridDesc &in_grid_desc, const OutputGridDesc &out_grid_desc)
Definition gridwise_tensor_rearrange.hpp:137
Definition tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp:25
Definition device_base.hpp:197
BaseArgument()=default
BaseInvoker()=default
Convolution Tensor Rearrange.
Definition device_conv_tensor_rearrange.hpp:36
Definition device_image_to_column_impl.hpp:161
InputGridDesc in_grid_desc_m_k_
Definition device_image_to_column_impl.hpp:225
const std::array< index_t, NDimSpatial+3 > & image_g_n_c_wis_strides_
Definition device_image_to_column_impl.hpp:219
const std::array< index_t, NDimSpatial > & input_right_pads_
Definition device_image_to_column_impl.hpp:223
const std::array< index_t, NDimSpatial > & conv_filter_strides_
Definition device_image_to_column_impl.hpp:220
const InputDataType * p_in_
Definition device_image_to_column_impl.hpp:216
const std::array< index_t, NDimSpatial > & conv_filter_dilations_
Definition device_image_to_column_impl.hpp:221
const std::array< index_t, NDimSpatial > & input_left_pads_
Definition device_image_to_column_impl.hpp:222
const ck::index_t C_
Definition device_image_to_column_impl.hpp:213
Argument(const void *p_in, void *p_out, const ck::index_t G, const ck::index_t N, const ck::index_t C, const std::array< index_t, NDimSpatial > &input_spatial_lengths, const std::array< index_t, NDimSpatial > &filter_spatial_lengths, const std::array< index_t, NDimSpatial > &output_spatial_lengths, const std::array< index_t, NDimSpatial+3 > &image_g_n_c_wis_strides, const std::array< index_t, 3 > &gemm_g_m_k_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads)
Definition device_image_to_column_impl.hpp:162
ComputePtrOffsetOfStridedBatch compute_ptr_offset_of_batch_
Definition device_image_to_column_impl.hpp:228
OutputDataType * p_out_
Definition device_image_to_column_impl.hpp:217
const ck::index_t X_
Definition device_image_to_column_impl.hpp:214
OutputGridDesc out_grid_desc_m_k_
Definition device_image_to_column_impl.hpp:226
void Print() const
Definition device_image_to_column_impl.hpp:206
const ck::index_t G_
Definition device_image_to_column_impl.hpp:212
Definition device_image_to_column_impl.hpp:232
float Run(const Argument &arg, const StreamConfig &stream_config=StreamConfig{})
Definition device_image_to_column_impl.hpp:233
float Run(const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override
Definition device_image_to_column_impl.hpp:268
Definition device_image_to_column_impl.hpp:47
std::unique_ptr< BaseInvoker > MakeInvokerPointer() override
Definition device_image_to_column_impl.hpp:383
bool IsSupportedArgument(const Argument &arg)
Definition device_image_to_column_impl.hpp:275
TransformConvFwdToGemm< NDimSpatial, ConvolutionForwardSpecialization::Default > ConvToGemmFwdTransformer
Definition device_image_to_column_impl.hpp:61
static auto MakeInvoker()
Definition device_image_to_column_impl.hpp:349
static constexpr auto I0
Definition device_image_to_column_impl.hpp:57
std::string GetTypeString() const override
Definition device_image_to_column_impl.hpp:388
static auto MakeArgument(const void *p_in, void *p_out, const ck::index_t G, const ck::index_t N, const ck::index_t C, const std::array< index_t, NDimSpatial > &input_spatial_lengths, const std::array< index_t, NDimSpatial > &filter_spatial_lengths, const std::array< index_t, NDimSpatial > &output_spatial_lengths, const std::array< index_t, NDimSpatial+3 > &image_g_n_c_wis_strides, const std::array< index_t, 3 > &gemm_g_m_k_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads)
Definition device_image_to_column_impl.hpp:318
static auto MakeOutDescriptor_M_K(const ck::index_t N, const ck::index_t C, const std::array< index_t, NDimSpatial > &filter_spatial_lengths, const std::array< index_t, NDimSpatial > &output_spatial_lengths, const std::array< index_t, 3 > &gemm_g_m_k_strides)
Definition device_image_to_column_impl.hpp:121
GridwiseTensorRearrange< InputGridDesc, InputDataType, OutputGridDesc, OutputDataType, BlockSize, MPerBlock, KPerBlock, ThreadClusterLengths, ScalarPerVector, InMemoryDataOperationEnum::Set, Block2ETileMap, ComputePtrOffsetOfStridedBatch<> > GridwiseTensorRearrangeKernel
Definition device_image_to_column_impl.hpp:147
static auto MakeInputDescriptor_M_K(const ck::index_t N, const ck::index_t C, const std::array< index_t, NDimSpatial > &input_spatial_lengths, const std::array< index_t, NDimSpatial > &filter_spatial_lengths, const std::array< index_t, NDimSpatial > &output_spatial_lengths, const std::array< index_t, NDimSpatial+3 > &image_g_n_c_wis_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads)
Definition device_image_to_column_impl.hpp:70
remove_cvref_t< decltype(MakeInputDescriptor_M_K(1, 1, {}, {}, {}, {}, {}, {}, {}, {}))> InputGridDesc
Definition device_image_to_column_impl.hpp:139
std::unique_ptr< BaseArgument > MakeArgumentPointer(const void *p_in, void *p_out, const ck::index_t G, const ck::index_t N, const ck::index_t C, const std::array< index_t, NDimSpatial > &input_spatial_lengths, const std::array< index_t, NDimSpatial > &filter_spatial_lengths, const std::array< index_t, NDimSpatial > &output_spatial_lengths, const std::array< index_t, NDimSpatial+3 > &image_g_n_c_wis_strides, const std::array< index_t, 3 > &gemm_g_m_k_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads) override
Make argument pointer for image to column.
Definition device_image_to_column_impl.hpp:352
bool IsSupportedArgument(const BaseArgument *p_arg) override
Definition device_image_to_column_impl.hpp:313
remove_cvref_t< decltype(MakeOutDescriptor_M_K(1, 1, {}, {}, {}))> OutputGridDesc
Definition device_image_to_column_impl.hpp:141
static constexpr auto matrix_padder
Definition device_image_to_column_impl.hpp:64
remove_cvref_t< decltype(BlockToCTileMap_M00_N0_M01Adapt< MPerBlock, KPerBlock, OutputGridDesc >( OutputGridDesc{}))> Block2ETileMap
Definition device_image_to_column_impl.hpp:143
static constexpr bool is_GNSpatialC
Definition device_image_to_column_impl.hpp:52
static constexpr auto I2
Definition device_image_to_column_impl.hpp:59
static constexpr auto I1
Definition device_image_to_column_impl.hpp:58
static constexpr bool is_NSpatialGC
Definition device_image_to_column_impl.hpp:48
Definition matrix_padder.hpp:180