flush_icache.hpp Source File# Composable Kernel: flush_icache.hpp Source File includeck_tilehost tile/host/flush_icache.hpp Go to the documentation of this file. 1// SPDX-License-Identifier: MIT 2// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. 3 4#pragma once 5 6#include <hip/hip_runtime.h> 7 8namespace ck_tile { 9// GPU kernel to invalidate instruction cache for accurate benchmarking. 10// s_icache_inv: Asynchronously invalidates the L1 instruction cache on this compute unit, 11// forcing subsequent kernel runs to fetch instructions from HBM instead of cache. 12// 16x s_nop: Wait cycles (~16 cycles) to ensure cache invalidation completes before kernel 13// exits. Without these NOPs, the flush may not finish, leading to inconsistent 14// timing measurements where some instructions remain cached. 15static __global__ void flush_cache() 16{ 17 asm __volatile__("s_icache_inv \n\t" 18 "s_nop 0 \n\t" 19 "s_nop 0 \n\t" 20 "s_nop 0 \n\t" 21 "s_nop 0 \n\t" 22 "s_nop 0 \n\t" 23 "s_nop 0 \n\t" 24 "s_nop 0 \n\t" 25 "s_nop 0 \n\t" 26 "s_nop 0 \n\t" 27 "s_nop 0 \n\t" 28 "s_nop 0 \n\t" 29 "s_nop 0 \n\t" 30 "s_nop 0 \n\t" 31 "s_nop 0 \n\t" 32 "s_nop 0 \n\t" 33 "s_nop 0 \n\t" :: 34 :); 35} 36} // namespace ck_tile ck_tileDefinition tile/core/algorithm/cluster_descriptor.hpp:13