json_dump.hpp Source File

json_dump.hpp Source File#

Composable Kernel: json_dump.hpp Source File
json_dump.hpp
Go to the documentation of this file.
1// Copyright © Advanced Micro Devices, Inc. or its affiliates.
2// SPDX-License-Identifier: MIT
3
4#ifdef CK_ENABLE_JSON_DUMP
5#pragma GCC diagnostic push
6#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
7#include "rapidjson/writer.h"
10#include "rapidjson/rapidjson.h"
11#pragma GCC diagnostic pop
12
// Opens `file_name` for writing and starts a JSON object in an in-memory buffer.
//
// Introduces the locals `file_str`, `file`, `s` and `writer` into the enclosing scope; the
// other dump macros in this file refer to them by name, so this macro must be expanded first
// and at most once per scope. Throws std::runtime_error when the file cannot be opened.
// `file_name` is expanded exactly once, so an argument with side effects is safe.
#define START_JSON_DUMP_FILE(file_name)                               \
    std::string file_str(file_name);                                  \
    std::ofstream file(file_str);                                     \
    if(!file.is_open())                                               \
    {                                                                 \
        throw std::runtime_error("Could not open file: " + file_str); \
    }                                                                 \
    rapidjson::StringBuffer s;                                        \
    rapidjson::Writer<rapidjson::StringBuffer> writer(s);             \
    writer.StartObject();
23
// Closes the JSON object opened by START_JSON_DUMP_FILE, writes the buffered text to the
// file and closes it. Relies on the locals `writer`, `s`, `file` and `file_str` declared by
// START_JSON_DUMP_FILE in the same scope.
//
// Throws std::runtime_error when the write or the close fails, so that the success message
// below is only printed when the stream is still in a good state.
#define END_JSON_DUMP_FILE()                                           \
    writer.EndObject();                                                \
    file << s.GetString();                                             \
    file.close();                                                      \
    if(!file)                                                          \
    {                                                                  \
        throw std::runtime_error("Could not write file: " + file_str); \
    }                                                                  \
    std::cout << "Results written to " << file_str << " successfully" << std::endl;
29
// Appends one `key: value` member to the JSON object being built. Uses the `writer` local
// declared by START_JSON_DUMP_FILE. The expansion ends with its own ';'.
#define ADD_KEY_VALUE(key, value) add_key_value_pair(writer, (key), (value));

// Appends the "perf" member (time / tflops / gbytes) through add_perf_to_json. Uses the
// `writer` local declared by START_JSON_DUMP_FILE. The expansion ends with its own ';',
// which is why some call sites omit the trailing semicolon.
#define ADD_PERF_TO_JSON(_time, tflops, gbytes) \
    add_perf_to_json(writer, (_time), (tflops), (gbytes));
32
33template <typename T>
34void add_key_value_pair(rapidjson::Writer<rapidjson::StringBuffer>& writer,
35 const char* key,
36 T value)
37{
38 writer.Key(key);
39 if constexpr(std::is_same<T, const char*>::value)
40 {
41 writer.String(value, static_cast<rapidjson::SizeType>(std::strlen(value)));
42 }
43 else if constexpr(std::is_same<T, std::string>::value)
44 {
45 writer.String(value.c_str(), static_cast<rapidjson::SizeType>(value.length()));
46 }
47 else if constexpr(std::is_floating_point<T>::value)
48 {
49 writer.Double(static_cast<double>(value));
50 }
51 else if constexpr(std::is_integral<T>::value)
52 {
53 writer.Int64(static_cast<int64_t>(value));
54 }
55 else
56 {
57 static_assert(std::is_same<T, const char*>::value || std::is_floating_point<T>::value ||
58 std::is_integral<T>::value,
59 "Unsupported type for JSON serialization");
60 }
61}
62
63static void add_perf_to_json(rapidjson::Writer<rapidjson::StringBuffer>& writer,
64 float time,
65 float tflops,
66 float gbytes)
67{
68 std::string roster("perf");
69 writer.String(roster.c_str(), static_cast<rapidjson::SizeType>(roster.length()));
70
71 writer.StartArray();
72 writer.StartObject();
73
74 add_key_value_pair(writer, "time", time);
75 add_key_value_pair(writer, "tflops", tflops);
76 add_key_value_pair(writer, "gbytes", gbytes);
77
78 writer.EndObject();
79 writer.EndArray();
80}
81
82#else
83#pragma GCC diagnostic push
84#pragma GCC diagnostic ignored "-Wunused-parameter"
85#pragma GCC diagnostic ignored "-Wunused-local-typedef"
// JSON dumping is compiled out: opening the file and adding members expand to nothing.
#define START_JSON_DUMP_FILE(file_name)

// The only visible effect of a dump_* call in this configuration is this console notice.
#define END_JSON_DUMP_FILE() \
    std::cout << "JSON dump disabled, To enable, set CK_ENABLE_JSON_DUMP cmake option" << std::endl;

#define ADD_KEY_VALUE(key, value)
#define ADD_PERF_TO_JSON(_time, tflops, gbytes)
92#endif
93
// Helper trait to check for member existence.
//
// has_warp_tile_members<T>::value is true when T::M_Warp_Tile, T::N_Warp_Tile and
// T::K_Warp_Tile are all valid expressions, and false otherwise.

// Primary template: chosen when the specialization below is not viable.
template <typename T, typename = void>
struct has_warp_tile_members : std::false_type
{
};

// Specialization: viable only when all three decltype expressions are well-formed.
template <typename T>
struct has_warp_tile_members<
    T,
    std::void_t<decltype(T::M_Warp_Tile), decltype(T::N_Warp_Tile), decltype(T::K_Warp_Tile)>>
    : std::true_type
{
};
107
108template <typename ALayout,
109 typename BLayout,
110 typename CLayout,
111 typename ADataType,
112 typename BDataType,
113 typename CDataType,
114 typename GemmConfig,
115 template <typename>
116 typename DTypeTraits>
117void dump_gemm_json_results(const std::string& json_filename,
118 int M,
119 int N,
120 int K,
121 int stride_A,
122 int stride_B,
123 int stride_C,
124 bool persistent,
125 bool pass,
126 float ave_time,
127 float tflops,
128 float gb_per_sec,
129 const std::string& kernel_name = "gemm_basic")
130{
131 START_JSON_DUMP_FILE(json_filename);
132 ADD_KEY_VALUE("name", kernel_name);
133 ADD_KEY_VALUE("M", M);
134 ADD_KEY_VALUE("N", N);
135 ADD_KEY_VALUE("K", K);
136 ADD_KEY_VALUE("stride_A", stride_A);
137 ADD_KEY_VALUE("stride_B", stride_B);
138 ADD_KEY_VALUE("stride_C", stride_C);
139 ADD_KEY_VALUE("A_layout", ALayout::name);
140 ADD_KEY_VALUE("B_layout", BLayout::name);
141 ADD_KEY_VALUE("C_layout", CLayout::name);
142 using TraitsADataType = DTypeTraits<ADataType>;
143 using TraitsBDataType = DTypeTraits<BDataType>;
144 using TraitsCDataType = DTypeTraits<CDataType>;
145 ADD_KEY_VALUE("A_type", TraitsADataType::name);
146 ADD_KEY_VALUE("B_type", TraitsBDataType::name);
147 ADD_KEY_VALUE("C_type", TraitsCDataType::name);
148 ADD_KEY_VALUE("structured_sparsity", GemmConfig::UseStructuredSparsity ? "on" : "off");
149
151 {
152 ADD_KEY_VALUE("warp_tile",
153 std::to_string(GemmConfig::M_Warp_Tile) + "x" +
154 std::to_string(GemmConfig::N_Warp_Tile) + "x" +
155 std::to_string(GemmConfig::K_Warp_Tile));
156 }
157 ADD_KEY_VALUE("persistent", persistent ? "on" : "off");
158 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
159 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
161}
162
163void dump_batched_gemm_json_results(const std::string& json_filename,
164 const std::string& op_name,
165 int M,
166 int N,
167 int K,
168 int stride_A,
169 int stride_B,
170 int stride_C,
171 int batch_stride_A,
172 int batch_stride_B,
173 int batch_stride_C,
174 int batch_count,
175 bool pass,
176 float ave_time,
177 float tflops,
178 float gb_per_sec,
179 const std::string& kernel_name = "batched_gemm_basic")
180{
181 START_JSON_DUMP_FILE(json_filename);
182 ADD_KEY_VALUE("name", kernel_name);
183 ADD_KEY_VALUE("op_name", op_name);
184 ADD_KEY_VALUE("M", M);
185 ADD_KEY_VALUE("N", N);
186 ADD_KEY_VALUE("K", K);
187 ADD_KEY_VALUE("stride_A", stride_A);
188 ADD_KEY_VALUE("stride_B", stride_B);
189 ADD_KEY_VALUE("stride_C", stride_C);
190 ADD_KEY_VALUE("batch_stride_A", batch_stride_A);
191 ADD_KEY_VALUE("batch_stride_B", batch_stride_B);
192 ADD_KEY_VALUE("batch_stride_C", batch_stride_C);
193 ADD_KEY_VALUE("batch_count", batch_count);
194 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
195 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
197}
198
199template <typename ALayout, typename BLayout, typename CLayout>
200void dump_grouped_gemm_json_results(const std::string& json_filename,
201 const std::string& op_name,
202 int group_count,
203 bool pass,
204 float ave_time,
205 float tflops,
206 float gb_per_sec,
207 const std::string& kernel_name = "grouped_gemm")
208{
209 START_JSON_DUMP_FILE(json_filename);
210 ADD_KEY_VALUE("name", kernel_name);
211 ADD_KEY_VALUE("op_name", op_name);
212 ADD_KEY_VALUE("group_count", group_count);
213 ADD_KEY_VALUE("A_layout", ALayout::name);
214 ADD_KEY_VALUE("B_layout", BLayout::name);
215 ADD_KEY_VALUE("C_layout", CLayout::name);
216 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
217 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
219}
220
221void dump_flatmm_json_results(const std::string& json_filename,
222 const std::string& datatype,
223 int M,
224 int N,
225 int K,
226 int stride_A,
227 int stride_B,
228 int stride_C,
229 int kbatch,
230 bool pass,
231 float ave_time,
232 float tflops,
233 float gb_per_sec,
234 const std::string& kernel_name = "flatmm_basic")
235{
236 START_JSON_DUMP_FILE(json_filename);
237 ADD_KEY_VALUE("name", kernel_name);
238 ADD_KEY_VALUE("DataType", datatype);
239 ADD_KEY_VALUE("M", M);
240 ADD_KEY_VALUE("N", N);
241 ADD_KEY_VALUE("K", K);
242 ADD_KEY_VALUE("StrideA", stride_A);
243 ADD_KEY_VALUE("StrideB", stride_B);
244 ADD_KEY_VALUE("StrideC", stride_C);
245 ADD_KEY_VALUE("kbatch", kbatch);
246 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
247 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
249}
250
251void dump_gemm_multi_d_fp16_json_results(const std::string& json_filename,
252 const std::string& op_name,
253 int M,
254 int N,
255 int K,
256 int StrideA,
257 int StrideB,
258 int StrideD0,
259 int StrideD1,
260 int StrideE,
261 bool pass,
262 float ave_time,
263 float tflops,
264 float gb_per_sec,
265 const std::string& kernel_name = "gemm_multi_d_fp16")
266{
267 START_JSON_DUMP_FILE(json_filename);
268 ADD_KEY_VALUE("name", kernel_name);
269 ADD_KEY_VALUE("op_name", op_name);
270 ADD_KEY_VALUE("M", M);
271 ADD_KEY_VALUE("N", N);
272 ADD_KEY_VALUE("K", K);
273 ADD_KEY_VALUE("StrideA", StrideA);
274 ADD_KEY_VALUE("StrideB", StrideB);
275 ADD_KEY_VALUE("StrideD0", StrideD0);
276 ADD_KEY_VALUE("StrideD1", StrideD1);
277 ADD_KEY_VALUE("StrideE", StrideE);
278 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
279 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
281}
282
283void dump_elementwise_json_results(const std::string& json_filename,
284 const std::string& prec,
285 int grid_size,
286 int block_size,
287 float ave_time,
288 float tflops,
289 float gb_per_sec,
290 const std::string& kernel_name = "elementwise")
291{
292 START_JSON_DUMP_FILE(json_filename);
293 ADD_KEY_VALUE("name", kernel_name);
294 ADD_KEY_VALUE("prec", prec);
295 ADD_KEY_VALUE("grid_size", grid_size);
296 ADD_KEY_VALUE("block_size", block_size);
297 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
299}
300
301void dump_layernorm2d_fwd_json_results(const std::string& json_filename,
302 const std::string& prec_i,
303 const std::string& prec_o,
304 const std::string& prec_sm,
305 const std::string& prec_sy,
306 int m,
307 int n,
308 int x_stride,
309 int xr_stride,
310 int y_stride,
311 int yr_stride,
312 bool pass,
313 float ave_time,
314 float tflops,
315 float gb_per_sec,
316 const std::string& kernel_name = "layernorm2d_fwd")
317{
318 START_JSON_DUMP_FILE(json_filename);
319 ADD_KEY_VALUE("name", kernel_name);
320 ADD_KEY_VALUE("prec_i", prec_i);
321 ADD_KEY_VALUE("prec_o", prec_o);
322 ADD_KEY_VALUE("prec_sm", prec_sm);
323 ADD_KEY_VALUE("prec_sy", prec_sy);
324 ADD_KEY_VALUE("m", m);
325 ADD_KEY_VALUE("n", n);
326 ADD_KEY_VALUE("x_stride", x_stride);
327 ADD_KEY_VALUE("xr_stride", xr_stride);
328 ADD_KEY_VALUE("y_stride", y_stride);
329 ADD_KEY_VALUE("yr_stride", yr_stride);
330 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
331 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
333}
334
335template <typename DataType, template <typename> typename DTypeTraits>
336void dump_reduce_json_results(const std::string& json_filename,
337 int N,
338 int C,
339 int H,
340 int W,
341 bool pass,
342 float ave_time,
343 float tflops,
344 float gb_per_sec,
345 const std::string& kernel_name = "reduce")
346{
347 START_JSON_DUMP_FILE(json_filename);
348 ADD_KEY_VALUE("name", kernel_name);
349 using Traits = DTypeTraits<DataType>;
350 ADD_KEY_VALUE("data_type", Traits::name);
351 ADD_KEY_VALUE("N", N);
352 ADD_KEY_VALUE("C", C);
353 ADD_KEY_VALUE("H", H);
354 ADD_KEY_VALUE("W", W);
355 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
356 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
358}
359
360void dump_permute_json_results(const std::string& json_filename,
361 const std::string& data_type,
362 bool pass,
363 float ave_time,
364 float tflop,
365 float gb_per_sec,
366 const std::string& kernel_name = "permute")
367{
368 START_JSON_DUMP_FILE(json_filename);
369 ADD_KEY_VALUE("name", kernel_name);
370 ADD_KEY_VALUE("data_type", data_type);
371 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
372 ADD_PERF_TO_JSON(ave_time, tflop, gb_per_sec)
374}
375
376void dump_topk_softmax_json(const std::string& json_filename,
377 const std::string& input_prec,
378 const std::string& weight_prec,
379 int tokens,
380 int experts,
381 int topk,
382 int stride_input,
383 int stride_output,
384 float ave_time,
385 float tflop,
386 float gb_per_sec,
387 bool pass,
388 const std::string& kernel_name = "topk_softmax")
389{
390 START_JSON_DUMP_FILE(json_filename);
391 ADD_KEY_VALUE("name", kernel_name);
392 ADD_KEY_VALUE("input_prec", input_prec);
393 ADD_KEY_VALUE("weight_prec", weight_prec);
394 ADD_KEY_VALUE("tokens", tokens);
395 ADD_KEY_VALUE("experts", experts);
396 ADD_KEY_VALUE("topk", topk);
397 ADD_KEY_VALUE("stride_input", stride_input);
398 ADD_KEY_VALUE("stride_output", stride_output);
399 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
400 ADD_PERF_TO_JSON(ave_time, tflop, gb_per_sec);
402}
403
404void dump_rmsnorm2d_fwd_json(const std::string& json_filename,
405 const std::string& prec_str,
406 int m,
407 int n,
408 int x_stride,
409 int xr_stride,
410 int y_stride,
411 int yr_stride,
412 int use_model_sensitive_rmsnorm,
413 float ave_time,
414 float tflops,
415 float gb_per_sec,
416 bool pass,
417 const std::string& kernel_name = "rmsnorm2d_fwd")
418{
419 START_JSON_DUMP_FILE(json_filename);
420 ADD_KEY_VALUE("name", kernel_name);
421 ADD_KEY_VALUE("prec", prec_str);
422 ADD_KEY_VALUE("m", m);
423 ADD_KEY_VALUE("n", n);
424 ADD_KEY_VALUE("x_stride", x_stride);
425 ADD_KEY_VALUE("xr_stride", xr_stride);
426 ADD_KEY_VALUE("y_stride", y_stride);
427 ADD_KEY_VALUE("yr_stride", yr_stride);
428 ADD_KEY_VALUE("use_model_sensitive_rmsnorm", use_model_sensitive_rmsnorm);
429 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
430 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
432}
433
435 const std::string& json_filename,
436 const std::string& input_data_type,
437 const std::string& quantized_data_type,
438 int m,
439 int n,
440 int stride,
441 float epsilon,
442 float ave_time,
443 float tflops,
444 float gb_per_sec,
445 bool pass,
446 const std::string& kernel_name = "add_rmsnorm2d_rdquant_fwd")
447{
448 START_JSON_DUMP_FILE(json_filename);
449 ADD_KEY_VALUE("name", kernel_name);
450 ADD_KEY_VALUE("input_data_type", input_data_type);
451 ADD_KEY_VALUE("quantized_data_type", quantized_data_type);
452 ADD_KEY_VALUE("m", m);
453 ADD_KEY_VALUE("n", n);
454 ADD_KEY_VALUE("stride", stride);
455 ADD_KEY_VALUE("epsilon", epsilon);
456 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
457 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
459}
460
461void dump_smoothquant_json(const std::string& json_filename,
462 const std::string& prec_str,
463 int m,
464 int n,
465 int x_stride,
466 int y_stride,
467 float ave_time,
468 float tflops,
469 float gb_per_sec,
470 bool pass,
471 const std::string& kernel_name = "smoothquant")
472{
473 START_JSON_DUMP_FILE(json_filename);
474 ADD_KEY_VALUE("name", kernel_name);
475 ADD_KEY_VALUE("prec", prec_str);
476 ADD_KEY_VALUE("m", m);
477 ADD_KEY_VALUE("n", n);
478 ADD_KEY_VALUE("x_stride", x_stride);
479 ADD_KEY_VALUE("y_stride", y_stride);
480 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
481 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
483}
484
485void dump_moe_sorting_json(const std::string& json_filename,
486 const std::string& index_prec,
487 const std::string& weight_prec,
488 const std::string& workspace_size,
489 int dispatch_policy,
490 int tokens,
491 int num_experts,
492 int topk,
493 float ave_time,
494 float tflops,
495 float gb_per_sec,
496 bool pass,
497 const std::string& kernel_name = "moe_sorting")
498{
499 START_JSON_DUMP_FILE(json_filename);
500 ADD_KEY_VALUE("name", kernel_name);
501 ADD_KEY_VALUE("index_prec", index_prec);
502 ADD_KEY_VALUE("weight_prec", weight_prec);
503 ADD_KEY_VALUE("workspace_size", workspace_size);
504 ADD_KEY_VALUE("dispatch_policy", dispatch_policy);
505 ADD_KEY_VALUE("tokens", tokens);
506 ADD_KEY_VALUE("num_experts", num_experts);
507 ADD_KEY_VALUE("topk", topk);
508 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
509 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
511}
512
513void dump_batched_transpose_json(const std::string& json_filename,
514 int N,
515 int C,
516 int H,
517 int W,
518 const std::string& layout_in,
519 const std::string& layout_out,
520 const std::string& prec,
521 float ave_time,
522 float tflops,
523 float gb_per_sec,
524 bool pass,
525 const std::string& kernel_name = "batched_transpose")
526{
527 START_JSON_DUMP_FILE(json_filename);
528 ADD_KEY_VALUE("name", kernel_name);
529 ADD_KEY_VALUE("N", N);
530 ADD_KEY_VALUE("C", C);
531 ADD_KEY_VALUE("H", H);
532 ADD_KEY_VALUE("W", W);
533 ADD_KEY_VALUE("LayoutIn", layout_in);
534 ADD_KEY_VALUE("LayoutOut", layout_out);
535 ADD_KEY_VALUE("Precision", prec);
536 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
537 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
539}
540
541void dump_moe_smoothquant_json(const std::string& json_filename,
542 const std::string& prec_i,
543 const std::string& prec_o,
544 int tokens,
545 int hidden_size,
546 int stride,
547 int experts,
548 int topk,
549 bool pass,
550 float ave_time,
551 float tflops,
552 float gb_per_sec,
553 const std::string& kernel_name = "moe_smoothquant")
554{
555 START_JSON_DUMP_FILE(json_filename);
556 ADD_KEY_VALUE("name", kernel_name);
557 ADD_KEY_VALUE("prec_i", prec_i);
558 ADD_KEY_VALUE("prec_o", prec_o);
559 ADD_KEY_VALUE("tokens", tokens);
560 ADD_KEY_VALUE("hidden_size", hidden_size);
561 ADD_KEY_VALUE("stride", stride);
562 ADD_KEY_VALUE("experts", experts);
563 ADD_KEY_VALUE("topk", topk);
564 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
565 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
567}
568
569void dump_fused_moe_json(const std::string& json_filename,
570 const std::string& api_str,
571 const std::string& prec_str,
572 int tokens,
573 bool is_local_token,
574 int local_tokens,
575 int experts,
576 int topk,
577 int hidden_size,
578 int intermediate_size,
579 int stride,
580 int block_m,
581 int activation,
582 bool gate_only,
583 bool fused_quant,
584 bool pass,
585 float ave_time,
586 float tflops,
587 float tb_per_sec,
588 const std::string& kernel_name = "fused_moe")
589{
590 START_JSON_DUMP_FILE(json_filename);
591 ADD_KEY_VALUE("name", kernel_name);
592 ADD_KEY_VALUE("api", api_str);
593 ADD_KEY_VALUE("prec", prec_str);
594 ADD_KEY_VALUE("tokens", tokens);
595 if(is_local_token)
596 {
597 ADD_KEY_VALUE("local_tokens", local_tokens);
598 }
599 ADD_KEY_VALUE("experts", experts);
600 ADD_KEY_VALUE("topk", topk);
601 ADD_KEY_VALUE("hidden_size", hidden_size);
602 ADD_KEY_VALUE("intermediate_size", intermediate_size);
603 ADD_KEY_VALUE("stride", stride);
604 ADD_KEY_VALUE("block_m", block_m);
605 ADD_KEY_VALUE("activation", activation);
606 ADD_KEY_VALUE("gate_only", gate_only);
607 ADD_KEY_VALUE("fused_quant", fused_quant);
608 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
609 ADD_PERF_TO_JSON(ave_time, tflops, (tb_per_sec * 1024.0f))
611}
612
613void dump_fmha_fwd_json_results(const std::string& json_filename,
614 const std::string& prec,
615 const std::string& mode,
616 const std::string& io_layout,
617 int batch,
618 int nhead,
619 int nhead_k,
620 int seqlen_qs,
621 int seqlen_ks,
622 int seqlen_kpads,
623 int hdim_q,
624 int hdim_v,
625 float scale_s,
626 float p_drop,
627 bool lse,
628 bool squant,
629 const std::string& bias,
630 const std::string& vlayout,
631 bool pass,
632 float ave_time,
633 float tflops,
634 float gb_per_sec,
635 const std::string& kernel_name = "fmha_fwd")
636{
637 START_JSON_DUMP_FILE(json_filename);
638 ADD_KEY_VALUE("name", kernel_name);
639 ADD_KEY_VALUE("prec", prec);
640 ADD_KEY_VALUE("mode", mode);
641 ADD_KEY_VALUE("io_layout", io_layout);
642 ADD_KEY_VALUE("batch", batch);
643 ADD_KEY_VALUE("nhead", nhead);
644 ADD_KEY_VALUE("nhead_k", nhead_k);
645 ADD_KEY_VALUE("seqlen_q", seqlen_qs);
646 ADD_KEY_VALUE("seqlen_k", seqlen_ks);
647 ADD_KEY_VALUE("seqlen_kpads", seqlen_kpads);
648 ADD_KEY_VALUE("hdim_q", hdim_q);
649 ADD_KEY_VALUE("hdim_v", hdim_v);
650 ADD_KEY_VALUE("scale_s", scale_s);
651 ADD_KEY_VALUE("p_drop", p_drop);
652 ADD_KEY_VALUE("lse", lse);
653 ADD_KEY_VALUE("squant", squant);
654 ADD_KEY_VALUE("bias", bias);
655 ADD_KEY_VALUE("vlayout", vlayout);
656 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
657 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
659}
660
661void dump_fmha_bwd_json_results(const std::string& json_filename,
662 const std::string& data_type,
663 const std::string& mode,
664 const std::string& i_perm,
665 const std::string& o_perm,
666 int batch,
667 int nhead,
668 int nhead_k,
669 int seqlen_q,
670 int seqlen_k,
671 int hdim_q,
672 int hdim_v,
673 float scale,
674 const std::string& bias,
675 bool use_dbias,
676 float p_drop,
677 bool s_randval,
678 bool deterministic,
679 const std::string& mask,
680 int mask_left,
681 int mask_right,
682 int workspace_size,
683 bool pass,
684 float ave_time,
685 float tflops,
686 float gb_per_sec,
687 const std::string& kernel_name = "fmha_bwd")
688{
689 START_JSON_DUMP_FILE(json_filename);
690 ADD_KEY_VALUE("name", kernel_name);
691 ADD_KEY_VALUE("prec", data_type);
692 ADD_KEY_VALUE("mode", mode);
693 ADD_KEY_VALUE("i_perm", i_perm);
694 ADD_KEY_VALUE("o_perm", o_perm);
695 ADD_KEY_VALUE("batch", batch);
696 ADD_KEY_VALUE("nhead", nhead);
697 ADD_KEY_VALUE("nhead_k", nhead_k);
698 ADD_KEY_VALUE("seqlen_q", seqlen_q);
699 ADD_KEY_VALUE("seqlen_k", seqlen_k);
700 ADD_KEY_VALUE("hdim_q", hdim_q);
701 ADD_KEY_VALUE("hdim_v", hdim_v);
702 ADD_KEY_VALUE("scale", scale);
703 ADD_KEY_VALUE("bias", bias);
704 ADD_KEY_VALUE("use_dbias", use_dbias);
705 ADD_KEY_VALUE("p_drop", p_drop);
706 ADD_KEY_VALUE("s_randval", s_randval);
707 ADD_KEY_VALUE("deterministic", deterministic ? "true" : "false");
708 ADD_KEY_VALUE("mask", mask);
709 ADD_KEY_VALUE("mask_left", mask_left);
710 ADD_KEY_VALUE("mask_right", mask_right);
711 ADD_KEY_VALUE("workspace_size", workspace_size);
712 ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
713 ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
715}
716
717#ifndef CK_ENABLE_JSON_DUMP
718#pragma GCC diagnostic pop
719#endif
#define ADD_PERF_TO_JSON(_time, tflops, gbytes)
Definition json_dump.hpp:91
void dump_gemm_json_results(const std::string &json_filename, int M, int N, int K, int stride_A, int stride_B, int stride_C, bool persistent, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_basic")
Definition json_dump.hpp:117
#define END_JSON_DUMP_FILE()
Definition json_dump.hpp:87
void dump_rmsnorm2d_fwd_json(const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, int use_model_sensitive_rmsnorm, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="rmsnorm2d_fwd")
Definition json_dump.hpp:404
void dump_batched_gemm_json_results(const std::string &json_filename, const std::string &op_name, int M, int N, int K, int stride_A, int stride_B, int stride_C, int batch_stride_A, int batch_stride_B, int batch_stride_C, int batch_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="batched_gemm_basic")
Definition json_dump.hpp:163
void dump_fmha_bwd_json_results(const std::string &json_filename, const std::string &data_type, const std::string &mode, const std::string &i_perm, const std::string &o_perm, int batch, int nhead, int nhead_k, int seqlen_q, int seqlen_k, int hdim_q, int hdim_v, float scale, const std::string &bias, bool use_dbias, float p_drop, bool s_randval, bool deterministic, const std::string &mask, int mask_left, int mask_right, int workspace_size, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_bwd")
Definition json_dump.hpp:661
void dump_layernorm2d_fwd_json_results(const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, const std::string &prec_sm, const std::string &prec_sy, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="layernorm2d_fwd")
Definition json_dump.hpp:301
void dump_moe_smoothquant_json(const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, int tokens, int hidden_size, int stride, int experts, int topk, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="moe_smoothquant")
Definition json_dump.hpp:541
void dump_reduce_json_results(const std::string &json_filename, int N, int C, int H, int W, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="reduce")
Definition json_dump.hpp:336
void dump_flatmm_json_results(const std::string &json_filename, const std::string &datatype, int M, int N, int K, int stride_A, int stride_B, int stride_C, int kbatch, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="flatmm_basic")
Definition json_dump.hpp:221
void dump_batched_transpose_json(const std::string &json_filename, int N, int C, int H, int W, const std::string &layout_in, const std::string &layout_out, const std::string &prec, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="batched_transpose")
Definition json_dump.hpp:513
#define START_JSON_DUMP_FILE(file_name)
Definition json_dump.hpp:86
void dump_permute_json_results(const std::string &json_filename, const std::string &data_type, bool pass, float ave_time, float tflop, float gb_per_sec, const std::string &kernel_name="permute")
Definition json_dump.hpp:360
void dump_gemm_multi_d_fp16_json_results(const std::string &json_filename, const std::string &op_name, int M, int N, int K, int StrideA, int StrideB, int StrideD0, int StrideD1, int StrideE, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_multi_d_fp16")
Definition json_dump.hpp:251
void dump_moe_sorting_json(const std::string &json_filename, const std::string &index_prec, const std::string &weight_prec, const std::string &workspace_size, int dispatch_policy, int tokens, int num_experts, int topk, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="moe_sorting")
Definition json_dump.hpp:485
void dump_grouped_gemm_json_results(const std::string &json_filename, const std::string &op_name, int group_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="grouped_gemm")
Definition json_dump.hpp:200
void dump_fmha_fwd_json_results(const std::string &json_filename, const std::string &prec, const std::string &mode, const std::string &io_layout, int batch, int nhead, int nhead_k, int seqlen_qs, int seqlen_ks, int seqlen_kpads, int hdim_q, int hdim_v, float scale_s, float p_drop, bool lse, bool squant, const std::string &bias, const std::string &vlayout, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_fwd")
Definition json_dump.hpp:613
void dump_fused_moe_json(const std::string &json_filename, const std::string &api_str, const std::string &prec_str, int tokens, bool is_local_token, int local_tokens, int experts, int topk, int hidden_size, int intermediate_size, int stride, int block_m, int activation, bool gate_only, bool fused_quant, bool pass, float ave_time, float tflops, float tb_per_sec, const std::string &kernel_name="fused_moe")
Definition json_dump.hpp:569
void dump_smoothquant_json(const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int y_stride, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="smoothquant")
Definition json_dump.hpp:461
#define ADD_KEY_VALUE(key, value)
Definition json_dump.hpp:90
void dump_elementwise_json_results(const std::string &json_filename, const std::string &prec, int grid_size, int block_size, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="elementwise")
Definition json_dump.hpp:283
void dump_topk_softmax_json(const std::string &json_filename, const std::string &input_prec, const std::string &weight_prec, int tokens, int experts, int topk, int stride_input, int stride_output, float ave_time, float tflop, float gb_per_sec, bool pass, const std::string &kernel_name="topk_softmax")
Definition json_dump.hpp:376
void dump_add_rmsnorm2d_rdquant_fwd_json(const std::string &json_filename, const std::string &input_data_type, const std::string &quantized_data_type, int m, int n, int stride, float epsilon, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="add_rmsnorm2d_rdquant_fwd")
Definition json_dump.hpp:434
STL namespace.
const GenericPointer< typename T::ValueType > T2 value
Definition pointer.h:1697
common definitions and configuration
signed __int64 int64_t
Definition stdint.h:135
Definition json_dump.hpp:97