@@ -1121,8 +1121,10 @@ auto _CATALOG_ = kcatalog::toArray({
11211121{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {1024, 128, 1024}, {4096, 512, 4096}, {1024, 128, 1024}, {4096, 512, 4096}, {16, 16, 1}, "ABIhr"}, "at32+m64@40 am16+m64@40 aB wg 4x4 xaf rr vav hi pt ar sb64 sm sn sys np", {16, (LoopType) 255, 128, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 32}, {4, 4, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {548120, 53799.9, 0, 0, 0, 0, 0.610964, 1.95, 1.59225, 4.49998, 0.00862291, 0.043525, 0, 1, 1.44207, 0.952908, 1.47695e-11}}},
11221122{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, 49, -1}, {-1, 128, -1}, {16, 16, 1}, "ABI"}, "at64 am32+m32@32 aB wg 2x2x2 ikr xaf st rr vav hi pt sr br sb64 sn sys nmk np", {16, (LoopType) 255, 128, {(LoopType) 209, (LoopType) 255, (LoopType) 2}, {262144, 524288, 16777216}, {262144, 524288, 16777216}, {16, 32, 64}, {2, 2, 2}, 1, (WGType) 1, 4357, 0, 8192, {16, 16, 4}, {true, true, true}}, {'E', 17, {919791, 14572.7, 150898, 11874.8, 0, 0, 0.970933, 2.00496, 1.73249, 5.83627, 0.0164407, 0.0180319, 0.00303982, 0.139818, 1.26024, 1.04913, 1.75433e-12}}},
11231123{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, 49, -1}, {-1, 128, -1}, {16, 16, 1}, "ABIH"}, "at64x2+m48@40 am32x2+m64@48 aB wg 16x2 sys xaf vav hi sr br sb1 sm dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {16, 2, 1}, 1, (WGType) 1, 1, 0, 0, {16, 16, 2}, {true, true, true}}, {'E', 17, {112988, 212095, 0, 0, 0, 0, 0.55059, 2.48824, 1.62374, 4.12957, 0.00804382, 0.00804382, 0, 1, 1.18515, 0.998873, 1.5126e-12}}},
1124- {{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {1025, 129, 1}, {-1, 768, 3583}, {1025, 129, 1}, {-1, 768, 3583}, {16, 16, 1}, "ABIh"}, "at64x2+m64@8 am32x2+m32@8 aB wg 4x2 af st vav hi pt sr br sb64 sm sn grf256 sys np", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {4, 2, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {256058, 33684.4, 0, 0, 0, 0, 0.504291, 1.89712, 3.71959, 8.91844, 0.00785564, 0.00768242, 0.00317825, 1, 1.39291, 1.00693, 2.48102e-12}}},
1125- {{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {3072, 448, 1}, {-1, -1, 8192}, {3072, 448, 1}, {-1, -1, 8192}, {16, 16, 1}, "ABIh"}, "at64x2+m64@8 am32x2+m32@8 aB wg 4x2 af st vav hi pt sr br sb64 sm sn grf256 sys np", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {4, 2, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {256058, 33684.4, 0, 0, 0, 0, 0.504291, 1.89712, 3.71959, 8.91844, 0.00785564, 0.00768242, 0.00317825, 1, 1.39291, 1.00693, 2.48102e-12}}},
1124+ {{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, 1}, {-1, -1, -1}, {1025, 129, 1}, {-1, 768, 3583}, {16, 16, 1}, "ABIh"}, "at64x2+m64@8 am32x2+m32@8 aB wg 4x2 af st vav hi pt sr br sb64 sm sn grf256 sys np", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {4, 2, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {256058, 33684.4, 0, 0, 0, 0, 0.504291, 1.89712, 3.71959, 8.91844, 0.00785564, 0.00768242, 0.00317825, 1, 1.39291, 1.00693, 2.48102e-12}}},
1125+ {{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, 1}, {-1, -1, -1}, {3072, 448, 1}, {-1, -1, 8192}, {16, 16, 1}, "ABIh"}, "at64x2+m64@8 am32x2+m32@8 aB wg 4x2 af st vav hi pt sr br sb64 sm sn grf256 sys np", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {4, 2, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {256058, 33684.4, 0, 0, 0, 0, 0.504291, 1.89712, 3.71959, 8.91844, 0.00785564, 0.00768242, 0.00317825, 1, 1.39291, 1.00693, 2.48102e-12}}},
1126+ {{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {129, 938, -1}, {1024, 938, -1}, {16, 16, 1}, "ABI"}, "at64+m32@72 am16+m64@112 aB wg 8x1 sys af rr fx vav bo sr br sb128 dm grf256", {16, (LoopType) 255, 256, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {524288, 1048576, 16777216}, {524288, 1048576, 16777216}, {32, 64, 64}, {8, 1, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {270358, 54496.7, 0, 0, 0, 0, 0.523135, 1.88679, 1.479, 4.60386, 0.0082182, 0.00495218, 0.00240476, 1, 1.36152, 0.994848, 1.98747e-12}}},
1127+ {{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {129, 938, -1}, {-1, 938, -1}, {16, 16, 1}, "ABI"}, "at64 am32+m16@120 aB wg 32x1 cb4x2 ks64 nb 32x0 sys af fx vav bo sr br sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {262144, 1048576, 16777216}, {262144, 1048576, 16777216}, {16, 64, 64}, {32, 1, 1}, 1, (WGType) 1, 257, 32768, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {153538, 242908, 0, 0, 0, 0, 0.383955, 2.85486, 1.29169, 4.09481, 0.00817161, 0.00817161, 0, 1, 1.18876, 0.995675, 1.385e-12}}},
11261128{{'G', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at32+m128@96 am32x2+m64@96 aB wg 2x16 vav hi pt sr br sb128 bk0 grf256 sys acb cr16", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {2097152, 262144, 16777216}, {2097152, 262144, 16777216}, {128, 16, 32}, {2, 16, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {879529, 62860.9, 0, 0, 0, 0, 1.12572, 1.9182, 3.81465, 7.84556, 0.00532516, 0.00532516, 0, 1, 1.01261, 1.00705, -3.00232e-14}}},
11271129{{'G', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at64+m128@32 am128+m64@32 aB wg 2x8 xaf st hi pt sr br sb128 sn grf256 cr0 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {1048576, 262144, 16777216}, {1048576, 262144, 16777216}, {64, 16, 128}, {2, 8, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {525002, 18498.2, 0, 0, 0, 0, 0.485217, 0.854072, 1.96694, 5.31108, 0.00356541, 0.00161949, 0.00452935, 0.938224, 1.01441, 1.01414, -4.57758e-14}}},
11281130{{'G', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, -1}, {-1, 1, 8191}, {-1, 1, -1}, {-1, 1, 8191}, {16, 16, 1}, "ABI"}, "at128+m64@48 am128+m32@48 aB wg 2x1x4 ikr af hi pt sr br sb128 grf256 sys bk0 acb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 16384, 16777216}, {524288, 16384, 16777216}, {32, 1, 128}, {2, 1, 4}, 1, (WGType) 1, 4357, 0, 256, {16, 16, 4}, {true, true, true}}, {'W', 1, {32}}},
0 commit comments