Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/gpu/intel/gemm/jit/selector/db/kernel.db
Original file line number Diff line number Diff line change
Expand Up @@ -1123,6 +1123,12 @@ auto _CATALOG_ = kcatalog::toArray({
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, 49, -1}, {-1, 128, -1}, {16, 16, 1}, "ABIH"}, "at64x2+m48@40 am32x2+m64@48 aB wg 16x2 sys xaf vav hi sr br sb1 sm dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {16, 2, 1}, 1, (WGType) 1, 1, 0, 0, {16, 16, 2}, {true, true, true}}, {'E', 17, {112988, 212095, 0, 0, 0, 0, 0.55059, 2.48824, 1.62374, 4.12957, 0.00804382, 0.00804382, 0, 1, 1.18515, 0.998873, 1.5126e-12}}},
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {1025, 129, 1}, {-1, 768, 3583}, {1025, 129, 1}, {-1, 768, 3583}, {16, 16, 1}, "ABIh"}, "at64x2+m64@8 am32x2+m32@8 aB wg 4x2 af st vav hi pt sr br sb64 sm sn grf256 sys np", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {4, 2, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {256058, 33684.4, 0, 0, 0, 0, 0.504291, 1.89712, 3.71959, 8.91844, 0.00785564, 0.00768242, 0.00317825, 1, 1.39291, 1.00693, 2.48102e-12}}},
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {3072, 448, 1}, {-1, -1, 8192}, {3072, 448, 1}, {-1, -1, 8192}, {16, 16, 1}, "ABIh"}, "at64x2+m64@8 am32x2+m32@8 aB wg 4x2 af st vav hi pt sr br sb64 sm sn grf256 sys np", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {4, 2, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {256058, 33684.4, 0, 0, 0, 0, 0.504291, 1.89712, 3.71959, 8.91844, 0.00785564, 0.00768242, 0.00317825, 1, 1.39291, 1.00693, 2.48102e-12}}},
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS32x2 aB16+S1,32@32 aB wg 32x1 cb3 ks64 nb 32x0 sys af vav hi sr br sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 2097152, 16777216}, {262144, 2097152, 16777216}, {16, 128, 64}, {32, 1, 1}, 1, (WGType) 1, 257, 49152, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {177711, 326055, 0, 0, 0, 0, 0.679089, 2.52106, 3.64744, 8.82899, 0.00792228, 0.00792228, 0, 1, 1.14501, 0.990098, 9.49038e-13}}},
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS16x2 aB16+S1,64@24 aB wg 16x2 cb4 ks32 nb 0x2 sys xaf rr fx vav hi sr br sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {524288, 1048576, 16777216}, {524288, 1048576, 16777216}, {32, 64, 32}, {16, 2, 1}, 1, (WGType) 1, 257, 32768, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {157777, 362177, 0, 0, 0, 0, 0.572884, 1.78845, 3.62824, 8.7431, 0.00788381, 0.00788381, 0, 0.999766, 1.14094, 0.993462, 9.76979e-13}}},
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS64x2+S16@24 aB16x2+S1,32@16 aB wg 32x1 cb4 ks64 ql nb 32x0 sys af vav hi sr br dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 1048576, 16777216}, {262144, 1048576, 16777216}, {16, 64, 64}, {32, 1, 1}, 1, (WGType) 1, 257, 32768, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {157566, 330240, 0, 0, 0, 0, 0.42122, 2.57823, 3.39749, 8.48835, 0.00795599, 0.00795599, 0, 1, 1.18566, 0.994066, 1.5408e-12}}},
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS16 aB16+S1,64@32 aB wg 32x1 cb3 ks64 sys xaf rr fx vav hi sr br sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 2097152, 16777216}, {262144, 2097152, 16777216}, {16, 128, 64}, {32, 1, 1}, 1, (WGType) 1, 257, 49152, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {177311, 325429, 0, 0, 0, 0, 0.782317, 2.61771, 3.64801, 8.81517, 0.00792667, 0.00792667, 0, 1, 1.16879, 0.998113, 1.3455e-12}}},
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS16x2 aB16+S1,64@32 aB wg 32x1 cb3 ks32 nb 32x0 sys xaf rr fx vav hi sr br dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 2097152, 16777216}, {262144, 2097152, 16777216}, {16, 128, 32}, {32, 1, 1}, 1, (WGType) 1, 257, 24576, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {163390, 386204, 0, 0, 0, 0, 0.734651, 2.51864, 3.59783, 8.77027, 0.00788992, 0.00788992, 0, 1, 1.15024, 0.992801, 1.00385e-12}}},
{{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS64+S16@24 aB16+S1,32@24 aB wg 32x1 cb4 ks32 ql sys xaf rr fx vav hi sr br sm dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 1048576, 16777216}, {262144, 1048576, 16777216}, {16, 64, 64}, {32, 1, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {140479, 331595, 0, 0, 0, 0, 0.402177, 2.6016, 3.38947, 8.45442, 0.00828123, 0.00828123, 0, 0.9984, 1.18131, 1.00933, -2.28559e-13}}},
{{'G', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at32+m128@96 am32x2+m64@96 aB wg 2x16 vav hi pt sr br sb128 bk0 grf256 sys acb cr16", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {2097152, 262144, 16777216}, {2097152, 262144, 16777216}, {128, 16, 32}, {2, 16, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {879529, 62860.9, 0, 0, 0, 0, 1.12572, 1.9182, 3.81465, 7.84556, 0.00532516, 0.00532516, 0, 1, 1.01261, 1.00705, -3.00232e-14}}},
{{'G', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at64+m128@32 am128+m64@32 aB wg 2x8 xaf st hi pt sr br sb128 sn grf256 cr0 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {1048576, 262144, 16777216}, {1048576, 262144, 16777216}, {64, 16, 128}, {2, 8, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {525002, 18498.2, 0, 0, 0, 0, 0.485217, 0.854072, 1.96694, 5.31108, 0.00356541, 0.00161949, 0.00452935, 0.938224, 1.01441, 1.01414, -4.57758e-14}}},
{{'G', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, -1}, {-1, 1, 8191}, {-1, 1, -1}, {-1, 1, 8191}, {16, 16, 1}, "ABI"}, "at128+m64@48 am128+m32@48 aB wg 2x1x4 ikr af hi pt sr br sb128 grf256 sys bk0 acb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 16384, 16777216}, {524288, 16384, 16777216}, {32, 1, 128}, {2, 1, 4}, 1, (WGType) 1, 4357, 0, 256, {16, 16, 4}, {true, true, true}}, {'W', 1, {32}}},
Expand Down Expand Up @@ -1197,6 +1203,12 @@ auto _CATALOG_ = kcatalog::toArray({
{{'G', "gemm", {"N", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {1025, 129, 1}, {-1, 768, 3583}, {1025, 129, 1}, {-1, 768, 3583}, {16, 16, 1}, "ABIh"}, "at64x2+m64@8 am32x2+m32@8 aB wg 4x2 xaf st acb hi pt sr br sb64 sm sn grf256 sys np", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {4, 2, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {256058, 33684.4, 0, 0, 0, 0, 0.504291, 1.89712, 3.71959, 8.91844, 0.00785564, 0.00768242, 0.00317825, 1, 1.39291, 1.00693, 2.48102e-12}}},
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 8, -1}, {16, 16, 1}, "IAB"}, "at32x2+m64@16 am64x2 aB wg 4x1x4 ikr af vav sr br sb64 bm0 bk0 sys nmk np", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {4, 1, 4}, 1, (WGType) 1, 261, 0, 2048, {16, 16, 4}, {true, true, true}}, {'W', 1, {128}}},
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, 9, -1}, {-1, 64, -1}, {16, 16, 1}, "IAB"}, "at32x2+m64@16 am64x2 aB wg 4x1x4 ikr af vav sr br sb64 bm0 bk0 sys nmk grf256 np", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 16, 64}, {4, 1, 4}, 1, (WGType) 1, 261, 0, 2048, {16, 16, 4}, {true, true, true}}, {'W', 1, {128}}},
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS16x2+S64@8 aB16+S1,64@48 aB wg 32x1 cb4 ks32 nb 32x0 sys xaf rr fx vav hi sr br sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 2097152, 16777216}, {262144, 2097152, 16777216}, {16, 128, 32}, {32, 1, 1}, 1, (WGType) 1, 257, 32768, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {162082, 443372, 0, 0, 0, 0, 0.76118, 2.51186, 3.55751, 8.72228, 0.00790848, 0.00790848, 0, 0.979938, 1.20119, 0.998485, 1.51113e-12}}},
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS16x2+S64@40 aB16+S1,64@32 aB wg 16x1 cb4 ks16 ql nb 16x0 sys xaf st fx vav hi sr br sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 2097152, 16777216}, {262144, 2097152, 16777216}, {16, 128, 16}, {16, 1, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {374406, 217482, 0, 0, 0, 0, 0.682632, 1.71074, 3.59009, 8.65312, 0.00785572, 0.0076963, 0.000322578, 1, 1.30683, 1.0247, 2.57093e-12}}},
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS16+S64@8 aB16+S1,64@8 aB wg 16x1 cb4 ks16 nb 16x0 sys xaf rr fx vav hi sr br sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 2097152, 16777216}, {262144, 2097152, 16777216}, {16, 128, 16}, {16, 1, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {330322, 171050, 0, 0, 0, 0, 0.820923, 1.80509, 3.64403, 8.71665, 0.00800297, 0.00602537, 0.00217363, 1, 1.24875, 1.00016, 2.09286e-12}}},
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS32x2+S64@40 aB16x2+S1,32@8 aB wg 16x1 cb4 ks32 ql sys xaf rr fx vav hi sr br dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 1048576, 16777216}, {262144, 1048576, 16777216}, {16, 64, 32}, {16, 1, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {316428, 148920, 0, 0, 0, 0, 0.39205, 1.56724, 3.44087, 8.51925, 0.00794168, 0.00568289, 0.00221721, 1, 1.2479, 0.990352, 2.86173e-12}}},
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS32x2 aB32+S64@112 aB wg 32x1 cb4x2 ks64 ql nb 32x0 sys xaf st vav hi sr br dm", {16, (LoopType) 255, 128, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 524288, 16777216}, {262144, 524288, 16777216}, {16, 32, 64}, {32, 1, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {306996, 213072, 0, 0, 0, 0, 0.420192, 2.6846, 3.27894, 8.35946, 0.0101853, 0.00869937, 0.000219736, 0.705509, 1.3409, 1.00864, 4.14841e-12}}},
{{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aS16x2+S16@104 aB16+S1,16@8 aB wg 16x2 cb3 ks32 nb 0x2 sys xaf fx vav hi sr br sm dm grf256", {16, (LoopType) 255, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 2097152, 16777216}, {262144, 2097152, 16777216}, {16, 128, 32}, {16, 2, 1}, 1, (WGType) 1, 257, 49152, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {184248, 434316, 0, 0, 0, 0, 1.58876, 1.89817, 3.55657, 8.66518, 0.007961, 0.007961, 0, 0.997444, 1.22188, 0.996053, 1.66019e-12}}},
{{'G', "gemm", {"Q", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 8, -1}, {16, 16, 1}, "IAB"}, "at32+m32@16 am64 aB wg 4x1x4 ikr af vav sr br sb64 bm0 bk0 sys st np", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {4, 1, 4}, 1, (WGType) 1, 261, 0, 2048, {16, 16, 4}, {true, true, true}}, {'E', 17, {231324, 37214.5, 12077.2, 7211.62, 0, 0, 0.533846, 2.1714, 0.354359, 3.98714, 0.0299984, 0.0213193, 0.00594342, 0.55014, 1.01154, 1.13636, -1.55278e-12}}},
{{'G', "gemm", {"Q", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, 9, -1}, {-1, 32, -1}, {16, 16, 1}, "ABIh"}, "at32+m32@32 am64+m64@64 aB wg 4x1x4 vav sr br sb64 bm0 bk0 sys xaf sm sn st np grf256 ikr", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 16, 64}, {4, 1, 4}, 1, (WGType) 1, 261, 0, 2048, {16, 16, 4}, {true, true, true}}, {'E', 17, {225586, 69187.9, 10490.6, 15759.8, 0, 0, 0.601267, 1.84174, 0.789366, 4.16685, 0.0178126, 0.0173575, 0.000615127, 0.366169, 1.06063, 0.98351, 1.79892e-12}}},
{{'G', "gemm", {"Q", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, 9, -1}, {-1, 32, -1}, {16, 16, 1}, "ABIh"}, "at32+m32@32 am64+m64@64 aB wg 4x1x2 vav sr br sb64 bm0 bk0 sys xaf sm sn st np grf256 ikr", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 16, 64}, {4, 1, 2}, 1, (WGType) 1, 261, 0, 2048, {16, 16, 4}, {true, true, true}}, {'E', 17, {205260, 23043.8, 36245.3, 16259.7, 0, 0, 0.500127, 1.78186, 1.52199, 6.05532, 0.013611, 0.00830487, 0.00315919, 0.504271, 1.18638, 1.05777, 6.06973e-14}}},
Expand Down