Skip to content

Commit 6473931

Browse files
committed
xe: conv: jit: rework walk order heuristic
1 parent 3646d91 commit 6473931

File tree

1 file changed

+20
-0
lines changed

1 file changed

+20
-0
lines changed

src/gpu/intel/conv/jit/config.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1644,6 +1644,26 @@ walk_order_t compute_walk_order(const config_t &cfg) {
16441644
size_t ab_bytes = get_memory_footprint(cfg, inner, outer);
16451645
if (ab_bytes <= l3_size) grid_inner = std::move(outer);
16461646
}
1647+
1648+
// Prefer square spatial dimensions to increase cache reuse due to iteration
1649+
// over kernel spatial dimensions. This optimization can likely be extended
1650+
// to bwd_d as well; it just hasn't been analyzed yet.
1651+
if (cfg.prb().is_fwd) {
1652+
auto &w_inner = grid_inner[pvars::ow];
1653+
auto &h_inner = grid_inner[pvars::oh];
1654+
auto rebalance_hw = [&]() {
1655+
if (grid_tile[pvars::oh] % (h_inner * 2)) return false;
1656+
if (w_inner % 2) return false;
1657+
if (w_inner < h_inner * 4) return false;
1658+
return true;
1659+
};
1660+
1661+
while (rebalance_hw()) {
1662+
w_inner /= 2;
1663+
h_inner *= 2;
1664+
}
1665+
}
1666+
16471667
// Add the blocks in this order:
16481668
// - Step 1. Add grid_inner blocks (fitting L3 cache)
16491669
// - Step 2. Add the remaining M/N blocks

0 commit comments

Comments
 (0)