Skip to content

Commit 9fe6163

Browse files
authored
HAC Tests fail with numpy multi threading (#1060)
* fix test bugs by forcing single-threaded numpy * enforce arrays contiguous * pre-commit
1 parent 70a154c commit 9fe6163

File tree

5 files changed

+77
-13
lines changed

5 files changed

+77
-13
lines changed

.github/workflows/ci-tests.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ jobs:
4343
- name: Run 'regular' tests
4444
run: |
4545
pixi run tests-regular
46+
pixi run tests-hac
4647
4748
- name: Upload coverage to Codecov (partial)
4849
uses: codecov/codecov-action@v4

pixi.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ jaxlib = ">=0.4.38, <0.8"
6666
"tests-regular" = 'pytest tests -n 9 -m "not (extended or against_r_core or against_r_extended or plots)" --cov=pyfixest --cov-report=xml'
6767
"tests-extended" = 'pytest tests -n 9 -m "extended" --cov=pyfixest --cov-report=xml'
6868
"tests-fixest" = "pytest -rs tests/test_vs_fixest.py -n 9 --cov=pyfixest --cov-report=xml"
69+
"tests-hac" = { cmd = "pytest tests/test_hac_vs_fixest.py -v -n 9", env = { OMP_NUM_THREADS = "1", OPENBLAS_NUM_THREADS = "1", MKL_NUM_THREADS = "1", VECLIB_MAXIMUM_THREADS = "1", NUMEXPR_NUM_THREADS = "1" } }
6970
"debug" = "python pyfixest/debug.py"
7071
"update-test-data" = "Rscript tests/r_test_comparisons.R"
7172
"install-r-extended" = "Rscript r_test_requirements.R"

pyfixest/estimation/vcov_utils.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,8 @@ def _hac_meat_loop(
118118
gamma_buffer.fill(0.0)
119119
weight = weights[lag_value]
120120

121-
scores_current = scores[lag_value:time_periods]
122-
scores_lagged = scores[: time_periods - lag_value]
121+
scores_current = np.ascontiguousarray(scores[lag_value:time_periods, :])
122+
scores_lagged = np.ascontiguousarray(scores[: time_periods - lag_value, :])
123123

124124
gamma_buffer[:, :] = scores_current.T @ scores_lagged
125125
meat += weight * (gamma_buffer + gamma_buffer.T)
@@ -142,10 +142,10 @@ def _get_bartlett_weights(lag: int):
142142
@nb.njit(parallel=False)
143143
def _nw_meat_time(scores: np.ndarray, time_arr: np.ndarray, lag: int):
144144
if time_arr is None:
145-
ordered_scores = scores
145+
ordered_scores = np.ascontiguousarray(scores)
146146
else:
147147
order = np.argsort(time_arr)
148-
ordered_scores = scores[order]
148+
ordered_scores = np.ascontiguousarray(scores[order])
149149

150150
time_periods, k = ordered_scores.shape
151151
weights = _get_bartlett_weights(lag=lag)
@@ -191,7 +191,7 @@ def _get_panel_idx(
191191
return order, units, starts, counts, panel_arr_sorted, time_arr_sorted
192192

193193

194-
# @nb.njit(parallel=False)
194+
@nb.njit(parallel=False)
195195
def _nw_meat_panel(
196196
scores: np.ndarray,
197197
time_arr: np.ndarray,
@@ -229,6 +229,7 @@ def _nw_meat_panel(
229229

230230
weights = _get_bartlett_weights(lag=lag)
231231

232+
scores = np.ascontiguousarray(scores)
232233
k = scores.shape[1]
233234

234235
meat_nw_panel = np.zeros((k, k))
@@ -240,14 +241,14 @@ def _nw_meat_panel(
240241
for start, count in zip(starts, counts):
241242
end = start + count
242243

243-
score_i = scores[start:end, :]
244+
score_i = np.ascontiguousarray(scores[start:end, :])
244245
gamma0 = score_i.T @ score_i
245246

246247
gamma_l_sum.fill(0.0)
247248
Lmax = min(lag, count - 1)
248249
for lag_value in range(1, Lmax + 1):
249-
score_curr = scores[start + lag_value : end, :]
250-
score_prev = scores[start : end - lag_value, :]
250+
score_curr = np.ascontiguousarray(scores[start + lag_value : end, :])
251+
score_prev = np.ascontiguousarray(scores[start : end - lag_value, :])
251252
gamma_l = score_curr.T @ score_prev
252253
gamma_l_sum += weights[lag_value] * (gamma_l + gamma_l.T)
253254

@@ -289,6 +290,7 @@ def _dk_meat_panel(
289290
time_periods, k = scores_time.shape
290291

291292
weights = _get_bartlett_weights(lag=lag)
293+
scores_time = np.ascontiguousarray(scores_time)
292294

293295
return _hac_meat_loop(
294296
scores=scores_time, weights=weights, time_periods=time_periods, k=k, lag=lag

tests/conftest.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"Pytest configuration for pyfixest tests."
2+
3+
import os
4+
5+
import pytest
6+
7+
8+
@pytest.fixture(scope="session", autouse=True)
9+
def single_thread_blas():
10+
"""
11+
Force single-threaded BLAS for deterministic HAC standard errors.
12+
13+
What Claude says:
14+
15+
Multi-threaded BLAS libraries (OpenBLAS, MKL, Accelerate) can produce
16+
slightly different numerical results due to different parallel reduction
17+
orders when computing matrix multiplications. This causes sporadic test
18+
failures in HAC variance calculations even though both R and Python
19+
implementations are mathematically correct.
20+
21+
The differences arise because floating-point arithmetic is not associative:
22+
(a + b) + c ≠ a + (b + c) in IEEE 754. Different thread scheduling can
23+
change the order of operations, leading to different rounding errors.
24+
25+
By forcing single-threaded execution, we ensure deterministic results
26+
that match R's fixest package exactly.
27+
"""
28+
# Store original values to restore after tests
29+
original_values = {}
30+
31+
env_vars = [
32+
"OMP_NUM_THREADS",
33+
"OPENBLAS_NUM_THREADS",
34+
"MKL_NUM_THREADS",
35+
"VECLIB_MAXIMUM_THREADS",
36+
"NUMEXPR_NUM_THREADS",
37+
]
38+
39+
for var in env_vars:
40+
original_values[var] = os.environ.get(var)
41+
os.environ[var] = "1"
42+
43+
yield
44+
45+
# Restore original values after all tests complete
46+
for var, value in original_values.items():
47+
if value is None:
48+
os.environ.pop(var, None)
49+
else:
50+
os.environ[var] = value

tests/test_hac_vs_fixest.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
"""
2+
Tests for HAC (Heteroskedasticity and Autocorrelation Consistent) standard errors.
3+
4+
IMPORTANT: These tests require single-threaded BLAS for deterministic results.
5+
Multi-threaded BLAS libraries can produce slightly different numerical results
6+
(~1-4% variance in vcov elements) due to different parallel reduction orders,
7+
even though both implementations are mathematically correct. The conftest.py
8+
fixture `single_thread_blas` handles this automatically.
9+
"""
10+
111
import numpy as np
212
import pandas as pd
313
import pytest
@@ -249,10 +259,10 @@ def _get_r_panel_kwargs(time_id, panel_id, lag, inference):
249259
"vcov_kwargs",
250260
[
251261
{"lag": 2, "time_id": "year"},
252-
{"lag": 4, "time_id": "year"},
262+
{"lag": 8, "time_id": "year"},
253263
# now add panel id
254264
{"lag": 2, "time_id": "year", "panel_id": "unit"},
255-
{"lag": 4, "time_id": "year", "panel_id": "unit"},
265+
{"lag": 8, "time_id": "year", "panel_id": "unit"},
256266
# lag not required when panel_id is provided
257267
{"time_id": "year", "panel_id": "unit"},
258268
],
@@ -354,9 +364,9 @@ def test_single_fit_feols_hac_panel(
354364
"balanced",
355365
[
356366
"balanced-consecutive",
357-
# "balanced-non-consecutive",
358-
# "non-balanced-consecutive",
359-
# "non-balanced-non-consecutive",
367+
"balanced-non-consecutive",
368+
"non-balanced-consecutive",
369+
"non-balanced-non-consecutive",
360370
],
361371
)
362372
@pytest.mark.parametrize("fml", poisson_fmls)

0 commit comments

Comments
 (0)