Add create_index test

luciaquirke · luciaquirke · commit 54903cd1dd61 · 2025-11-11T03:20:19.000Z
diff --git a/bergson/data.py b/bergson/data.py
@@ -366,8 +366,8 @@ def create_index(
                 {
                     "num_grads": num_grads,
                     "dtype": struct_dtype,
-                    "unstructured_dtype": np.dtype(dtype).str,
-                    "grad_dimension": sum(grad_sizes.values()),
+                    "grad_sizes": grad_sizes,
+                    "base_dtype": np.dtype(dtype).str,
                 },
                 f,
                 indent=2,
@@ -433,9 +433,9 @@ def load_gradients(root_dir: Path, with_structure: bool = True) -> np.memmap:
         dtype = info["dtype"]
         shape = (num_grads,)
     else:
-        dtype = info["unstructured_dtype"]
-        grad_dimension = info["grad_dimension"]
-        shape = (num_grads, grad_dimension)
+        dtype = info["base_dtype"]
+        grad_sizes = info["grad_sizes"]
+        shape = (num_grads, sum(grad_sizes.values()))
 
     return np.memmap(
         root_dir / "gradients.bin",
diff --git a/tests/test_build.py b/tests/test_build.py
@@ -4,7 +4,7 @@
 import numpy as np
 import pytest
 import torch
-from transformers import AutoConfig, AutoModelForCausalLM
+from transformers import AutoModelForCausalLM
 
 from bergson import (
     AttentionConfig,
@@ -37,30 +37,6 @@ def test_build_e2e(tmp_path: Path):
     assert result.returncode == 0
 
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
-def test_large_gradients_build(tmp_path: Path, dataset):
-    config = AutoConfig.from_pretrained(
-        "EleutherAI/pythia-1.4b", trust_remote_code=True
-    )
-    model = AutoModelForCausalLM.from_config(config)
-    model.cuda()
-
-    collect_gradients(
-        model=model,
-        data=dataset,
-        processor=GradientProcessor(),
-        path=tmp_path,
-        skip_preconditioners=True,
-    )
-
-    # Load a large gradient index without structure.
-    load_gradients(tmp_path, with_structure=False)
-
-    with pytest.raises(ValueError):
-        # Max item size exceeded.
-        load_gradients(tmp_path, with_structure=True)
-
-
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
 def test_build_consistency(tmp_path: Path, model, dataset):
     collect_gradients(
diff --git a/tests/test_data.py b/tests/test_data.py
@@ -0,0 +1,36 @@
+import math
+from pathlib import Path
+
+import numpy as np
+import pytest
+import torch
+from transformers import AutoConfig, AutoModelForCausalLM
+
+from bergson.data import create_index, load_gradients
+from bergson.gradients import GradientCollector
+
+
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+def test_large_gradients_build(tmp_path: Path, dataset):
+    # Create an unstructured index sized from collector.shapes() for a large model.
+    config = AutoConfig.from_pretrained(
+        "EleutherAI/pythia-1.4b", trust_remote_code=True
+    )
+    model = AutoModelForCausalLM.from_config(config)
+    collector = GradientCollector(model, lambda x: x)
+    grad_sizes = {name: math.prod(s) for name, s in collector.shapes().items()}
+
+    create_index(
+        tmp_path,
+        num_grads=len(dataset),
+        grad_sizes=grad_sizes,
+        dtype=np.float32,
+        with_structure=False,
+    )
+
+    # The flat (num_grads, sum of grad_sizes) view must load without raising.
+    load_gradients(tmp_path, with_structure=False)
+
+    with pytest.raises(ValueError):
+        # Presumably the structured dtype exceeds NumPy's max item size.
+        load_gradients(tmp_path, with_structure=True)