EleutherAI
diff --git a/bergson/__main__.py b/bergson/__main__.py
Lines changed: 2 additions & 2 deletions
diff --git a/bergson/build.py b/bergson/build.py
Lines changed: 2 additions & 4 deletions
diff --git a/bergson/collection.py b/bergson/collection.py
Lines changed: 8 additions & 10 deletions
diff --git a/bergson/data.py b/bergson/data.py
Lines changed: 0 additions & 3 deletions
@@ -17,9 +17,9 @@ class Build:
 
     def execute(self):
         """Build the gradient dataset."""
-        if not self.cfg.save_index and not self.cfg.save_processor:
+        if not self.cfg.save_index and self.cfg.skip_preconditioners:
             raise ValueError(
-                "At least one of save_index or save_processor must be True"
+                "Either save_index must be True or skip_preconditioners must be False"
             )
 
         build_gradient_dataset(self.cfg)
 
@@ -147,7 +147,7 @@ def worker(
             projection_type=cfg.projection_type,
             include_bias=cfg.include_bias,
         )
-        if rank == 0 and cfg.save_processor:
+        if rank == 0:
             processor.save(cfg.partial_run_path)
 
     if cfg.split_attention_modules:
@@ -171,7 +171,6 @@ def worker(
             target_modules=target_modules,
             attention_cfgs=attention_cfgs,
             save_index=cfg.save_index,
-            save_processor=cfg.save_processor,
             drop_columns=cfg.drop_columns,
             token_batch_size=cfg.token_batch_size,
             module_wise=cfg.module_wise,
@@ -199,7 +198,6 @@ def flush():
                 attention_cfgs=attention_cfgs,
                 save_index=cfg.save_index,
                 # Save a processor state checkpoint after each shard
-                save_processor=cfg.save_processor,
                 drop_columns=cfg.drop_columns,
                 token_batch_size=cfg.token_batch_size,
                 module_wise=cfg.module_wise,
@@ -213,7 +211,7 @@ def flush():
                 flush()
         flush()
 
-        if cfg.save_processor:
+        if rank == 0:
             processor.save(cfg.partial_run_path)
 
 
 
@@ -13,7 +13,7 @@
 from .data import create_index, pad_and_tensor
 from .gradients import AttentionConfig, GradientCollector, GradientProcessor
 from .peft import set_peft_enabled
-from .score_writer import ScoreWriter
+from .scorer import Scorer
 
 
 def collect_gradients(
@@ -29,9 +29,8 @@ def collect_gradients(
     target_modules: set[str] | None = None,
     attention_cfgs: dict[str, AttentionConfig] | None = None,
     save_index: bool = True,
-    save_processor: bool = True,
     drop_columns: bool = False,
-    score_writer: ScoreWriter | None = None,
+    scorer: Scorer | None = None,
     token_batch_size: int | None = None,
     module_wise: bool = False,
 ):
@@ -65,8 +64,8 @@ def callback(name: str, g: torch.Tensor, indices: list[int]):
         else:
             mod_grads[name] = g.to(dtype=dtype)
 
-        if score_writer and module_wise:
-            score_writer(indices, mod_grads, name=name)
+        if scorer and module_wise:
+            scorer(indices, mod_grads, name=name)
 
         # Compute the outer product of the flattened gradient
         if not skip_preconditioners:
@@ -161,11 +160,11 @@ def callback(name: str, g: torch.Tensor, indices: list[int]):
             for module_name in mod_grads.keys():
                 grad_buffer[module_name][indices] = mod_grads[module_name].numpy()
 
-        if score_writer is not None:
+        if scorer is not None:
             if module_wise:
-                score_writer.finalize_module_wise(indices)
+                scorer.finalize_module_wise(indices)
             else:
-                score_writer(indices, mod_grads)
+                scorer(indices, mod_grads)
 
         mod_grads.clear()
         per_doc_losses[indices] = losses.detach().type_as(per_doc_losses)
@@ -187,8 +186,7 @@ def callback(name: str, g: torch.Tensor, indices: list[int]):
         )
         data.save_to_disk(path / "data.hf")
 
-        if save_processor:
-            processor.save(path)
+        processor.save(path)
 
     # Make sure the gradients are written to disk
     if grad_buffer is not None:
 
@@ -118,9 +118,6 @@ class IndexConfig:
     save_index: bool = True
     """Whether to write the gradient index to disk."""
 
-    save_processor: bool = True
-    """Whether to write the gradient processor to disk."""
-
     data: DataConfig = field(default_factory=DataConfig)
     """Specification of the data on which to build the index."""