add a function for detaching the neural mem state

lucidrains · lucidrains · commit f4444d4ad87e · 2025-03-06T14:53:38.000-08:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "titans-pytorch"
-version = "0.4.3"
+version = "0.4.5"
 description = "Titans"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
diff --git a/tests/test_titans.py b/tests/test_titans.py
@@ -405,3 +405,23 @@ def test_assoc_scan(
     assert second_half.shape == inputs2.shape
 
     assert torch.allclose(output[:, -1], second_half[:, -1], atol = 1e-5)
+
+def test_mem_state_detach():
+    from titans_pytorch.neural_memory import mem_state_detach
+
+    mem = NeuralMemory(
+        dim = 384,
+        chunk_size = 2,
+        qk_rmsnorm = True,
+        dim_head = 64,
+        heads = 4,
+    )
+
+    seq = torch.randn(4, 64, 384)
+
+    state = None
+
+    for _ in range(2):
+        parallel_retrieved, state = mem(seq, state = state)
+        state = mem_state_detach(state)
+        parallel_retrieved.sum().backward()
diff --git a/titans_pytorch/__init__.py b/titans_pytorch/__init__.py
@@ -1,5 +1,7 @@
 from titans_pytorch.neural_memory import (
     NeuralMemory,
+    NeuralMemState,
+    mem_state_detach
 )
 
 from titans_pytorch.memory_models import (
diff --git a/titans_pytorch/neural_memory.py b/titans_pytorch/neural_memory.py
@@ -7,10 +7,11 @@
 from collections import namedtuple
 
 import torch
-from torch import nn, stack, cat, tensor, Tensor
+from torch import nn, stack, cat, is_tensor, tensor, Tensor
 import torch.nn.functional as F
 from torch.nn import Linear, Module, Parameter, ParameterList, ParameterDict
 from torch.func import functional_call, vmap, grad
+from torch.utils._pytree import tree_map, tree_flatten, tree_unflatten
 
 from tensordict import TensorDict
 
@@ -40,6 +41,8 @@
 
 LinearNoBias = partial(Linear, bias = False)
 
+# neural mem state related
+
 NeuralMemState = namedtuple('NeuralMemState', [
     'seq_index',
     'weights',
@@ -48,6 +51,13 @@
     'updates',
 ])
 
+def mem_state_detach(
+    state: NeuralMemState
+):
+    assert isinstance(state, NeuralMemState)
+    state = tree_map(lambda t: t.detach() if is_tensor(t) else t, tuple(state))
+    return NeuralMemState(*state)
+
 # functions
 
 def exists(v):
@@ -854,6 +864,7 @@ def forward(
         seq,
         store_seq = None,
         state: NeuralMemState | None = None,
+        detach_mem_state = False,
         prev_weights = None,
         store_mask: Tensor | None = None,
         return_surprises = False
@@ -1003,6 +1014,11 @@ def accum_updates(past_updates, future_updates):
             updates
         )
 
+        # maybe detach
+
+        if detach_mem_state:
+            next_neural_mem_state = mem_state_detach(next_neural_mem_state)
+
         # returning
 
         if not return_surprises:

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1,5 +1,7 @@` |
| `1` | `1` | `from titans_pytorch.neural_memory import (` |
| `2` | `2` | `    NeuralMemory,` |
| | `3` | `+    NeuralMemState,` |
| | `4` | `+    mem_state_detach` |
| `3` | `5` | `)` |
| `4` | `6` | |
| `5` | `7` | `from titans_pytorch.memory_models import (` |