Timestep-AI
diff --git a/‎.github/workflows/ci-cd.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/ci-cd.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/api/main.py‎
Lines changed: 197 additions & 4 deletions b/‎src/api/main.py‎
Lines changed: 197 additions & 4 deletions
diff --git a/‎src/api/stores.py‎
Lines changed: 28 additions & 0 deletions b/‎src/api/stores.py‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎supabase/config.toml.example‎
Lines changed: 1 addition & 1 deletion b/‎supabase/config.toml.example‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎supabase/functions/agent-chat-v2/chatkit/server.ts‎
Lines changed: 3 additions & 1 deletion b/‎supabase/functions/agent-chat-v2/chatkit/server.ts‎
Lines changed: 3 additions & 1 deletion
@@ -20,7 +20,7 @@ env:
   VITE_SUPABASE_URL: http://127.0.0.1:54321
   VITE_SUPABASE_ANON_KEY: sb_publishable_ACJWlzQHlZjBrEguHvfOxg_3BJgxAaH
   # Supabase edge function secrets
-  DEFAULT_AGENT_MODEL: ollama/gpt-oss:120b-cloud
+  DEFAULT_AGENT_MODEL: openai/gpt-4.1
   OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
   OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }}
   HF_TOKEN: ${{ secrets.HF_TOKEN }}
 
@@ -12,8 +12,10 @@
 from openai import AsyncOpenAI
 from chatkit.agents import simple_to_agent_input, stream_agent_response, AgentContext, ClientToolCall
 from chatkit.server import StreamingResult, ChatKitServer
-from chatkit.types import ThreadMetadata, UserMessageItem, ThreadStreamEvent, UserMessageTextContent, ClientToolCallItem
+from chatkit.types import ThreadMetadata, UserMessageItem, ThreadStreamEvent, UserMessageTextContent, ClientToolCallItem, ThreadsAddClientToolOutputReq, StreamingReq, ThreadItemDoneEvent, ThreadItemAddedEvent, AssistantMessageItem
+from datetime import datetime
 from chatkit.store import Store, AttachmentStore
+from chatkit.server import DEFAULT_PAGE_SIZE
 from supabase import create_client, Client
 from .stores import ChatKitDataStore, ChatKitAttachmentStore, TContext
 
@@ -383,6 +385,61 @@ def __init__(
     ):
         super().__init__(data_store, attachment_store)
 
+    async def _process_streaming_impl(
+        self, request: StreamingReq, context: TContext
+    ) -> AsyncIterator[ThreadStreamEvent]:
+        # Override to fix threads.add_client_tool_output handler
+        # The library loads only 1 item, but we need to load more to find pending tool calls
+        # if an assistant message was saved after the tool call
+        
+        if isinstance(request, ThreadsAddClientToolOutputReq):
+            thread = await self.store.load_thread(
+                request.params.thread_id, context=context
+            )
+            # Load DEFAULT_PAGE_SIZE items instead of just 1 to find pending tool calls
+            items = await self.store.load_thread_items(
+                thread.id, None, DEFAULT_PAGE_SIZE, "desc", context
+            )
+            logger.info(f"[_process_streaming_impl] Loaded {len(items.data)} items for thread {thread.id}")
+            logger.info(f"[_process_streaming_impl] Item types: {[item.type if hasattr(item, 'type') else type(item).__name__ for item in items.data]}")
+            tool_call = next(
+                (
+                    item
+                    for item in items.data
+                    if isinstance(item, ClientToolCallItem)
+                    and item.status == "pending"
+                ),
+                None,
+            )
+            if not tool_call:
+                logger.error(f"[_process_streaming_impl] No pending ClientToolCallItem found in {len(items.data)} items")
+                logger.error(f"[_process_streaming_impl] Items: {items.data}")
+                raise ValueError(
+                    f"Last thread item in {thread.id} was not a ClientToolCallItem"
+                )
+
+            tool_call.output = request.params.result
+            tool_call.status = "completed"
+
+            await self.store.save_item(thread.id, tool_call, context=context)
+
+            # Safety against dangling pending tool calls if there are
+            # multiple in a row, which should be impossible, and
+            # integrations should ultimately filter out pending tool calls
+            # when creating input response messages.
+            await self._cleanup_pending_client_tool_call(thread, context)
+
+            async for event in self._process_events(
+                thread,
+                context,
+                lambda: self.respond(thread, None, context),
+            ):
+                yield event
+        else:
+            # For all other cases, use the parent's implementation
+            async for event in super()._process_streaming_impl(request, context):
+                yield event
+
     async def respond(
         self,
         thread: ThreadMetadata,
@@ -494,9 +551,55 @@ def sanitize_item(item):
                         return sanitized
 
                     # For regular messages, keep only role and content
+                    # But ensure content is properly formatted for the Agents SDK
+                    # For agent inputs, assistant messages should use input_text content (not output_text)
+                    role = item.get("role")
+                    content = item.get("content", [])
+                    
+                    if isinstance(content, list):
+                        # Convert content items to the format Agents SDK expects
+                        formatted_content = []
+                        for c in content:
+                            if isinstance(c, dict):
+                                # Convert output_text to input_text for assistant messages (agent inputs use input_text)
+                                content_type = c.get("type")
+                                if content_type == "output_text":
+                                    # Convert output_text to input_text for agent inputs
+                                    formatted_content.append({
+                                        "type": "input_text",
+                                        "text": c.get("text", ""),
+                                    })
+                                elif content_type == "input_text":
+                                    # Already correct format
+                                    formatted_content.append({
+                                        "type": "input_text",
+                                        "text": c.get("text", ""),
+                                    })
+                                elif "text" in c:
+                                    # Unknown type but has text, convert to input_text
+                                    formatted_content.append({
+                                        "type": "input_text",
+                                        "text": c.get("text", ""),
+                                    })
+                                else:
+                                    # Unknown format, try to preserve it
+                                    formatted_content.append(c)
+                            elif isinstance(c, str):
+                                # Plain string, wrap in input_text
+                                formatted_content.append({
+                                    "type": "input_text",
+                                    "text": c,
+                                })
+                            else:
+                                formatted_content.append(c)
+                        content = formatted_content
+                    elif isinstance(content, str):
+                        # Plain string, wrap in input_text array
+                        content = [{"type": "input_text", "text": content}]
+                    
                     return {
-                        "role": item.get("role"),
-                        "content": item.get("content"),
+                        "role": role,
+                        "content": content,
                     }
                 return item
 
@@ -598,7 +701,97 @@ def sanitize_item(item):
         )
         logger.info(f"[python-respond] Runner.run_streamed returned, result type: {type(result)}")
 
-        async for event in stream_agent_response(agent_context, result):
+        # Wrap stream_agent_response to fix __fake_id__ in ThreadItemAddedEvent and ThreadItemDoneEvent items
+        # CRITICAL: If items are saved with __fake_id__, they will overwrite each other due to PRIMARY KEY constraint
+        # CRITICAL: Both thread.item.added and thread.item.done must have the SAME ID so the frontend recognizes them as the same item
+        # This ensures ChatKit items have proper IDs (defense-in-depth - add_thread_item also fixes IDs)
+        async def fix_chatkit_event_ids(events):
+            event_count = 0
+            # Track IDs we've generated for items, so thread.item.added and thread.item.done use the same ID
+            item_id_map: dict[str, str] = {}  # Maps original __fake_id__ to generated ID
+            
+            async for event in events:
+                event_count += 1
+                event_type = event.type if hasattr(event, 'type') else type(event).__name__
+                logger.info(f"[python-respond] Event #{event_count}: {event_type}")
+                
+                # Fix __fake_id__ in ThreadItemAddedEvent items
+                if isinstance(event, ThreadItemAddedEvent) and hasattr(event, 'item'):
+                    item = event.item
+                    original_id = item.id if hasattr(item, 'id') else 'N/A'
+                    item_type = item.type if hasattr(item, 'type') else type(item).__name__
+                    content_preview = ""
+                    content_length = 0
+                    if isinstance(item, AssistantMessageItem) and item.content:
+                        # Get first 50 chars of content for logging
+                        first_content = item.content[0] if item.content else None
+                        if first_content and hasattr(first_content, 'text'):
+                            content_length = len(first_content.text)
+                            content_preview = first_content.text[:50] + "..." if len(first_content.text) > 50 else first_content.text
+                    logger.info(f"[python-respond] ThreadItemAddedEvent: type={item_type}, id={original_id}, content_length={content_length}, content_preview={content_preview}")
+                    
+                    if hasattr(item, 'id') and (item.id == '__fake_id__' or not item.id or item.id == 'N/A'):
+                        # Check if we've already generated an ID for this item (from a previous event)
+                        if original_id in item_id_map:
+                            item.id = item_id_map[original_id]
+                            logger.info(f"[python-respond] Reusing ID for ThreadItemAddedEvent: {original_id} -> {item.id}")
+                        else:
+                            logger.error(f"[python-respond] CRITICAL: Fixing __fake_id__ for {type(item).__name__} in ThreadItemAddedEvent (original_id={original_id})")
+                            thread_meta = ThreadMetadata(id=thread.id, created_at=datetime.now())
+                            if isinstance(item, ClientToolCallItem):
+                                item_type_for_id = "tool_call"
+                            elif isinstance(item, AssistantMessageItem):
+                                item_type_for_id = "message"
+                            elif isinstance(item, UserMessageItem):
+                                item_type_for_id = "message"
+                            else:
+                                item_type_for_id = "message"
+                            item.id = self.store.generate_item_id(item_type_for_id, thread_meta, context)
+                            item_id_map[original_id] = item.id
+                            logger.info(f"[python-respond] Fixed ID in ThreadItemAddedEvent: {original_id} -> {item.id}")
+                    else:
+                        logger.info(f"[python-respond] Item {type(item).__name__} already has valid ID: {original_id}")
+                
+                # Fix __fake_id__ in ThreadItemDoneEvent items before they're saved
+                if isinstance(event, ThreadItemDoneEvent) and hasattr(event, 'item'):
+                    item = event.item
+                    original_id = item.id if hasattr(item, 'id') else 'N/A'
+                    item_type = item.type if hasattr(item, 'type') else type(item).__name__
+                    content_preview = ""
+                    content_length = 0
+                    if isinstance(item, AssistantMessageItem) and item.content:
+                        # Get first 50 chars of content for logging
+                        first_content = item.content[0] if item.content else None
+                        if first_content and hasattr(first_content, 'text'):
+                            content_length = len(first_content.text)
+                            content_preview = first_content.text[:50] + "..." if len(first_content.text) > 50 else first_content.text
+                    logger.info(f"[python-respond] ThreadItemDoneEvent: type={item_type}, id={original_id}, content_length={content_length}, content_preview={content_preview}")
+                    
+                    if hasattr(item, 'id') and (item.id == '__fake_id__' or not item.id or item.id == 'N/A'):
+                        # Check if we've already generated an ID for this item (from thread.item.added)
+                        if original_id in item_id_map:
+                            item.id = item_id_map[original_id]
+                            logger.info(f"[python-respond] Reusing ID for ThreadItemDoneEvent: {original_id} -> {item.id}")
+                        else:
+                            logger.error(f"[python-respond] CRITICAL: Fixing __fake_id__ for {type(item).__name__} in ThreadItemDoneEvent (original_id={original_id})")
+                            thread_meta = ThreadMetadata(id=thread.id, created_at=datetime.now())
+                            if isinstance(item, ClientToolCallItem):
+                                item_type_for_id = "tool_call"
+                            elif isinstance(item, AssistantMessageItem):
+                                item_type_for_id = "message"
+                            elif isinstance(item, UserMessageItem):
+                                item_type_for_id = "message"
+                            else:
+                                item_type_for_id = "message"
+                            item.id = self.store.generate_item_id(item_type_for_id, thread_meta, context)
+                            item_id_map[original_id] = item.id
+                            logger.info(f"[python-respond] Fixed ID in ThreadItemDoneEvent: {original_id} -> {item.id}")
+                    else:
+                        logger.info(f"[python-respond] Item {type(item).__name__} already has valid ID: {original_id}")
+                yield event
+        
+        # Stream events with fixed IDs
+        async for event in fix_chatkit_event_ids(stream_agent_response(agent_context, result)):
             yield event
 
 server = MyChatKitServer(data_store, attachment_store)
 
@@ -2,6 +2,7 @@
 from typing import Any
 from datetime import datetime
 import os
+import logging
 
 from fastapi import HTTPException
 from chatkit.store import Store, AttachmentStore
@@ -18,6 +19,8 @@
 )
 from openai import AsyncOpenAI
 
+logger = logging.getLogger(__name__)
+
 
 class TContext(dict):
     """Request-scoped context passed through ChatKit and Store.
@@ -91,6 +94,31 @@ async def add_thread_item(
         if not context.user_id:
             raise HTTPException(status_code=400, detail="Missing user_id")
 
+        item_id = item.id if hasattr(item, 'id') else 'N/A'
+        logger.info(f"[add_thread_item] Adding item to thread {thread_id}: type={item.type if hasattr(item, 'type') else type(item).__name__}, id={item_id}")
+        
+        # CRITICAL: Check if ID is invalid for any item type
+        # If items are saved with __fake_id__, they will overwrite each other due to PRIMARY KEY constraint
+        if item_id == '__fake_id__' or not item_id or item_id == 'N/A':
+            logger.error(f"[add_thread_item] WARNING: Item has invalid ID: {item_id}, type={item.type if hasattr(item, 'type') else type(item).__name__}")
+            # Generate a proper ID if missing
+            # Create a minimal ThreadMetadata for generate_item_id
+            thread_meta = ThreadMetadata(id=thread_id, created_at=datetime.now())
+            # Determine item type for ID generation
+            if isinstance(item, ClientToolCallItem):
+                item_type_for_id = "tool_call"
+                logger.info(f"[add_thread_item] ClientToolCallItem: status={item.status}, name={item.name}, call_id={item.call_id}")
+            elif isinstance(item, AssistantMessageItem):
+                item_type_for_id = "message"
+            elif isinstance(item, UserMessageItem):
+                item_type_for_id = "message"
+            else:
+                item_type_for_id = "message"  # Default fallback
+            item.id = self.generate_item_id(item_type_for_id, thread_meta, context)
+            logger.info(f"[add_thread_item] Generated new ID for item: {item.id}")
+        elif isinstance(item, ClientToolCallItem):
+            logger.info(f"[add_thread_item] ClientToolCallItem: status={item.status}, name={item.name}, call_id={item.call_id}, id={item_id}")
+
         import httpx
 
         client = self._get_client(context)
 
@@ -6,7 +6,7 @@ enabled = true
 [edge_runtime.secrets]
 ANTHROPIC_API_KEY = "your-anthropic-api-key"
 # Default model for agents
-DEFAULT_AGENT_MODEL = "ollama/gpt-oss:120b-cloud"
+DEFAULT_AGENT_MODEL = "openai/gpt-4.1"
 # Add your Hugging Face token here for AI features
 HF_TOKEN = "your-huggingface-token"
 OLLAMA_API_KEY = "your-ollama-cloud-api-key"
 
@@ -444,7 +444,9 @@ export class ChatKitServer<TCtx = TContext> {
         const thread = await this.store.load_thread(req.params.thread_id, context);
         // Load recent items to find the pending client_tool_call
         // Match Python: items = await self.store.load_thread_items(thread.id, None, 1, "desc", context)
-        const items = await this.store.load_thread_items(thread.id, null, 1, 'desc', context);
+        // BUT: We need to load more items to find the pending tool call if an assistant message was saved after it
+        // Load DEFAULT_PAGE_SIZE items to find the pending tool call
+        const items = await this.store.load_thread_items(thread.id, null, DEFAULT_PAGE_SIZE, 'desc', context);
         // Match Python: tool_call = next((item for item in items.data if isinstance(item, ClientToolCallItem) and item.status == "pending"), None)
         const toolCall = items.data.find((item: ThreadItem) => {
           const typedItem = item as { type: string; status?: string };