Skip to content

Commit d4c140f

Browse files
committed
Handle in-stream errors
1 parent c061a18 commit d4c140f

File tree

1 file changed

+25
-0
lines changed

1 file changed

+25
-0
lines changed

asimov/services/inference_clients.py

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -604,6 +604,8 @@ async def _tool_chain_stream(
604604
"anthropic-beta": "prompt-caching-2024-07-31",
605605
},
606606
) as response:
607+
response.raise_for_status()
608+
607609
async for line in response.aiter_lines():
608610
if not line.startswith("data: "):
609611
continue
@@ -671,12 +673,35 @@ async def _tool_chain_stream(
671673
chunk_json["usage"]["output_tokens"]
672674
)
673675
break
676+
elif chunk_type == "error":
677+
if chunk_json["error"]["type"] == "overloaded_error":
678+
raise httpx.HTTPStatusError(
679+
message="Stream message sent overloaded!",
680+
request=httpx.Request(
681+
"GET", "https://dummy_request.com"
682+
),
683+
response=httpx.Response(status_code=529),
684+
)
685+
674686
return current_content
675687
except httpx.HTTPStatusError as e:
676688
if e.response.status_code == 429:
677689
print("429 backoff")
678690
await asyncio.sleep(3**retry)
679691
continue
692+
693+
if e.response.status_code == 529:
694+
print("529 overloaded")
695+
await asyncio.sleep(3**retry)
696+
continue
697+
698+
if (
699+
e.response.status_code == 400
700+
and b"prompt too long" in await e.response.aread()
701+
):
702+
print("Context limit reached, returning")
703+
return serialized_messages
704+
680705
raise
681706
except (httpx.RequestError, httpx.HTTPError) as e:
682707
if retry < 4: # Allow retrying on connection errors

0 commit comments

Comments
 (0)