@@ -48,10 +48,6 @@ class AnthropicMessage:
4848 content : List [AnthropicMessageContent ]
4949
5050
51- class ModelFamily (Enum ):
52- Anthropic = "Anthropic"
53-
54-
5551class InferenceClient (ABC ):
5652 @abstractmethod
5753 async def connect_and_listen (
@@ -67,10 +63,10 @@ async def get_generation(
6763
6864
6965class BedrockInferenceClient (InferenceClient ):
70- def __init__ (self , model : str ):
66+ def __init__ (self , model : str , region_name = "us-east-1" ):
7167 self .model = model
68+ self .region_name = region_name
7269 self .session = aioboto3 .Session ()
73- self .model_family = ModelFamily .Anthropic
7470 self .anthropic_version = "bedrock-2023-05-31"
7571
7672 async def get_generation (
@@ -99,7 +95,7 @@ async def get_generation(
9995
10096 async with self .session .client (
10197 service_name = "bedrock-runtime" ,
102- region_name = "us-east-1" ,
98+ region_name = self . region_name ,
10399 ) as client :
104100 response = await client .invoke_model (
105101 body = json .dumps (body .__dict__ ),
@@ -179,16 +175,22 @@ def __init__(
179175 async def get_generation (
180176 self , messages : List [ChatMessage ], max_tokens = 4096 , top_p = 0.5 , temperature = 0.5
181177 ):
178+ system = None
179+ if messages [0 ]["role" ] == "system" :
180+ system = {
181+ "system" : [
182+ {"type" : "text" , "text" : messages [0 ]["content" ]}
183+ | (
184+ {"cache_control" : {"type" : "ephemeral" }}
185+ if messages [0 ].get ("cache_marker" )
186+ else {}
187+ )
188+ ]
189+ }
190+ messages = messages [1 :]
191+
182192 request = {
183193 "model" : self .model ,
184- "system" : [
185- {"type" : "text" , "text" : messages [0 ]["content" ]}
186- | (
187- {"cache_control" : {"type" : "ephemeral" }}
188- if messages [0 ].get ("cache_marker" )
189- else {}
190- )
191- ],
192194 "top_p" : top_p ,
193195 "temperature" : temperature ,
194196 "max_tokens" : max_tokens ,
@@ -199,10 +201,14 @@ async def get_generation(
199201 if msg .get ("cache_marker" )
200202 else {}
201203 )
202- for msg in messages [ 1 :]
204+ for msg in messages
203205 ],
204206 "stream" : False ,
205207 }
208+
209+ if system :
210+ request .update (system )
211+
206212 async with httpx .AsyncClient () as client :
207213 response = await client .post (
208214 self .api_url ,
@@ -223,16 +229,23 @@ async def get_generation(
223229 async def connect_and_listen (
224230 self , messages : List [ChatMessage ], max_tokens = 4096 , top_p = 0.5 , temperature = 0.5
225231 ):
232+
233+ system = None
234+ if messages [0 ]["role" ] == "system" :
235+ system = {
236+ "system" : [
237+ {"type" : "text" , "text" : messages [0 ]["content" ]}
238+ | (
239+ {"cache_control" : {"type" : "ephemeral" }}
240+ if messages [0 ].get ("cache_marker" )
241+ else {}
242+ )
243+ ]
244+ }
245+ messages = messages [1 :]
246+
226247 request = {
227248 "model" : self .model ,
228- "system" : [
229- {"type" : "text" , "text" : messages [0 ]["content" ]}
230- | (
231- {"cache_control" : {"type" : "ephemeral" }}
232- if messages [0 ].get ("cache_marker" )
233- else {}
234- )
235- ],
236249 "top_p" : top_p ,
237250 "temperature" : temperature ,
238251 "max_tokens" : max_tokens ,
@@ -248,11 +261,14 @@ async def connect_and_listen(
248261 )
249262 ],
250263 }
251- for msg in messages [ 1 :]
264+ for msg in messages
252265 ],
253266 "stream" : True ,
254267 }
255268
269+ if system :
270+ request .update (system )
271+
256272 async with httpx .AsyncClient () as client :
257273 async with client .stream (
258274 "POST" ,
@@ -272,9 +288,6 @@ async def connect_and_listen(
272288 if response .status_code != 200 :
273289 message_logs = [{"role" : msg ["role" ]} for msg in messages [1 :]]
274290
275- print (line )
276- pprint (message_logs )
277-
278291 if line .startswith ("data: " ):
279292 data = json .loads (line [6 :])
280293 chunk_type = data ["type" ]