6767 ],
6868 "max_tokens" : 150 , # Reduced from 500
6969 "temperature" : 0.1 ,
70- "seed" : 0 ,
70+ # "seed": 0,
7171 },
7272 payload_completions = {
7373 "model" : "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" ,
7474 "prompt" : text_prompt ,
7575 "max_tokens" : 150 ,
7676 "temperature" : 0.1 ,
77- "seed" : 0 ,
77+ # "seed": 0,
7878 },
7979 repeat_count = 10 ,
8080 expected_log = [],
159159 "multimodal_agg" : (
160160 DeploymentGraph (
161161 module = "graphs.agg:Frontend" ,
162- config = "configs/agg.yaml" ,
162+ config = "configs/agg-llava .yaml" ,
163163 directory = "/workspace/examples/multimodal" ,
164164 endpoints = ["v1/chat/completions" ],
165165 response_handlers = [
@@ -257,12 +257,22 @@ def __init__(self, graph: DeploymentGraph, request, port=8000, timeout=900):
257257 if graph .config :
258258 command .extend (["-f" , os .path .join (graph .directory , graph .config )])
259259
260- command .extend (["--Frontend.port" , str (port )])
261-
262- health_check_urls = [(f"http://localhost:{ port } /v1/models" , self ._check_model )]
263-
260+ # Handle multimodal deployments differently
264261 if "multimodal" in graph .directory :
262+ # Set DYNAMO_PORT environment variable for multimodal
263+ env = os .environ .copy ()
264+ env ["DYNAMO_PORT" ] = str (port )
265265 health_check_urls = []
266+ # Don't add health check on port since multimodal uses DYNAMO_PORT
267+ health_check_ports = []
268+ else :
269+ # Regular LLM deployments
270+ command .extend (["--Frontend.port" , str (port )])
271+ health_check_urls = [
272+ (f"http://localhost:{ port } /v1/models" , self ._check_model )
273+ ]
274+ health_check_ports = [port ]
275+ env = None
266276
267277 self .port = port
268278
@@ -271,11 +281,12 @@ def __init__(self, graph: DeploymentGraph, request, port=8000, timeout=900):
271281 timeout = timeout ,
272282 display_output = True ,
273283 working_dir = graph .directory ,
274- health_check_ports = [ port ] ,
284+ health_check_ports = health_check_ports ,
275285 health_check_urls = health_check_urls ,
276286 delayed_start = graph .delayed_start ,
277287 stragglers = ["http" ],
278288 log_dir = request .node .name ,
289+ env = env , # Pass the environment variables
279290 )
280291
281292 def _check_model (self , response ):
0 commit comments