Skip to content

Commit d5170ce

Browse files
committed
feat: add runtime payload support for evals
1 parent 71981f2 commit d5170ce

File tree

4 files changed

+28
-12
lines changed

4 files changed

+28
-12
lines changed

.changeset/some-streets-sell.md

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
---
2+
"@voltagent/evals": patch
3+
"@voltagent/core": patch
4+
---
5+
6+
feat: add runtime payload support for evals

packages/core/src/eval/runtime/runtime.ts

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
import { safeStringify } from "@voltagent/internal/utils";
22

3+
const RUNTIME_METADATA_KEY = "__runtime";
4+
35
export type SamplingPolicy =
46
| { type: "always" }
57
| { type: "never" }
@@ -199,6 +201,15 @@ export async function runLocalScorers<Payload extends Record<string, unknown>>(
199201

200202
const durationMs = Date.now() - start;
201203

204+
const runtimeSnapshot: Record<string, unknown> = {
205+
payload: cloneRecord(payload) ?? payload ?? null,
206+
params: cloneRecord(scorerParams) ?? scorerParams ?? {},
207+
};
208+
209+
metadata = mergeMetadata(metadata, {
210+
[RUNTIME_METADATA_KEY]: runtimeSnapshot,
211+
});
212+
202213
const execution: LocalScorerExecutionResult = {
203214
id: definition.id,
204215
name: definition.name,

packages/evals/src/voltops/run.ts

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -424,10 +424,18 @@ function inferTerminalStatus(summary: ExperimentSummary): CompleteEvalRunRequest
424424
const hasErrors = summary.errorCount > 0;
425425
const hasFailures = summary.failureCount > 0;
426426
const criteriaEvaluations = summary.criteria ?? [];
427-
const passedAllCriteria =
428-
criteriaEvaluations.length === 0 || criteriaEvaluations.every((entry) => entry.passed);
427+
const hasCriteria = criteriaEvaluations.length > 0;
428+
const passedAllCriteria = criteriaEvaluations.every((entry) => entry.passed);
429429

430-
if (hasErrors || hasFailures || !passedAllCriteria) {
430+
if (hasErrors) {
431+
return "failed";
432+
}
433+
434+
if (hasCriteria) {
435+
return passedAllCriteria ? "succeeded" : "failed";
436+
}
437+
438+
if (hasFailures) {
431439
return "failed";
432440
}
433441

website/docs/evals/live-evaluations.md

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -176,15 +176,6 @@ const customScorer = buildScorer({
176176
.build();
177177
```
178178

179-
**Lazy-loaded scorer:**
180-
181-
```ts
182-
scorer: async () => {
183-
const { createAnswerCorrectnessScorer } = await import("@voltagent/scorers");
184-
return createAnswerCorrectnessScorer();
185-
};
186-
```
187-
188179
#### `params`
189180

190181
Static or dynamic parameters passed to the scorer.

0 commit comments

Comments
 (0)