POST /api/v1/evals

Create and run an evaluation

Creates a new eval run and executes it synchronously against the specified model. The provider API key must be configured in Settings. Returns the full results including per-case scores and an overall summary.

Authentication

Send Authorization: Bearer YOUR_API_KEY on every request. Generate API keys at /dashboard/api-keys.

Request body (required)

Example

{
  "projectId": "00000000-0000-0000-0000-000000000000",
  "name": "qa-regression-v2",
  "model": "gpt-4o",
  "prompt": "Answer the question: {{input}}",
  "cases": [
    {
      "input": "<Test input>",
      "expectedOutput": "<Expected output for comparison>"
    }
  ],
  "scorers": [
    "exact-match",
    "faithfulness",
    "relevance"
  ]
}

Schema

{
  "application/json": {
    "schema": {
      "type": "object",
      "required": [
        "projectId",
        "name",
        "model",
        "prompt",
        "cases",
        "scorers"
      ],
      "properties": {
        "projectId": {
          "type": "string",
          "format": "uuid",
          "description": "Project ID"
        },
        "name": {
          "type": "string",
          "description": "Name for this eval run",
          "example": "qa-regression-v2"
        },
        "model": {
          "type": "string",
          "description": "Model identifier",
          "example": "gpt-4o"
        },
        "prompt": {
          "type": "string",
          "description": "Prompt template with {{input}} placeholder",
          "example": "Answer the question: {{input}}"
        },
        "cases": {
          "type": "array",
          "items": {
            "type": "object",
            "required": [
              "input"
            ],
            "properties": {
              "input": {
                "type": "string",
                "description": "Test input"
              },
              "expectedOutput": {
                "type": "string",
                "description": "Expected output for comparison"
              }
            }
          },
          "description": "Array of test cases"
        },
        "scorers": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "Scorer names to apply",
          "example": [
            "exact-match",
            "faithfulness",
            "relevance"
          ]
        }
      }
    }
  }
}

Response

201 example

{
  "success": true,
  "data": {
    "id": "00000000-0000-0000-0000-000000000000",
    "name": "string",
    "status": "passed",
    "model": "string",
    "score": 0,
    "results": [
      {
        "input": "string",
        "expectedOutput": "string",
        "actualOutput": "string",
        "score": 0,
        "passed": false,
        "latency": 0,
        "tokenUsage": {},
        "scorerResults": {}
      }
    ],
    "summary": {
      "totalCases": 0,
      "passedCases": 0,
      "failedCases": 0,
      "passRate": 0,
      "avgScore": 0,
      "totalLatency": 0
    }
  }
}

All status codes

201 — Eval run created and executed

400 — (no description)

429 — (no description)

Code samples

cURL

# Create and run an eval. The bearer token is read from $EVALGUARD_API_KEY.
curl --request POST \
  --url https://evalguard.ai/api/v1/evals \
  --header "Authorization: Bearer $EVALGUARD_API_KEY" \
  --header "Content-Type: application/json" \
  --data '{ "projectId": "00000000-0000-0000-0000-000000000000", "name": "qa-regression-v2", "model": "gpt-4o", "prompt": "Answer the question: {{input}}", "cases": [ { "input": "<Test input>", "expectedOutput": "<Expected output for comparison>" } ], "scorers": [ "exact-match", "faithfulness", "relevance" ] }'

TypeScript

import { EvalGuard } from "@evalguard/sdk";

// Authenticate with the API key taken from the environment.
const apiKey = process.env.EVALGUARD_API_KEY;
const client = new EvalGuard({ apiKey });

// Request body: one test case, scored by three scorers.
// The prompt template carries the {{input}} placeholder.
const payload = {
  projectId: "00000000-0000-0000-0000-000000000000",
  name: "qa-regression-v2",
  model: "gpt-4o",
  prompt: "Answer the question: {{input}}",
  cases: [
    { input: "<Test input>", expectedOutput: "<Expected output for comparison>" },
  ],
  scorers: ["exact-match", "faithfulness", "relevance"],
};

// Send the eval request and print whatever the client returns.
const response = await client.request({
  method: "POST",
  path: "/api/v1/evals",
  body: payload,
});
console.log(response);

Python

from evalguard import EvalGuard
import os

# Authenticate with the API key taken from the environment.
api_key = os.environ["EVALGUARD_API_KEY"]
client = EvalGuard(api_key=api_key)

# Request body: one test case, scored by three scorers.
# The prompt template carries the {{input}} placeholder.
payload = {
    "projectId": "00000000-0000-0000-0000-000000000000",
    "name": "qa-regression-v2",
    "model": "gpt-4o",
    "prompt": "Answer the question: {{input}}",
    "cases": [
        {
            "input": "<Test input>",
            "expectedOutput": "<Expected output for comparison>",
        },
    ],
    "scorers": ["exact-match", "faithfulness", "relevance"],
}

# Send the eval request and print whatever the client returns.
response = client.request(method="POST", path="/api/v1/evals", body=payload)
print(response)

Go

package main

import (
	"context"
	"fmt"
	"os"

	"github.com/evalguard/evalguard-go"
)

// main sends a single eval request to POST /api/v1/evals and prints the
// response. It panics if the request returns an error.
func main() {
	// Authenticate with the API key taken from the environment.
	apiKey := os.Getenv("EVALGUARD_API_KEY")
	client := evalguard.NewClient(apiKey)

	// One test case; the prompt template carries the {{input}} placeholder.
	testCase := map[string]any{
		"input":          "<Test input>",
		"expectedOutput": "<Expected output for comparison>",
	}
	payload := map[string]any{
		"projectId": "00000000-0000-0000-0000-000000000000",
		"name":      "qa-regression-v2",
		"model":     "gpt-4o",
		"prompt":    "Answer the question: {{input}}",
		"cases":     []any{testCase},
		"scorers":   []any{"exact-match", "faithfulness", "relevance"},
	}

	resp, err := client.Request(context.Background(), "POST", "/api/v1/evals", payload)
	if err != nil {
		panic(err)
	}
	fmt.Println(resp)
}

Errors

400 · 429

Request body required

Response

Code samples

Errors

Other Evals endpoints