Skip to content

Batched Chat Completions

Source: https://github.com/vllm-project/vllm/blob/main/examples/online_serving/batched_chat_completions.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Examples of batched chat completions via the vLLM OpenAI-compatible API.

The /v1/chat/completions/batch endpoint accepts ``messages`` as a list of
conversations.  Each conversation is processed independently and the response
contains one choice per conversation, indexed 0, 1, ..., N-1.

Start a server first, e.g.:
    vllm serve Qwen/Qwen2.5-1.5B-Instruct --port 8000

Current limitations compared to /v1/chat/completions:
    - Streaming is not supported.
    - Tool use is not supported.
    - Beam search is not supported.
"""

import json
import os

import httpx

# Server root; override with the VLLM_BASE_URL environment variable. A trailing
# "/" is stripped so the endpoint paths appended to it never contain "//".
BASE_URL = os.environ.get("VLLM_BASE_URL", "http://localhost:8000").rstrip("/")
# Model name sent with every request; override with VLLM_MODEL.
MODEL = os.environ.get("VLLM_MODEL", "Qwen/Qwen2.5-1.5B-Instruct")
# Batch endpoint described in the module docstring.
BATCH_URL = f"{BASE_URL}/v1/chat/completions/batch"


def post_batch(payload: dict, *, timeout: float = 60) -> dict:
    """POST ``payload`` to the batch chat-completions endpoint.

    Args:
        payload: Request body; httpx serializes it as JSON.
        timeout: Timeout in seconds handed to ``httpx.post``. Defaults to 60,
            the value that was previously hard-coded.

    Returns:
        The decoded JSON body of the response.

    Raises:
        httpx.HTTPStatusError: If the server replies with an error status
            (raised by ``response.raise_for_status()``).
    """
    response = httpx.post(BATCH_URL, json=payload, timeout=timeout)
    response.raise_for_status()
    return response.json()


def _print_choices(data: dict, *, parse_json: bool = False) -> None:
    """Print one indented ``[index] content`` line per choice in ``data``.

    Args:
        data: Decoded chat-completions response containing a ``choices`` list.
        parse_json: When true, decode each message content with ``json.loads``
            and print the resulting object instead of the raw string.

    Raises:
        json.JSONDecodeError: If ``parse_json`` is set and a message content
            is not valid JSON.
    """
    for choice in data["choices"]:
        content = choice["message"]["content"]
        if parse_json:
            content = json.loads(content)
        print(f"  [{choice['index']}] {content}")


def _json_schema_format(name: str, schema: dict) -> dict:
    """Wrap ``schema`` in a strict ``json_schema`` ``response_format`` value."""
    return {
        "type": "json_schema",
        "json_schema": {
            "name": name,
            "strict": True,
            "schema": schema,
        },
    }


def main() -> None:
    """Run every example in order and print the model's answers.

    Example 1a sends one conversation to the standard endpoint for comparison;
    the remaining examples send two conversations each to the batch endpoint:
    plain text, a regex constraint, and two JSON-schema constrained requests.
    Any HTTP error status aborts the run via ``raise_for_status()``.
    """
    print("=== Example 1a: single conversation (standard endpoint) ===")
    response = httpx.post(
        f"{BASE_URL}/v1/chat/completions",
        json={
            "model": MODEL,
            "messages": [{"role": "user", "content": "What is the capital of Japan?"}],
        },
        timeout=60,
    )
    response.raise_for_status()
    _print_choices(response.json())

    print("\n=== Example 1b: batched plain text (2 conversations) ===")
    data = post_batch(
        {
            "model": MODEL,
            "messages": [
                [{"role": "user", "content": "What is the capital of France?"}],
                [{"role": "user", "content": "What is the capital of Japan?"}],
            ],
        }
    )
    _print_choices(data)

    print("\n=== Example 2: batch with regex constraint (yes|no) ===")
    data = post_batch(
        {
            "model": MODEL,
            "messages": [
                [{"role": "user", "content": "Is the sky blue? Answer yes or no."}],
                [{"role": "user", "content": "Is fire cold? Answer yes or no."}],
            ],
            "structured_outputs": {"regex": "(yes|no)"},
        }
    )
    _print_choices(data)

    print("\n=== Example 3: batch with json_schema ===")
    person_schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string", "description": "Full name of the person"},
            "age": {"type": "integer", "description": "Age in years"},
        },
        "required": ["name", "age"],
    }
    data = post_batch(
        {
            "model": MODEL,
            "messages": [
                [
                    {
                        "role": "user",
                        "content": "Describe the person: name Alice, age 30.",
                    }
                ],
                [{"role": "user", "content": "Describe the person: name Bob, age 25."}],
            ],
            "response_format": _json_schema_format("person", person_schema),
        }
    )
    # The schema constrains each answer to a JSON object, so decode it first.
    _print_choices(data, parse_json=True)

    print("\n=== Example 4: batch book summaries ===")
    book_schema = {
        "type": "object",
        "properties": {
            "author": {
                "type": "string",
                "description": "Full name of the author",
            },
            "num_pages": {
                "type": "integer",
                "description": "Number of pages in the book",
            },
            "short_summary": {
                "type": "string",
                "description": "A one-sentence summary of the book",
            },
            "long_summary": {
                "type": "string",
                "description": (
                    "A detailed two to three sentence summary covering "
                    "the main themes and plot"
                ),
            },
        },
        "required": ["author", "num_pages", "short_summary", "long_summary"],
    }
    # The same system prompt is shared by both conversations in the batch.
    system_msg = {
        "role": "system",
        "content": (
            "You are a literary analyst. Extract structured information "
            "from book descriptions."
        ),
    }
    data = post_batch(
        {
            "model": MODEL,
            "messages": [
                [
                    system_msg,
                    {
                        "role": "user",
                        "content": (
                            "Extract information from this book: '1984' by George"
                            " Orwell, published in 1949, 328 pages. A dystopian"
                            " novel set in a totalitarian society ruled by Big"
                            " Brother, following Winston Smith as he secretly"
                            " rebels against the oppressive Party that surveils"
                            " and controls every aspect of life."
                        ),
                    },
                ],
                [
                    system_msg,
                    {
                        "role": "user",
                        "content": (
                            "Extract information from this book: 'The Hitchhiker's"
                            " Guide to the Galaxy' by Douglas Adams, published in"
                            " 1979, 193 pages. A comedic science fiction novel"
                            " following Arthur Dent, an ordinary Englishman who is"
                            " whisked off Earth moments before it is demolished to"
                            " make way for a hyperspace bypass, and his subsequent"
                            " absurd adventures across the universe."
                        ),
                    },
                ],
            ],
            "response_format": _json_schema_format("book_summary", book_schema),
        }
    )
    _print_choices(data, parse_json=True)


# Run the examples only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()