Completion

any_llm.completion(model, messages, *, tools=None, tool_choice=None, max_turns=None, temperature=None, top_p=None, max_tokens=None, response_format=None, stream=None, n=None, stop=None, presence_penalty=None, frequency_penalty=None, seed=None, api_key=None, api_base=None, timeout=None, user=None, **kwargs)

Create a chat completion.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `model` | `str` | Model identifier in format 'provider/model' (e.g., 'mistral/mistral-small') | *required* |
| `messages` | `list[dict[str, Any]]` | List of messages for the conversation | *required* |
| `tools` | `Optional[List[Union[dict[str, Any], Callable[..., Any]]]]` | List of tools for tool calling. Can be Python callables or OpenAI tool format dicts | `None` |
| `tool_choice` | `Optional[Union[str, dict[str, Any]]]` | Controls which tools the model can call | `None` |
| `max_turns` | `Optional[int]` | Maximum number of tool execution turns | `None` |
| `temperature` | `Optional[float]` | Controls randomness in the response (0.0 to 2.0) | `None` |
| `top_p` | `Optional[float]` | Controls diversity via nucleus sampling (0.0 to 1.0) | `None` |
| `max_tokens` | `Optional[int]` | Maximum number of tokens to generate | `None` |
| `response_format` | `dict[str, Any] \| type[BaseModel] \| None` | Format specification for the response | `None` |
| `stream` | `Optional[bool]` | Whether to stream the response | `None` |
| `n` | `Optional[int]` | Number of completions to generate | `None` |
| `stop` | `Optional[Union[str, List[str]]]` | Stop sequences for generation | `None` |
| `presence_penalty` | `Optional[float]` | Penalize new tokens based on presence in text | `None` |
| `frequency_penalty` | `Optional[float]` | Penalize new tokens based on frequency in text | `None` |
| `seed` | `Optional[int]` | Random seed for reproducible results | `None` |
| `api_key` | `Optional[str]` | API key for the provider | `None` |
| `api_base` | `Optional[str]` | Base URL for the provider API | `None` |
| `timeout` | `Optional[Union[float, int]]` | Request timeout in seconds | `None` |
| `user` | `Optional[str]` | Unique identifier for the end user | `None` |
| `**kwargs` | `Any` | Additional provider-specific parameters | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `ChatCompletion \| Stream[ChatCompletionChunk]` | The completion response from the provider |
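
For reference, a minimal synchronous call might look like the sketch below. The model name and prompt are illustrative, and provider credentials are assumed to be available (via environment variables or the `api_key` parameter):

```python
from any_llm import completion

# Minimal call; credentials are assumed to come from the environment
# or the api_key parameter. Model name and prompt are illustrative.
response = completion(
    model="mistral/mistral-small",  # 'provider/model' format
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response.choices[0].message.content)  # ChatCompletion when stream is unset
```

With `stream=True` the call returns a `Stream[ChatCompletionChunk]` instead; the loop below assumes the chunks follow the OpenAI delta convention implied by the annotated types:

```python
for chunk in completion(
    model="mistral/mistral-small",
    messages=[{"role": "user", "content": "Tell me a short story."}],
    stream=True,
):
    # Each chunk carries an incremental delta; content can be None on some chunks
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
```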

Source code in src/any_llm/api.py
def completion(
    model: str,
    messages: list[dict[str, Any]],
    *,
    tools: Optional[List[Union[dict[str, Any], Callable[..., Any]]]] = None,
    tool_choice: Optional[Union[str, dict[str, Any]]] = None,
    max_turns: Optional[int] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    max_tokens: Optional[int] = None,
    response_format: dict[str, Any] | type[BaseModel] | None = None,
    stream: Optional[bool] = None,
    n: Optional[int] = None,
    stop: Optional[Union[str, List[str]]] = None,
    presence_penalty: Optional[float] = None,
    frequency_penalty: Optional[float] = None,
    seed: Optional[int] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    timeout: Optional[Union[float, int]] = None,
    user: Optional[str] = None,
    **kwargs: Any,
) -> ChatCompletion | Stream[ChatCompletionChunk]:
    """Create a chat completion.

    Args:
        model: Model identifier in format 'provider/model' (e.g., 'mistral/mistral-small')
        messages: List of messages for the conversation
        tools: List of tools for tool calling. Can be Python callables or OpenAI tool format dicts
        tool_choice: Controls which tools the model can call
        max_turns: Maximum number of tool execution turns
        temperature: Controls randomness in the response (0.0 to 2.0)
        top_p: Controls diversity via nucleus sampling (0.0 to 1.0)
        max_tokens: Maximum number of tokens to generate
        response_format: Format specification for the response
        stream: Whether to stream the response
        n: Number of completions to generate
        stop: Stop sequences for generation
        presence_penalty: Penalize new tokens based on presence in text
        frequency_penalty: Penalize new tokens based on frequency in text
        seed: Random seed for reproducible results
        api_key: API key for the provider
        api_base: Base URL for the provider API
        timeout: Request timeout in seconds
        user: Unique identifier for the end user
        **kwargs: Additional provider-specific parameters

    Returns:
        The completion response from the provider

    """
    provider, model_name, completion_kwargs = _prepare_completion_request(
        model,
        messages,
        tools=tools,
        tool_choice=tool_choice,
        max_turns=max_turns,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        response_format=response_format,
        stream=stream,
        n=n,
        stop=stop,
        presence_penalty=presence_penalty,
        frequency_penalty=frequency_penalty,
        seed=seed,
        api_key=api_key,
        api_base=api_base,
        timeout=timeout,
        user=user,
        **kwargs,
    )

    return provider.completion(model_name, messages, **completion_kwargs)
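
Because `tools` accepts plain Python callables, a function can be passed directly. The sketch below assumes any_llm builds the tool schema from the callable's signature and docstring, and that `max_turns` bounds the tool-execution loop; `get_weather` is a hypothetical helper:

```python
from any_llm import completion

def get_weather(city: str) -> str:
    """Return a short weather summary for a city."""
    return f"It is sunny in {city}."  # stub data for illustration

# Assumption: the schema for get_weather is derived from its
# signature and docstring; max_turns caps tool-execution rounds.
response = completion(
    model="mistral/mistral-small",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=[get_weather],
    max_turns=3,
)
print(response.choices[0].message.content)
```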

any_llm.acompletion(model, messages, *, tools=None, tool_choice=None, max_turns=None, temperature=None, top_p=None, max_tokens=None, response_format=None, stream=None, n=None, stop=None, presence_penalty=None, frequency_penalty=None, seed=None, api_key=None, api_base=None, timeout=None, user=None, **kwargs) async

Create a chat completion asynchronously.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `model` | `str` | Model identifier in format 'provider/model' (e.g., 'mistral/mistral-small') | *required* |
| `messages` | `list[dict[str, Any]]` | List of messages for the conversation | *required* |
| `tools` | `Optional[List[Union[dict[str, Any], Callable[..., Any]]]]` | List of tools for tool calling. Can be Python callables or OpenAI tool format dicts | `None` |
| `tool_choice` | `Optional[Union[str, dict[str, Any]]]` | Controls which tools the model can call | `None` |
| `max_turns` | `Optional[int]` | Maximum number of tool execution turns | `None` |
| `temperature` | `Optional[float]` | Controls randomness in the response (0.0 to 2.0) | `None` |
| `top_p` | `Optional[float]` | Controls diversity via nucleus sampling (0.0 to 1.0) | `None` |
| `max_tokens` | `Optional[int]` | Maximum number of tokens to generate | `None` |
| `response_format` | `dict[str, Any] \| type[BaseModel] \| None` | Format specification for the response | `None` |
| `stream` | `Optional[bool]` | Whether to stream the response | `None` |
| `n` | `Optional[int]` | Number of completions to generate | `None` |
| `stop` | `Optional[Union[str, List[str]]]` | Stop sequences for generation | `None` |
| `presence_penalty` | `Optional[float]` | Penalize new tokens based on presence in text | `None` |
| `frequency_penalty` | `Optional[float]` | Penalize new tokens based on frequency in text | `None` |
| `seed` | `Optional[int]` | Random seed for reproducible results | `None` |
| `api_key` | `Optional[str]` | API key for the provider | `None` |
| `api_base` | `Optional[str]` | Base URL for the provider API | `None` |
| `timeout` | `Optional[Union[float, int]]` | Request timeout in seconds | `None` |
| `user` | `Optional[str]` | Unique identifier for the end user | `None` |
| `**kwargs` | `Any` | Additional provider-specific parameters | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `ChatCompletion \| Stream[ChatCompletionChunk]` | The completion response from the provider |
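
Usage mirrors `completion`, awaited inside a coroutine. A minimal sketch (model name and prompt are illustrative):

```python
import asyncio

from any_llm import acompletion

async def main() -> None:
    # Same parameters as completion(); the call is awaited instead
    response = await acompletion(
        model="mistral/mistral-small",
        messages=[{"role": "user", "content": "Name three prime numbers."}],
    )
    print(response.choices[0].message.content)

asyncio.run(main())
```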

Source code in src/any_llm/api.py
async def acompletion(
    model: str,
    messages: list[dict[str, Any]],
    *,
    tools: Optional[List[Union[dict[str, Any], Callable[..., Any]]]] = None,
    tool_choice: Optional[Union[str, dict[str, Any]]] = None,
    max_turns: Optional[int] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    max_tokens: Optional[int] = None,
    response_format: dict[str, Any] | type[BaseModel] | None = None,
    stream: Optional[bool] = None,
    n: Optional[int] = None,
    stop: Optional[Union[str, List[str]]] = None,
    presence_penalty: Optional[float] = None,
    frequency_penalty: Optional[float] = None,
    seed: Optional[int] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    timeout: Optional[Union[float, int]] = None,
    user: Optional[str] = None,
    **kwargs: Any,
) -> ChatCompletion | Stream[ChatCompletionChunk]:
    """Create a chat completion asynchronously.

    Args:
        model: Model identifier in format 'provider/model' (e.g., 'mistral/mistral-small')
        messages: List of messages for the conversation
        tools: List of tools for tool calling. Can be Python callables or OpenAI tool format dicts
        tool_choice: Controls which tools the model can call
        max_turns: Maximum number of tool execution turns
        temperature: Controls randomness in the response (0.0 to 2.0)
        top_p: Controls diversity via nucleus sampling (0.0 to 1.0)
        max_tokens: Maximum number of tokens to generate
        response_format: Format specification for the response
        stream: Whether to stream the response
        n: Number of completions to generate
        stop: Stop sequences for generation
        presence_penalty: Penalize new tokens based on presence in text
        frequency_penalty: Penalize new tokens based on frequency in text
        seed: Random seed for reproducible results
        api_key: API key for the provider
        api_base: Base URL for the provider API
        timeout: Request timeout in seconds
        user: Unique identifier for the end user
        **kwargs: Additional provider-specific parameters

    Returns:
        The completion response from the provider

    """
    provider, model_name, completion_kwargs = _prepare_completion_request(
        model,
        messages,
        tools=tools,
        tool_choice=tool_choice,
        max_turns=max_turns,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        response_format=response_format,
        stream=stream,
        n=n,
        stop=stop,
        presence_penalty=presence_penalty,
        frequency_penalty=frequency_penalty,
        seed=seed,
        api_key=api_key,
        api_base=api_base,
        timeout=timeout,
        user=user,
        **kwargs,
    )

    return await provider.acompletion(model_name, messages, **completion_kwargs)
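
The signature also accepts a Pydantic model class as `response_format`. How the structured output is surfaced is provider-dependent; the sketch below assumes the message content comes back as JSON matching the schema and validates it client-side:

```python
from pydantic import BaseModel

from any_llm import completion

class CityInfo(BaseModel):
    name: str
    country: str

response = completion(
    model="mistral/mistral-small",
    messages=[{"role": "user", "content": "Describe Paris as JSON with name and country."}],
    response_format=CityInfo,  # schema passed as a BaseModel subclass
)

# Assumption: the structured output arrives as a JSON string in the
# message content; validate it into the model client-side.
info = CityInfo.model_validate_json(response.choices[0].message.content)
print(info.name, info.country)
```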