
AnyLLM

any_llm.AnyLLM

Bases: ABC

Provider for the LLM.

Source code in src/any_llm/any_llm.py
class AnyLLM(ABC):
    """Provider for the LLM."""

    # === Provider-specific configuration (to be overridden by subclasses) ===
    PROVIDER_NAME: str
    """Must match the name of the provider directory  (case sensitive)"""

    PROVIDER_DOCUMENTATION_URL: str
    """Link to the provider's documentation"""

    ENV_API_KEY_NAME: str
    """Environment variable name for the API key"""

    # === Feature support flags (to be set by subclasses) ===
    SUPPORTS_COMPLETION_STREAMING: bool
    """OpenAI Streaming Completion API"""

    SUPPORTS_COMPLETION: bool
    """OpenAI Completion API"""

    SUPPORTS_COMPLETION_REASONING: bool
    """Reasoning Content attached to Completion API Response"""

    SUPPORTS_COMPLETION_IMAGE: bool
    """Image Support for Completion API"""

    SUPPORTS_COMPLETION_PDF: bool
    """PDF Support for Completion API"""

    SUPPORTS_EMBEDDING: bool
    """OpenAI Embedding API"""

    SUPPORTS_RESPONSES: bool
    """OpenAI Responses API"""

    SUPPORTS_LIST_MODELS: bool
    """OpenAI Models API"""

    API_BASE: str | None = None
    """This is used to set the API base for the provider.
    It is not required but may prove useful for providers that have overridable api bases.
    """

    # === Internal Flag Checks ===
    MISSING_PACKAGES_ERROR: ImportError | None = None
    """Some providers use SDKs that are not installed by default.
    This flag is used to check if the packages are installed before instantiating the provider.
    """

    def __init__(self, api_key: str | None = None, api_base: str | None = None, **kwargs: Any) -> None:
        self._verify_no_missing_packages()
        self._init_client(
            api_key=self._verify_and_set_api_key(api_key),
            api_base=api_base,
            **kwargs,
        )

    def _verify_no_missing_packages(self) -> None:
        if self.MISSING_PACKAGES_ERROR is not None:
            msg = f"{self.PROVIDER_NAME} required packages are not installed. Please install them with `pip install any-llm-sdk[{self.PROVIDER_NAME}]`"
            raise ImportError(msg) from self.MISSING_PACKAGES_ERROR

    def _verify_and_set_api_key(self, api_key: str | None = None) -> str | None:
        # Standardized API key handling. Splitting into its own function so that providers
        # can easily override this method if they don't want verification (for instance, LMStudio)
        if not api_key:
            api_key = os.getenv(self.ENV_API_KEY_NAME)

        if not api_key:
            raise MissingApiKeyError(self.PROVIDER_NAME, self.ENV_API_KEY_NAME)
        return api_key

    @classmethod
    def create(
        cls, provider: str | LLMProvider, api_key: str | None = None, api_base: str | None = None, **kwargs: Any
    ) -> AnyLLM:
        """Create a provider instance using the given provider name and config.

        Args:
            provider: The provider name (e.g., 'openai', 'anthropic')
            api_key: API key for the provider
            api_base: Base URL for the provider API
            **kwargs: Additional provider-specific arguments

        Returns:
            Provider instance for the specified provider

        """
        return cls._create_provider(provider, api_key=api_key, api_base=api_base, **kwargs)

    @classmethod
    def _create_provider(
        cls, provider_key: str | LLMProvider, api_key: str | None = None, api_base: str | None = None, **kwargs: Any
    ) -> AnyLLM:
        """Dynamically load and create an instance of a provider based on the naming convention."""
        provider_key = LLMProvider.from_string(provider_key).value

        provider_class_name = f"{provider_key.capitalize()}Provider"
        provider_module_name = f"{provider_key}"

        module_path = f"any_llm.providers.{provider_module_name}"

        try:
            module = importlib.import_module(module_path)
        except ImportError as e:
            msg = f"Could not import module {module_path}: {e!s}. Please ensure the provider is supported by doing AnyLLM.get_supported_providers()"
            raise ImportError(msg) from e

        provider_class: type[AnyLLM] = getattr(module, provider_class_name)
        return provider_class(api_key=api_key, api_base=api_base, **kwargs)

    @classmethod
    def get_provider_class(cls, provider_key: str | LLMProvider) -> type[AnyLLM]:
        """Get the provider class without instantiating it.

        Args:
            provider_key: The provider key (e.g., 'anthropic', 'openai')

        Returns:
            The provider class

        """
        provider_key = LLMProvider.from_string(provider_key).value

        provider_class_name = f"{provider_key.capitalize()}Provider"
        provider_module_name = f"{provider_key}"

        module_path = f"any_llm.providers.{provider_module_name}"

        try:
            module = importlib.import_module(module_path)
        except ImportError as e:
            msg = f"Could not import module {module_path}: {e!s}. Please ensure the provider is supported by doing AnyLLM.get_supported_providers()"
            raise ImportError(msg) from e

        provider_class: type[AnyLLM] = getattr(module, provider_class_name)
        return provider_class

    @classmethod
    def get_supported_providers(cls) -> list[str]:
        """Get a list of supported provider keys."""
        return [provider.value for provider in LLMProvider]

    @classmethod
    def get_all_provider_metadata(cls) -> list[ProviderMetadata]:
        """Get metadata for all supported providers.

        Returns:
            List of ProviderMetadata objects, one per supported provider

        """
        providers: list[ProviderMetadata] = []
        for provider_key in cls.get_supported_providers():
            provider_class = cls.get_provider_class(provider_key)
            metadata = provider_class.get_provider_metadata()
            providers.append(metadata)

        # Sort providers by name
        providers.sort(key=lambda x: x.name)
        return providers

    @classmethod
    def get_provider_enum(cls, provider_key: str) -> LLMProvider:
        """Convert a string provider key to a ProviderName enum."""
        try:
            return LLMProvider(provider_key)
        except ValueError as e:
            supported = [provider.value for provider in LLMProvider]
            raise UnsupportedProviderError(provider_key, supported) from e

    @classmethod
    def split_model_provider(cls, model: str) -> tuple[LLMProvider, str]:
        """Extract the provider key from the model identifier.

        Supports both new format 'provider:model' (e.g., 'mistral:mistral-small')
        and legacy format 'provider/model' (e.g., 'mistral/mistral-small').

        The legacy format is deprecated and will be removed in version 1.0.
        """
        colon_index = model.find(":")
        slash_index = model.find("/")

        # Determine which delimiter comes first
        if colon_index != -1 and (slash_index == -1 or colon_index < slash_index):
            # The colon came first, so it's using the new syntax.
            provider, model_name = model.split(":", 1)
        elif slash_index != -1:
            # Slash comes first, so it's the legacy syntax
            warnings.warn(
                f"Model format 'provider/model' is deprecated and will be removed in version 1.0. "
                f"Please use 'provider:model' format instead. Got: '{model}'",
                DeprecationWarning,
                stacklevel=3,
            )
            provider, model_name = model.split("/", 1)
        else:
            msg = f"Invalid model format. Expected 'provider:model' or 'provider/model', got '{model}'"
            raise ValueError(msg)

        if not provider or not model_name:
            msg = f"Invalid model format. Expected 'provider:model' or 'provider/model', got '{model}'"
            raise ValueError(msg)
        return cls.get_provider_enum(provider), model_name

    @staticmethod
    @abstractmethod
    def _convert_completion_params(params: CompletionParams, **kwargs: Any) -> dict[str, Any]:
        msg = "Subclasses must implement this method"
        raise NotImplementedError(msg)

    @staticmethod
    @abstractmethod
    def _convert_completion_response(response: Any) -> ChatCompletion:
        msg = "Subclasses must implement this method"
        raise NotImplementedError(msg)

    @staticmethod
    @abstractmethod
    def _convert_completion_chunk_response(response: Any, **kwargs: Any) -> ChatCompletionChunk:
        msg = "Subclasses must implement this method"
        raise NotImplementedError(msg)

    @staticmethod
    @abstractmethod
    def _convert_embedding_params(params: Any, **kwargs: Any) -> dict[str, Any]:
        msg = "Subclasses must implement this method"
        raise NotImplementedError(msg)

    @staticmethod
    @abstractmethod
    def _convert_embedding_response(response: Any) -> CreateEmbeddingResponse:
        msg = "Subclasses must implement this method"
        raise NotImplementedError(msg)

    @staticmethod
    @abstractmethod
    def _convert_list_models_response(response: Any) -> Sequence[Model]:
        msg = "Subclasses must implement this method"
        raise NotImplementedError(msg)

    @classmethod
    def get_provider_metadata(cls) -> ProviderMetadata:
        """Get provider metadata without requiring instantiation.

        Returns:
            ProviderMetadata including the provider name, environment variable name,
            documentation URL, feature support flags, and class name.

        """
        return ProviderMetadata(
            name=cls.PROVIDER_NAME,
            env_key=cls.ENV_API_KEY_NAME,
            doc_url=cls.PROVIDER_DOCUMENTATION_URL,
            streaming=cls.SUPPORTS_COMPLETION_STREAMING,
            reasoning=cls.SUPPORTS_COMPLETION_REASONING,
            completion=cls.SUPPORTS_COMPLETION,
            image=cls.SUPPORTS_COMPLETION_IMAGE,
            pdf=cls.SUPPORTS_COMPLETION_PDF,
            embedding=cls.SUPPORTS_EMBEDDING,
            responses=cls.SUPPORTS_RESPONSES,
            list_models=cls.SUPPORTS_LIST_MODELS,
            class_name=cls.__name__,
        )

    @abstractmethod
    def _init_client(self, api_key: str | None = None, api_base: str | None = None, **kwargs: Any) -> None:
        msg = "Subclasses must implement this method"
        raise NotImplementedError(msg)

    def completion(
        self,
        **kwargs: Any,
    ) -> ChatCompletion | Iterator[ChatCompletionChunk]:
        """Create a chat completion synchronously.

        See [AnyLLM.acompletion][any_llm.any_llm.AnyLLM.acompletion]
        """
        allow_running_loop = kwargs.pop("allow_running_loop", INSIDE_NOTEBOOK)
        response = run_async_in_sync(self.acompletion(**kwargs), allow_running_loop=allow_running_loop)
        if isinstance(response, ChatCompletion):
            return response

        return async_iter_to_sync_iter(response)

    async def acompletion(
        self,
        model: str,
        messages: list[dict[str, Any] | ChatCompletionMessage],
        *,
        tools: list[dict[str, Any] | Callable[..., Any]] | None = None,
        tool_choice: str | dict[str, Any] | None = None,
        temperature: float | None = None,
        top_p: float | None = None,
        max_tokens: int | None = None,
        response_format: dict[str, Any] | type[BaseModel] | None = None,
        stream: bool | None = None,
        n: int | None = None,
        stop: str | list[str] | None = None,
        presence_penalty: float | None = None,
        frequency_penalty: float | None = None,
        seed: int | None = None,
        user: str | None = None,
        parallel_tool_calls: bool | None = None,
        logprobs: bool | None = None,
        top_logprobs: int | None = None,
        logit_bias: dict[str, float] | None = None,
        stream_options: dict[str, Any] | None = None,
        max_completion_tokens: int | None = None,
        reasoning_effort: Literal["minimal", "low", "medium", "high", "auto"] | None = "auto",
        **kwargs: Any,
    ) -> ChatCompletion | AsyncIterator[ChatCompletionChunk]:
        """Create a chat completion asynchronously.

        Args:
            model: Model identifier for the chosen provider (e.g., model='gpt-4.1-mini' for LLMProvider.OPENAI).
            messages: List of messages for the conversation
            tools: List of tools for tool calling. Can be Python callables or OpenAI tool format dicts
            tool_choice: Controls which tools the model can call
            temperature: Controls randomness in the response (0.0 to 2.0)
            top_p: Controls diversity via nucleus sampling (0.0 to 1.0)
            max_tokens: Maximum number of tokens to generate
            response_format: Format specification for the response
            stream: Whether to stream the response
            n: Number of completions to generate
            stop: Stop sequences for generation
            presence_penalty: Penalize new tokens based on presence in text
            frequency_penalty: Penalize new tokens based on frequency in text
            seed: Random seed for reproducible results
            user: Unique identifier for the end user
            parallel_tool_calls: Whether to allow parallel tool calls
            logprobs: Include token-level log probabilities in the response
            top_logprobs: Number of alternatives to return when logprobs are requested
            logit_bias: Bias the likelihood of specified tokens during generation
            stream_options: Additional options controlling streaming behavior
            max_completion_tokens: Maximum number of tokens for the completion
            reasoning_effort: Reasoning effort level for models that support it. "auto" will map to each provider's default.
            **kwargs: Additional provider-specific arguments that will be passed to the provider's API call.

        Returns:
            The completion response from the provider

        """
        all_args = locals()
        all_args.pop("self")
        all_args["model_id"] = all_args.pop("model")
        kwargs = all_args.pop("kwargs")

        if tools:
            all_args["tools"] = prepare_tools(tools)

        for i, message in enumerate(messages):
            if isinstance(message, ChatCompletionMessage):
                # Dump the message but exclude the extra field that we extend from OpenAI Spec
                messages[i] = message.model_dump(exclude_none=True, exclude={"reasoning"})
        all_args["messages"] = messages

        return await self._acompletion(CompletionParams(**all_args), **kwargs)

    async def _acompletion(
        self, params: CompletionParams, **kwargs: Any
    ) -> ChatCompletion | AsyncIterator[ChatCompletionChunk]:
        if not self.SUPPORTS_COMPLETION:
            msg = "Provider doesn't support completion."
            raise NotImplementedError(msg)
        msg = "Subclasses must implement _acompletion method"
        raise NotImplementedError(msg)

    def responses(self, **kwargs: Any) -> Response | Iterator[ResponseStreamEvent]:
        """Create a response synchronously.

        See [AnyLLM.aresponses][any_llm.any_llm.AnyLLM.aresponses]
        """
        allow_running_loop = kwargs.pop("allow_running_loop", INSIDE_NOTEBOOK)
        response = run_async_in_sync(self.aresponses(**kwargs), allow_running_loop=allow_running_loop)
        if isinstance(response, Response):
            return response
        return async_iter_to_sync_iter(response)

    async def aresponses(
        self,
        model: str,
        input_data: str | ResponseInputParam,
        *,
        tools: list[dict[str, Any] | Callable[..., Any]] | None = None,
        tool_choice: str | dict[str, Any] | None = None,
        max_output_tokens: int | None = None,
        temperature: float | None = None,
        top_p: float | None = None,
        stream: bool | None = None,
        instructions: str | None = None,
        max_tool_calls: int | None = None,
        parallel_tool_calls: int | None = None,
        reasoning: Any | None = None,
        text: Any | None = None,
        **kwargs: Any,
    ) -> Response | AsyncIterator[ResponseStreamEvent]:
        """Create a response using the OpenAI-style Responses API.

        This follows the OpenAI Responses API shape and returns the aliased
        `any_llm.types.responses.Response` type. If `stream=True`, an iterator of
        `any_llm.types.responses.ResponseStreamEvent` items is returned.

        Args:
            model: Model identifier for the chosen provider (e.g., model='gpt-4.1-mini' for LLMProvider.OPENAI).
            input_data: The input payload accepted by the provider's Responses API.
                For OpenAI-compatible providers, this is typically a list mixing
                text, images, and tool instructions, or a dict per OpenAI spec.
            tools: Optional tools for tool calling (Python callables or OpenAI tool dicts)
            tool_choice: Controls which tools the model can call
            max_output_tokens: Maximum number of output tokens to generate
            temperature: Controls randomness in the response (0.0 to 2.0)
            top_p: Controls diversity via nucleus sampling (0.0 to 1.0)
            stream: Whether to stream response events
            instructions: A system (or developer) message inserted into the model's context.
            max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a response. This maximum number applies across all built-in tool calls, not per individual tool. Any further attempts to call a tool by the model will be ignored.
            parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
            reasoning: Configuration options for reasoning models.
            text: Configuration options for a text response from the model. Can be plain text or structured JSON data.
            **kwargs: Additional provider-specific arguments that will be passed to the provider's API call.

        Returns:
            Either a `Response` object (non-streaming) or an iterator of
            `ResponseStreamEvent` (streaming).

        Raises:
            NotImplementedError: If the selected provider does not support the Responses API.

        """
        all_args = locals()
        all_args.pop("self")
        all_args["input"] = all_args.pop("input_data")
        kwargs = all_args.pop("kwargs")

        if tools:
            all_args["tools"] = prepare_tools(tools)

        return await self._aresponses(ResponsesParams(**all_args, **kwargs))

    async def _aresponses(
        self, params: ResponsesParams, **kwargs: Any
    ) -> Response | AsyncIterator[ResponseStreamEvent]:
        if not self.SUPPORTS_RESPONSES:
            msg = "Provider doesn't support responses."
            raise NotImplementedError(msg)
        msg = "Subclasses must implement _aresponses method"
        raise NotImplementedError(msg)

    def _embedding(self, model: str, inputs: str | list[str], **kwargs: Any) -> CreateEmbeddingResponse:
        allow_running_loop = kwargs.pop("allow_running_loop", INSIDE_NOTEBOOK)
        return run_async_in_sync(self.aembedding(model, inputs, **kwargs), allow_running_loop=allow_running_loop)

    async def aembedding(self, model: str, inputs: str | list[str], **kwargs: Any) -> CreateEmbeddingResponse:
        return await self._aembedding(model, inputs, **kwargs)

    async def _aembedding(self, model: str, inputs: str | list[str], **kwargs: Any) -> CreateEmbeddingResponse:
        if not self.SUPPORTS_EMBEDDING:
            msg = "Provider doesn't support embedding."
            raise NotImplementedError(msg)
        msg = "Subclasses must implement _aembedding method"
        raise NotImplementedError(msg)

    def list_models(self, **kwargs: Any) -> Sequence[Model]:
        allow_running_loop = kwargs.pop("allow_running_loop", INSIDE_NOTEBOOK)
        return run_async_in_sync(self.alist_models(**kwargs), allow_running_loop=allow_running_loop)

    async def alist_models(self, **kwargs: Any) -> Sequence[Model]:
        return await self._alist_models(**kwargs)

    async def _alist_models(self, **kwargs: Any) -> Sequence[Model]:
        if not self.SUPPORTS_LIST_MODELS:
            msg = "Provider doesn't support listing models."
            raise NotImplementedError(msg)
        msg = "Subclasses must implement _alist_models method"
        raise NotImplementedError(msg)

API_BASE = None class-attribute instance-attribute

This is used to set the API base for the provider. It is not required but may prove useful for providers that have overridable api bases.

ENV_API_KEY_NAME instance-attribute

Environment variable name for the API key

MISSING_PACKAGES_ERROR = None class-attribute instance-attribute

Some providers use SDKs that are not installed by default. This flag is used to check if the packages are installed before instantiating the provider.

PROVIDER_DOCUMENTATION_URL instance-attribute

Link to the provider's documentation

PROVIDER_NAME instance-attribute

Must match the name of the provider directory (case sensitive)

SUPPORTS_COMPLETION instance-attribute

OpenAI Completion API

SUPPORTS_COMPLETION_IMAGE instance-attribute

Image Support for Completion API

SUPPORTS_COMPLETION_PDF instance-attribute

PDF Support for Completion API

SUPPORTS_COMPLETION_REASONING instance-attribute

Reasoning Content attached to Completion API Response

SUPPORTS_COMPLETION_STREAMING instance-attribute

OpenAI Streaming Completion API

SUPPORTS_EMBEDDING instance-attribute

OpenAI Embedding API

SUPPORTS_LIST_MODELS instance-attribute

OpenAI Models API

SUPPORTS_RESPONSES instance-attribute

OpenAI Responses API

acompletion(model, messages, *, tools=None, tool_choice=None, temperature=None, top_p=None, max_tokens=None, response_format=None, stream=None, n=None, stop=None, presence_penalty=None, frequency_penalty=None, seed=None, user=None, parallel_tool_calls=None, logprobs=None, top_logprobs=None, logit_bias=None, stream_options=None, max_completion_tokens=None, reasoning_effort='auto', **kwargs) async

Create a chat completion asynchronously.

Parameters:

    model (str, required): Model identifier for the chosen provider (e.g., model='gpt-4.1-mini' for LLMProvider.OPENAI).
    messages (list[dict[str, Any] | ChatCompletionMessage], required): List of messages for the conversation.
    tools (list[dict[str, Any] | Callable[..., Any]] | None, default None): List of tools for tool calling. Can be Python callables or OpenAI tool format dicts.
    tool_choice (str | dict[str, Any] | None, default None): Controls which tools the model can call.
    temperature (float | None, default None): Controls randomness in the response (0.0 to 2.0).
    top_p (float | None, default None): Controls diversity via nucleus sampling (0.0 to 1.0).
    max_tokens (int | None, default None): Maximum number of tokens to generate.
    response_format (dict[str, Any] | type[BaseModel] | None, default None): Format specification for the response.
    stream (bool | None, default None): Whether to stream the response.
    n (int | None, default None): Number of completions to generate.
    stop (str | list[str] | None, default None): Stop sequences for generation.
    presence_penalty (float | None, default None): Penalize new tokens based on presence in text.
    frequency_penalty (float | None, default None): Penalize new tokens based on frequency in text.
    seed (int | None, default None): Random seed for reproducible results.
    user (str | None, default None): Unique identifier for the end user.
    parallel_tool_calls (bool | None, default None): Whether to allow parallel tool calls.
    logprobs (bool | None, default None): Include token-level log probabilities in the response.
    top_logprobs (int | None, default None): Number of alternatives to return when logprobs are requested.
    logit_bias (dict[str, float] | None, default None): Bias the likelihood of specified tokens during generation.
    stream_options (dict[str, Any] | None, default None): Additional options controlling streaming behavior.
    max_completion_tokens (int | None, default None): Maximum number of tokens for the completion.
    reasoning_effort (Literal['minimal', 'low', 'medium', 'high', 'auto'] | None, default 'auto'): Reasoning effort level for models that support it. "auto" will map to each provider's default.
    **kwargs (Any): Additional provider-specific arguments that will be passed to the provider's API call.

Returns:

    ChatCompletion | AsyncIterator[ChatCompletionChunk]: The completion response from the provider.

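A hedged usage sketch for acompletion is below. It assumes the class is importable from the any_llm package (re-exported from any_llm.any_llm) and that the relevant provider API key is set in the environment; the provider and model names are only the examples used in the docstring above.

import asyncio

from any_llm import AnyLLM  # assumed public import path


async def main() -> None:
    llm = AnyLLM.create("openai")
    completion = await llm.acompletion(
        model="gpt-4.1-mini",
        messages=[{"role": "user", "content": "Give me one fun fact about otters."}],
        temperature=0.2,
        max_tokens=128,
    )
    print(completion)  # a ChatCompletion object when stream is not set


asyncio.run(main())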

aresponses(model, input_data, *, tools=None, tool_choice=None, max_output_tokens=None, temperature=None, top_p=None, stream=None, instructions=None, max_tool_calls=None, parallel_tool_calls=None, reasoning=None, text=None, **kwargs) async

Create a response using the OpenAI-style Responses API.

This follows the OpenAI Responses API shape and returns the aliased any_llm.types.responses.Response type. If stream=True, an iterator of any_llm.types.responses.ResponseStreamEvent items is returned.

Parameters:

    model (str, required): Model identifier for the chosen provider (e.g., model='gpt-4.1-mini' for LLMProvider.OPENAI).
    input_data (str | ResponseInputParam, required): The input payload accepted by the provider's Responses API. For OpenAI-compatible providers, this is typically a list mixing text, images, and tool instructions, or a dict per the OpenAI spec.
    tools (list[dict[str, Any] | Callable[..., Any]] | None, default None): Optional tools for tool calling (Python callables or OpenAI tool dicts).
    tool_choice (str | dict[str, Any] | None, default None): Controls which tools the model can call.
    max_output_tokens (int | None, default None): Maximum number of output tokens to generate.
    temperature (float | None, default None): Controls randomness in the response (0.0 to 2.0).
    top_p (float | None, default None): Controls diversity via nucleus sampling (0.0 to 1.0).
    stream (bool | None, default None): Whether to stream response events.
    instructions (str | None, default None): A system (or developer) message inserted into the model's context.
    max_tool_calls (int | None, default None): The maximum number of total calls to built-in tools that can be processed in a response. This maximum applies across all built-in tool calls, not per individual tool; any further attempts by the model to call a tool will be ignored.
    parallel_tool_calls (int | None, default None): Whether to allow the model to run tool calls in parallel.
    reasoning (Any | None, default None): Configuration options for reasoning models.
    text (Any | None, default None): Configuration options for a text response from the model. Can be plain text or structured JSON data.
    **kwargs (Any): Additional provider-specific arguments that will be passed to the provider's API call.

Returns:

    Response | AsyncIterator[ResponseStreamEvent]: Either a Response object (non-streaming) or an iterator of ResponseStreamEvent items (streaming).

Raises:

    NotImplementedError: If the selected provider does not support the Responses API.

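A hedged sketch of calling aresponses with a plain string input. The import path, provider, and model name are illustrative; the chosen provider must have SUPPORTS_RESPONSES set, otherwise NotImplementedError is raised.

import asyncio

from any_llm import AnyLLM  # assumed public import path


async def main() -> None:
    llm = AnyLLM.create("openai")  # illustrative; must support the Responses API
    response = await llm.aresponses(
        model="gpt-4.1-mini",
        input_data="Write a haiku about type checkers.",
        max_output_tokens=200,
    )
    print(response)  # a Response object when stream is not set


asyncio.run(main())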

completion(**kwargs)

Create a chat completion synchronously.

See AnyLLM.acompletion

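Because completion() simply runs acompletion() on an event loop, the same keyword arguments apply; with stream=True it returns a synchronous iterator of chunks. A hedged sketch follows; the chunk field access assumes the OpenAI chunk shape (choices[0].delta.content), which is not spelled out on this page.

from any_llm import AnyLLM  # assumed public import path

llm = AnyLLM.create("openai")
chunks = llm.completion(
    model="gpt-4.1-mini",
    messages=[{"role": "user", "content": "Count from 1 to 5."}],
    stream=True,
)
for chunk in chunks:  # Iterator[ChatCompletionChunk]
    # Assumes chunks mirror OpenAI's shape: choices[0].delta.content
    print(chunk.choices[0].delta.content or "", end="")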

create(provider, api_key=None, api_base=None, **kwargs) classmethod

Create a provider instance using the given provider name and config.

Parameters:

    provider (str | LLMProvider, required): The provider name (e.g., 'openai', 'anthropic').
    api_key (str | None, default None): API key for the provider.
    api_base (str | None, default None): Base URL for the provider API.
    **kwargs (Any): Additional provider-specific arguments.

Returns:

    AnyLLM: Provider instance for the specified provider.

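A hedged sketch of the two ways to configure a provider through create(): explicit arguments, or relying on the provider's ENV_API_KEY_NAME environment variable. The import path, key values, and gateway URL are placeholders.

from any_llm import AnyLLM  # assumed public import path

# Explicit configuration (api_base is useful for OpenAI-compatible gateways).
llm = AnyLLM.create(
    "openai",
    api_key="sk-...",  # placeholder, not a real key
    api_base="https://my-gateway.example.invalid/v1",
)

# Environment-based configuration: if api_key is omitted, the provider reads its
# ENV_API_KEY_NAME variable and raises MissingApiKeyError when it is unset.
llm = AnyLLM.create("anthropic")  # assumes the anthropic key is already exported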

get_all_provider_metadata() classmethod

Get metadata for all supported providers.

Returns:

    list[ProviderMetadata]: List of ProviderMetadata objects, one per supported provider, sorted by name.

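For example, the metadata list can drive a quick capability overview. Field names follow the ProviderMetadata construction shown in get_provider_metadata() below; the import path is assumed.

from any_llm import AnyLLM  # assumed public import path

for meta in AnyLLM.get_all_provider_metadata():
    flags = []
    if meta.streaming:
        flags.append("streaming")
    if meta.embedding:
        flags.append("embedding")
    if meta.responses:
        flags.append("responses")
    # Prints one line per provider: name, env var, docs URL, selected feature flags.
    print(f"{meta.name}: env={meta.env_key} docs={meta.doc_url} features={', '.join(flags) or '-'}")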

get_provider_class(provider_key) classmethod

Get the provider class without instantiating it.

Parameters:

    provider_key (str | LLMProvider, required): The provider key (e.g., 'anthropic', 'openai').

Returns:

    type[AnyLLM]: The provider class.

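This is handy for checking feature flags without touching API keys or instantiating a client. A short sketch (import path assumed, provider name illustrative):

from any_llm import AnyLLM  # assumed public import path

provider_cls = AnyLLM.get_provider_class("anthropic")
if provider_cls.SUPPORTS_RESPONSES:
    print("Responses API available")
else:
    print("Fall back to the completion API")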

get_provider_enum(provider_key) classmethod

Convert a string provider key to an LLMProvider enum.

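A small sketch of the success and failure paths (import path assumed; the exception's module path is not shown on this page, so it is caught generically here):

from any_llm import AnyLLM  # assumed public import path

provider = AnyLLM.get_provider_enum("openai")
print(provider)  # the LLMProvider member for 'openai'

try:
    AnyLLM.get_provider_enum("not-a-provider")
except Exception as err:  # raises UnsupportedProviderError
    print(f"Unsupported: {err}")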

get_provider_metadata() classmethod

Get provider metadata without requiring instantiation.

Returns:

    ProviderMetadata: Provider metadata including the provider name, environment variable name, documentation URL, feature support flags, and class name.


get_supported_providers() classmethod

Get a list of supported provider keys.


responses(**kwargs)

Create a response synchronously.

See AnyLLM.aresponses


split_model_provider(model) classmethod

Extract the provider key from the model identifier.

Supports both new format 'provider:model' (e.g., 'mistral:mistral-small') and legacy format 'provider/model' (e.g., 'mistral/mistral-small').

The legacy format is deprecated and will be removed in version 1.0.

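A closing sketch of both accepted formats (import path assumed; the returned enum member is described generically, since exact member names are not part of this page):

from any_llm import AnyLLM  # assumed public import path

provider, model_name = AnyLLM.split_model_provider("mistral:mistral-small")
print(provider, model_name)  # LLMProvider member for 'mistral', 'mistral-small'

# Legacy 'provider/model' still parses but emits a DeprecationWarning.
provider, model_name = AnyLLM.split_model_provider("mistral/mistral-small")

# A string without either delimiter raises ValueError:
# AnyLLM.split_model_provider("mistral-small")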