Skip to content

Providers

Bases: LLM

OpenAI LLM provider.

Implements the LLM interface for OpenAI's GPT models, including support for structured outputs via the responses.parse API.

The API key is read from the OPENAI_API_KEY environment variable.

Attributes:

Name Type Description
client

The async OpenAI client instance.

Example

>>> llm = OpenAI(
...     model="gpt-4o",
...     input_cost=2.5,
...     output_cost=10.0,
... )
>>> response = await llm.get_response("Hello, GPT!")

Source code in src/majordomo_llm/providers/openai.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
class OpenAI(LLM):
    """OpenAI LLM provider.

    Implements the LLM interface for OpenAI's GPT models, including
    support for structured outputs via the responses.parse API.

    The API key is read from the ``OPENAI_API_KEY`` environment variable.

    Attributes:
        client: The async OpenAI client instance.

    Example:
        >>> llm = OpenAI(
        ...     model="gpt-4o",
        ...     input_cost=2.5,
        ...     output_cost=10.0,
        ... )
        >>> response = await llm.get_response("Hello, GPT!")
    """

    def __init__(
        self,
        model: str,
        input_cost: float,
        output_cost: float,
        supports_temperature_top_p: bool = True,
        *,
        api_key: str | None = None,
        api_key_alias: str | None = None,
    ) -> None:
        """Initialize the OpenAI provider.

        Args:
            model: The GPT model identifier (e.g., "gpt-4o", "gpt-5").
            input_cost: Cost per million input tokens in USD.
            output_cost: Cost per million output tokens in USD.
            supports_temperature_top_p: Whether temperature/top_p are supported.
            api_key: Optional API key. Defaults to ``OPENAI_API_KEY`` env var.
            api_key_alias: Optional human-readable name for the API key.

        Raises:
            ConfigurationError: If no API key is provided and env var is not set.
        """
        resolved_api_key = resolve_api_key(api_key, "OPENAI_API_KEY", "OpenAI")
        super().__init__(
            provider="openai",
            model=model,
            input_cost=input_cost,
            output_cost=output_cost,
            supports_temperature_top_p=supports_temperature_top_p,
            api_key=resolved_api_key,
            api_key_alias=api_key_alias,
        )
        self.client = openai.AsyncOpenAI(api_key=resolved_api_key)

    def _request_kwargs(
        self,
        user_prompt: str,
        system_prompt: str | None,
        temperature: float,
        top_p: float,
    ) -> dict:
        """Build keyword arguments shared by responses.create/parse calls.

        Omits ``temperature``/``top_p`` for models that do not support them,
        so callers need only a single call site instead of a duplicated branch.
        """
        kwargs: dict = {
            "model": self.model,
            "instructions": system_prompt,
            "input": user_prompt,
        }
        if self.supports_temperature_top_p:
            kwargs["temperature"] = temperature
            kwargs["top_p"] = top_p
        return kwargs

    @retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
    async def get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Get a plain text response from OpenAI.

        Retried up to 3 times with jittered exponential backoff.
        """
        return await self._get_response(user_prompt, system_prompt, temperature, top_p)

    async def _get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Internal method to get a response from OpenAI.

        Raises:
            ProviderError: If the OpenAI API call fails.
        """
        start_time = time.time()
        try:
            response = await self.client.responses.create(
                **self._request_kwargs(user_prompt, system_prompt, temperature, top_p)
            )
        except openai.APIError as e:
            raise ProviderError(
                f"OpenAI API error: {e}",
                provider="openai",
                original_error=e,
            ) from e

        execution_time = time.time() - start_time
        input_tokens = response.usage.input_tokens
        output_tokens = response.usage.output_tokens
        input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

        return LLMResponse(
            content=response.output_text,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cached_tokens=response.usage.input_tokens_details.cached_tokens,
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            response_time=execution_time,
        )

    # Consistency fix: the Anthropic provider retries _get_structured_response;
    # apply the same policy here so structured calls are equally resilient.
    @retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
    async def _get_structured_response(
        self,
        response_model: type[T],
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMJSONResponse:
        """OpenAI-specific implementation using structured outputs with JSON Schema.

        Raises:
            ProviderError: If the OpenAI API call fails.
        """
        start_time = time.time()

        try:
            response = await self.client.responses.parse(
                **self._request_kwargs(user_prompt, system_prompt, temperature, top_p),
                text_format=response_model,
            )
        except openai.APIError as e:
            raise ProviderError(
                f"OpenAI API error: {e}",
                provider="openai",
                original_error=e,
            ) from e

        execution_time = time.time() - start_time
        input_tokens = response.usage.input_tokens
        output_tokens = response.usage.output_tokens
        input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

        return LLMJSONResponse(
            content=response.output_parsed,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cached_tokens=response.usage.input_tokens_details.cached_tokens,
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            response_time=execution_time,
        )

__init__

__init__(
    model,
    input_cost,
    output_cost,
    supports_temperature_top_p=True,
    *,
    api_key=None,
    api_key_alias=None,
)

Initialize the OpenAI provider.

Parameters:

Name Type Description Default
model str

The GPT model identifier (e.g., "gpt-4o", "gpt-5").

required
input_cost float

Cost per million input tokens in USD.

required
output_cost float

Cost per million output tokens in USD.

required
supports_temperature_top_p bool

Whether temperature/top_p are supported.

True
api_key str | None

Optional API key. Defaults to OPENAI_API_KEY env var.

None
api_key_alias str | None

Optional human-readable name for the API key.

None

Raises:

Type Description
ConfigurationError

If no API key is provided and env var is not set.

Source code in src/majordomo_llm/providers/openai.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def __init__(
    self,
    model: str,
    input_cost: float,
    output_cost: float,
    supports_temperature_top_p: bool = True,
    *,
    api_key: str | None = None,
    api_key_alias: str | None = None,
) -> None:
    """Construct an OpenAI provider and its async client.

    The key is taken from ``api_key`` when given, otherwise from the
    ``OPENAI_API_KEY`` environment variable.

    Args:
        model: GPT model identifier such as "gpt-4o" or "gpt-5".
        input_cost: USD cost per million input tokens.
        output_cost: USD cost per million output tokens.
        supports_temperature_top_p: Whether the model accepts temperature/top_p.
        api_key: Explicit API key; falls back to the environment variable.
        api_key_alias: Human-readable label for the key, if any.

    Raises:
        ConfigurationError: When no key is supplied and the env var is unset.
    """
    key = resolve_api_key(api_key, "OPENAI_API_KEY", "OpenAI")
    super().__init__(
        provider="openai",
        model=model,
        input_cost=input_cost,
        output_cost=output_cost,
        supports_temperature_top_p=supports_temperature_top_p,
        api_key=key,
        api_key_alias=api_key_alias,
    )
    self.client = openai.AsyncOpenAI(api_key=key)

get_response async

get_response(
    user_prompt,
    system_prompt=None,
    temperature=0.3,
    top_p=1.0,
)

Get a plain text response from OpenAI.

Source code in src/majordomo_llm/providers/openai.py
71
72
73
74
75
76
77
78
79
80
@retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
async def get_response(
    self,
    user_prompt: str,
    system_prompt: str | None = None,
    temperature: float = 0.3,
    top_p: float = 1.0,
) -> LLMResponse:
    """Return a plain-text completion from OpenAI.

    Thin retrying wrapper; the actual API call lives in ``_get_response``.
    """
    return await self._get_response(
        user_prompt,
        system_prompt,
        temperature,
        top_p,
    )

Bases: LLM

Anthropic (Claude) LLM provider.

Implements the LLM interface for Anthropic's Claude models, including support for tool calling for structured outputs and optional web search.

The API key is read from the ANTHROPIC_API_KEY environment variable.

Attributes:

Name Type Description
client

The async Anthropic client instance.

Example

>>> llm = Anthropic(
...     model="claude-sonnet-4-20250514",
...     input_cost=3.0,
...     output_cost=15.0,
... )
>>> response = await llm.get_response("Hello, Claude!")

Source code in src/majordomo_llm/providers/anthropic.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
class Anthropic(LLM):
    """Anthropic (Claude) LLM provider.

    Implements the LLM interface for Anthropic's Claude models, including
    support for tool calling for structured outputs and optional web search.

    The API key is read from the ``ANTHROPIC_API_KEY`` environment variable.

    Attributes:
        client: The async Anthropic client instance.

    Example:
        >>> llm = Anthropic(
        ...     model="claude-sonnet-4-20250514",
        ...     input_cost=3.0,
        ...     output_cost=15.0,
        ... )
        >>> response = await llm.get_response("Hello, Claude!")
    """

    def __init__(
        self,
        model: str,
        input_cost: float,
        output_cost: float,
        supports_temperature_top_p: bool = True,
        use_web_search: bool = False,
        *,
        api_key: str | None = None,
        api_key_alias: str | None = None,
    ) -> None:
        """Initialize the Anthropic provider.

        Args:
            model: The Claude model identifier (e.g., "claude-sonnet-4-20250514").
            input_cost: Cost per million input tokens in USD.
            output_cost: Cost per million output tokens in USD.
            supports_temperature_top_p: Whether temperature/top_p are supported.
            use_web_search: Enable web search (requires claude-sonnet-4-5-20250929).
            api_key: Optional API key. Defaults to ``ANTHROPIC_API_KEY`` env var.
            api_key_alias: Optional human-readable name for the API key.

        Raises:
            ConfigurationError: If no API key is provided and env var is not set.
        """
        resolved_api_key = resolve_api_key(api_key, "ANTHROPIC_API_KEY", "Anthropic")
        super().__init__(
            provider="anthropic",
            model=model,
            input_cost=input_cost,
            output_cost=output_cost,
            supports_temperature_top_p=supports_temperature_top_p,
            use_web_search=use_web_search,
            api_key=resolved_api_key,
            api_key_alias=api_key_alias,
        )
        self.client = anthropic.AsyncAnthropic(api_key=resolved_api_key)

    @retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
    async def get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Get a plain text response from Anthropic.

        Raises:
            ProviderError: If the Anthropic API call fails.
        """
        if system_prompt is None:
            system_prompt = "You are a helpful assistant"
        start_time = time.time()

        messages = _anthropic_user_message(user_prompt)
        system_message = _anthropic_system_prompt(system_prompt)

        tools: list = []
        if self.use_web_search:
            # BUG FIX: "type" and "name" were swapped. The versioned tool type
            # is "web_search_20250305" and the tool name is "web_search",
            # matching WebSearchTool20250305Param used elsewhere in this class.
            tools.append({"type": "web_search_20250305", "name": "web_search"})

        try:
            if self.supports_temperature_top_p:
                response_message = await self.client.messages.create(
                    model=self.model,
                    max_tokens=1024,
                    system=system_message,
                    messages=messages,
                    temperature=temperature,
                    top_p=top_p,
                    tools=tools,
                    tool_choice=ToolChoiceAutoParam(type="auto"),
                )
            else:
                response_message = await self.client.messages.create(
                    model=self.model,
                    max_tokens=1024,
                    system=system_message,
                    messages=messages,
                    tools=tools,
                    tool_choice=ToolChoiceAutoParam(type="auto"),
                )
        except anthropic.APIError as e:
            raise ProviderError(
                f"Anthropic API error: {e}",
                provider="anthropic",
                original_error=e,
            ) from e

        execution_time = time.time() - start_time
        # Claude replies as a list of content blocks; keep only text blocks.
        final_response = [c.text for c in response_message.content if c.type == "text"]

        input_tokens = response_message.usage.input_tokens
        output_tokens = response_message.usage.output_tokens
        input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

        return LLMResponse(
            content="\n".join(final_response),
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cached_tokens=response_message.usage.cache_read_input_tokens or 0,
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            response_time=execution_time,
        )

    @retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
    async def _get_structured_response(
        self,
        response_model: type[T],
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMJSONResponse:
        """Anthropic-specific implementation using tool calling for structured outputs.

        Raises:
            ProviderError: If the Anthropic API call fails.
            ResponseParsingError: If no structured_response tool use is returned.
        """
        # Web search requires the dedicated multi-turn flow (and a model that
        # supports it); everything else uses a single forced tool call.
        if self.model == "claude-sonnet-4-5-20250929" and self.use_web_search:
            return await self._get_structured_response_with_web_search(
                response_model=response_model,
                user_prompt=user_prompt,
                system_prompt=system_prompt,
            )

        schema = response_model.model_json_schema()

        tool_instruction = "Use the structured_response tool to provide your answer."
        if system_prompt is None:
            system_prompt = f"You are a helpful assistant. {tool_instruction}"
        else:
            system_prompt = f"{system_prompt}\n\n{tool_instruction}"

        messages = _anthropic_user_message(user_prompt)
        system_message = _anthropic_system_prompt(system_prompt)
        tool_desc = f"Provide a structured response using the {response_model.__name__} format"
        tools = [
            ToolParam(
                name="structured_response",
                description=tool_desc,
                input_schema=schema,
            )
        ]

        start_time = time.time()
        try:
            if self.supports_temperature_top_p:
                response_message = await self.client.messages.create(
                    model=self.model,
                    max_tokens=4096,
                    system=system_message,
                    messages=messages,
                    temperature=temperature,
                    top_p=top_p,
                    tools=tools,
                    tool_choice=ToolChoiceToolParam(type="tool", name="structured_response"),
                )
            else:
                # NOTE(review): this branch uses max_tokens=8192 vs 4096 above —
                # looks intentional (reasoning models may need more room), but
                # worth confirming the asymmetry is deliberate.
                response_message = await self.client.messages.create(
                    model=self.model,
                    max_tokens=8192,
                    system=system_message,
                    messages=messages,
                    tools=tools,
                    tool_choice=ToolChoiceToolParam(type="tool", name="structured_response"),
                )
        except anthropic.APIError as e:
            raise ProviderError(
                f"Anthropic API error: {e}",
                provider="anthropic",
                original_error=e,
            ) from e

        execution_time = time.time() - start_time

        # Extract the tool use content
        content = None
        for block in response_message.content:
            if block.type == "tool_use" and block.name == "structured_response":
                content = block.input
                break

        if content is None:
            raise ResponseParsingError(
                "No structured response tool use found in Anthropic response",
                raw_content=str(response_message.content),
            )

        input_tokens = response_message.usage.input_tokens
        output_tokens = response_message.usage.output_tokens
        input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

        return LLMJSONResponse(
            content=content,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cached_tokens=response_message.usage.cache_read_input_tokens or 0,
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            response_time=execution_time,
        )

    async def _get_structured_response_with_web_search(
        self,
        response_model: type[T],
        user_prompt: str,
        system_prompt: str | None = None,
    ) -> LLMJSONResponse:
        """Get structured response with web search enabled.

        Raises:
            ProviderError: If the Anthropic API call fails.
            ResponseParsingError: If no structured_response tool use is returned.
        """
        response, execution_time = await self._structured_response_with_web_search_helper(
            response_model=response_model,
            user_prompt=user_prompt,
            system_prompt=system_prompt,
        )

        content = None
        for block in response.content:
            if block.type == "tool_use" and block.name == "structured_response":
                content = block.input
                break

        if content is None:
            raise ResponseParsingError(
                "No structured response tool use found in Anthropic response",
                raw_content=str(response.content),
            )

        input_tokens = response.usage.input_tokens
        output_tokens = response.usage.output_tokens
        input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

        return LLMJSONResponse(
            content=content,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cached_tokens=response.usage.cache_read_input_tokens or 0,
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            response_time=execution_time,
        )

    async def _structured_response_with_web_search_helper(
        self,
        response_model: type[T],
        user_prompt: str,
        system_prompt: str | None = None,
    ) -> tuple:
        """Helper for web search with structured response.

        Runs up to three search turns; if the model has not produced a
        structured_response by then, forces one final tool call.

        Returns:
            A ``(response, execution_time)`` tuple.

        Raises:
            ProviderError: If the Anthropic API call fails.
        """
        schema = response_model.model_json_schema()
        structured_response_tool = ToolParam(
            name="structured_response",
            description=f"Provide a structured response using the {response_model.__name__} format",
            input_schema=schema,
        )
        web_search_tool = WebSearchTool20250305Param(
            name="web_search",
            type="web_search_20250305",
        )
        tools = [structured_response_tool, web_search_tool]

        tool_instruction = "Use the structured_response tool to provide your answer."
        if system_prompt is None:
            system_prompt = f"You are a helpful assistant. {tool_instruction}"
        else:
            system_prompt = f"{system_prompt}\n\n{tool_instruction}"

        messages = _anthropic_user_message(user_prompt)
        system_message = _anthropic_system_prompt(system_prompt)

        start_time = time.time()
        current_messages = messages.copy()
        search_count = 0

        try:
            while search_count < 3:
                response = await self.client.messages.create(
                    model=self.model,
                    max_tokens=8192,
                    system=system_message,
                    messages=current_messages,
                    tools=tools,
                    tool_choice=ToolChoiceAutoParam(type="auto"),
                )

                # Check what tool was used
                if response.stop_reason == "tool_use":
                    tool_uses = [b for b in response.content if b.type == "tool_use"]

                    # If structured_response was used, we're done!
                    if any(t.name == "structured_response" for t in tool_uses):
                        execution_time = time.time() - start_time
                        return response, execution_time

                    # If web_search was used, continue conversation
                    if any(t.name == "web_search" for t in tool_uses):
                        logger.info("Web search initiated (turn %d)", search_count + 1)
                        search_count += 1

                        # Add assistant response
                        current_messages.append({
                            "role": "assistant",
                            "content": response.content,
                        })

                        # Add continuation prompt
                        current_messages.append({
                            "role": "user",
                            "content": (
                                "Continue with your analysis. Use the structured_response "
                                "tool when ready to generate the final output."
                            ),
                        })
                        continue
                break

            # Search budget exhausted (or no tool use): force a final
            # structured_response by constraining the tool choice.
            final_response = await self.client.messages.create(
                model=self.model,
                max_tokens=4096,
                system=_anthropic_system_prompt(system_prompt),
                messages=current_messages,
                tools=[structured_response_tool],
                tool_choice=ToolChoiceToolParam(type="tool", name="structured_response"),
            )
        except anthropic.APIError as e:
            raise ProviderError(
                f"Anthropic API error: {e}",
                provider="anthropic",
                original_error=e,
            ) from e

        execution_time = time.time() - start_time
        return final_response, execution_time

__init__

__init__(
    model,
    input_cost,
    output_cost,
    supports_temperature_top_p=True,
    use_web_search=False,
    *,
    api_key=None,
    api_key_alias=None,
)

Initialize the Anthropic provider.

Parameters:

Name Type Description Default
model str

The Claude model identifier (e.g., "claude-sonnet-4-20250514").

required
input_cost float

Cost per million input tokens in USD.

required
output_cost float

Cost per million output tokens in USD.

required
supports_temperature_top_p bool

Whether temperature/top_p are supported.

True
use_web_search bool

Enable web search (requires claude-sonnet-4-5-20250929).

False
api_key str | None

Optional API key. Defaults to ANTHROPIC_API_KEY env var.

None
api_key_alias str | None

Optional human-readable name for the API key.

None

Raises:

Type Description
ConfigurationError

If no API key is provided and env var is not set.

Source code in src/majordomo_llm/providers/anthropic.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def __init__(
    self,
    model: str,
    input_cost: float,
    output_cost: float,
    supports_temperature_top_p: bool = True,
    use_web_search: bool = False,
    *,
    api_key: str | None = None,
    api_key_alias: str | None = None,
) -> None:
    """Construct an Anthropic provider and its async client.

    The key is taken from ``api_key`` when given, otherwise from the
    ``ANTHROPIC_API_KEY`` environment variable.

    Args:
        model: Claude model identifier such as "claude-sonnet-4-20250514".
        input_cost: USD cost per million input tokens.
        output_cost: USD cost per million output tokens.
        supports_temperature_top_p: Whether the model accepts temperature/top_p.
        use_web_search: Enable web search (requires claude-sonnet-4-5-20250929).
        api_key: Explicit API key; falls back to the environment variable.
        api_key_alias: Human-readable label for the key, if any.

    Raises:
        ConfigurationError: When no key is supplied and the env var is unset.
    """
    key = resolve_api_key(api_key, "ANTHROPIC_API_KEY", "Anthropic")
    super().__init__(
        provider="anthropic",
        model=model,
        input_cost=input_cost,
        output_cost=output_cost,
        supports_temperature_top_p=supports_temperature_top_p,
        use_web_search=use_web_search,
        api_key=key,
        api_key_alias=api_key_alias,
    )
    self.client = anthropic.AsyncAnthropic(api_key=key)

get_response async

get_response(
    user_prompt,
    system_prompt=None,
    temperature=0.3,
    top_p=1.0,
)

Get a plain text response from Anthropic.

Source code in src/majordomo_llm/providers/anthropic.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
@retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
async def get_response(
    self,
    user_prompt: str,
    system_prompt: str | None = None,
    temperature: float = 0.3,
    top_p: float = 1.0,
) -> LLMResponse:
    """Get a plain text response from Anthropic.

    Raises:
        ProviderError: If the Anthropic API call fails.
    """
    if system_prompt is None:
        system_prompt = "You are a helpful assistant"
    start_time = time.time()

    messages = _anthropic_user_message(user_prompt)
    system_message = _anthropic_system_prompt(system_prompt)

    tools: list = []
    if self.use_web_search:
        # BUG FIX: "type" and "name" were swapped. The versioned tool type is
        # "web_search_20250305" and the tool name is "web_search", matching
        # WebSearchTool20250305Param used in the structured-response path.
        tools.append({"type": "web_search_20250305", "name": "web_search"})

    try:
        if self.supports_temperature_top_p:
            response_message = await self.client.messages.create(
                model=self.model,
                max_tokens=1024,
                system=system_message,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                tools=tools,
                tool_choice=ToolChoiceAutoParam(type="auto"),
            )
        else:
            response_message = await self.client.messages.create(
                model=self.model,
                max_tokens=1024,
                system=system_message,
                messages=messages,
                tools=tools,
                tool_choice=ToolChoiceAutoParam(type="auto"),
            )
    except anthropic.APIError as e:
        raise ProviderError(
            f"Anthropic API error: {e}",
            provider="anthropic",
            original_error=e,
        ) from e

    execution_time = time.time() - start_time
    # Claude replies as a list of content blocks; keep only text blocks.
    final_response = [c.text for c in response_message.content if c.type == "text"]

    input_tokens = response_message.usage.input_tokens
    output_tokens = response_message.usage.output_tokens
    input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

    return LLMResponse(
        content="\n".join(final_response),
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        cached_tokens=response_message.usage.cache_read_input_tokens or 0,
        input_cost=input_cost,
        output_cost=output_cost,
        total_cost=total_cost,
        response_time=execution_time,
    )

Bases: LLM

Google Gemini LLM provider.

Implements the LLM interface for Google's Gemini models, including support for structured outputs via response schemas.

The API key is read from the GEMINI_API_KEY environment variable.

Attributes:

Name Type Description
client

The Google GenAI client instance.

Example

>>> llm = Gemini(
...     model="gemini-2.5-flash",
...     input_cost=0.30,
...     output_cost=2.50,
... )
>>> response = await llm.get_response("Hello, Gemini!")

Source code in src/majordomo_llm/providers/gemini.py
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
class Gemini(LLM):
    """Google Gemini LLM provider.

    Implements the LLM interface for Google's Gemini models, including
    support for structured outputs via response schemas.

    The API key is read from the ``GEMINI_API_KEY`` environment variable.

    Attributes:
        client: The Google GenAI client instance.

    Example:
        >>> llm = Gemini(
        ...     model="gemini-2.5-flash",
        ...     input_cost=0.30,
        ...     output_cost=2.50,
        ... )
        >>> response = await llm.get_response("Hello, Gemini!")
    """

    def __init__(
        self,
        model: str,
        input_cost: float,
        output_cost: float,
        *,
        api_key: str | None = None,
        api_key_alias: str | None = None,
    ) -> None:
        """Initialize the Gemini provider.

        Args:
            model: The Gemini model identifier (e.g., "gemini-2.5-flash").
            input_cost: Cost per million input tokens in USD.
            output_cost: Cost per million output tokens in USD.
            api_key: Optional API key. Defaults to ``GEMINI_API_KEY`` env var.
            api_key_alias: Optional human-readable name for the API key.

        Raises:
            ConfigurationError: If no API key is provided and env var is not set.
        """
        resolved_api_key = resolve_api_key(api_key, "GEMINI_API_KEY", "Gemini")
        super().__init__(
            provider="gemini",
            model=model,
            input_cost=input_cost,
            output_cost=output_cost,
            supports_temperature_top_p=True,
            api_key=resolved_api_key,
            api_key_alias=api_key_alias,
        )
        self.client = genai.Client(api_key=resolved_api_key)

    @retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
    async def get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Get a plain text response from Gemini."""
        return await self._get_response(user_prompt, system_prompt, temperature, top_p)

    @staticmethod
    def _extract_usage(usage_metadata) -> tuple[int, int, int]:
        """Extract (input, output, cached) token counts from usage metadata.

        The GenAI SDK may report ``None`` for absent counts, so every field
        is coerced to 0.  ``cached_content_token_count`` is read defensively
        via ``getattr`` in case the installed SDK version predates it.
        """
        input_tokens = usage_metadata.prompt_token_count or 0
        output_tokens = usage_metadata.candidates_token_count or 0
        cached_tokens = getattr(usage_metadata, "cached_content_token_count", 0) or 0
        return input_tokens, output_tokens, cached_tokens

    async def _get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Internal method to get a response from Gemini."""
        start_time = time.time()
        try:
            response = await self.client.aio.models.generate_content(
                model=self.model,
                config=types.GenerateContentConfig(
                    system_instruction=system_prompt,
                    temperature=temperature,
                    top_p=top_p,
                ),
                contents=user_prompt,
            )
        except genai_errors.APIError as e:
            raise ProviderError(
                f"Gemini API error: {e}",
                provider="gemini",
                original_error=e,
            ) from e
        execution_time = time.time() - start_time

        # Report actual cache hits instead of hard-coding 0, mirroring the
        # cached-token accounting used by the other providers.
        input_tokens, output_tokens, cached_tokens = self._extract_usage(
            response.usage_metadata
        )
        input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

        return LLMResponse(
            content=response.text,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cached_tokens=cached_tokens,
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            response_time=execution_time,
        )

    async def _get_structured_response(
        self,
        response_model: type[T],
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMJSONResponse:
        """Gemini-specific implementation using response schema for structured outputs."""
        schema = response_model.model_json_schema()

        start_time = time.time()
        try:
            response = await self.client.aio.models.generate_content(
                model=self.model,
                config=types.GenerateContentConfig(
                    system_instruction=system_prompt,
                    temperature=temperature,
                    top_p=top_p,
                    response_schema=schema,
                    response_mime_type="application/json",
                ),
                contents=user_prompt,
            )
        except genai_errors.APIError as e:
            raise ProviderError(
                f"Gemini API error: {e}",
                provider="gemini",
                original_error=e,
            ) from e
        execution_time = time.time() - start_time

        try:
            content = json.loads(response.text)
        except json.JSONDecodeError as e:
            raise ResponseParsingError(
                f"Failed to parse JSON response: {e}",
                raw_content=response.text,
            ) from e

        # Same usage extraction as _get_response: None-safe and cache-aware.
        input_tokens, output_tokens, cached_tokens = self._extract_usage(
            response.usage_metadata
        )
        input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

        return LLMJSONResponse(
            content=content,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cached_tokens=cached_tokens,
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            response_time=execution_time,
        )

__init__

__init__(
    model,
    input_cost,
    output_cost,
    *,
    api_key=None,
    api_key_alias=None,
)

Initialize the Gemini provider.

Parameters:

Name Type Description Default
model str

The Gemini model identifier (e.g., "gemini-2.5-flash").

required
input_cost float

Cost per million input tokens in USD.

required
output_cost float

Cost per million output tokens in USD.

required
api_key str | None

Optional API key. Defaults to GEMINI_API_KEY env var.

None
api_key_alias str | None

Optional human-readable name for the API key.

None

Raises:

Type Description
ConfigurationError

If no API key is provided and env var is not set.

Source code in src/majordomo_llm/providers/gemini.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def __init__(
    self,
    model: str,
    input_cost: float,
    output_cost: float,
    *,
    api_key: str | None = None,
    api_key_alias: str | None = None,
) -> None:
    """Initialize the Gemini provider.

    Args:
        model: The Gemini model identifier (e.g., "gemini-2.5-flash").
        input_cost: Cost per million input tokens in USD.
        output_cost: Cost per million output tokens in USD.
        api_key: Optional API key. Defaults to ``GEMINI_API_KEY`` env var.
        api_key_alias: Optional human-readable name for the API key.

    Raises:
        ConfigurationError: If no API key is provided and env var is not set.
    """
    resolved_api_key = resolve_api_key(api_key, "GEMINI_API_KEY", "Gemini")
    super().__init__(
        provider="gemini",
        model=model,
        input_cost=input_cost,
        output_cost=output_cost,
        supports_temperature_top_p=True,
        api_key=resolved_api_key,
        api_key_alias=api_key_alias,
    )
    self.client = genai.Client(api_key=resolved_api_key)

get_response async

get_response(
    user_prompt,
    system_prompt=None,
    temperature=0.3,
    top_p=1.0,
)

Get a plain text response from Gemini.

Source code in src/majordomo_llm/providers/gemini.py
72
73
74
75
76
77
78
79
80
81
@retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
async def get_response(
    self,
    user_prompt: str,
    system_prompt: str | None = None,
    temperature: float = 0.3,
    top_p: float = 1.0,
) -> LLMResponse:
    """Get a plain text response from Gemini."""
    return await self._get_response(user_prompt, system_prompt, temperature, top_p)

Bases: LLM

DeepSeek LLM provider.

Implements the LLM interface for DeepSeek's models using the OpenAI-compatible API. Supports both DeepSeek-V3 (chat) and DeepSeek-R1 (reasoner) models.

The API key is read from the DEEPSEEK_API_KEY environment variable.

Attributes:

Name Type Description
client

The async OpenAI client instance configured for DeepSeek.

Example

>>> llm = DeepSeek(
...     model="deepseek-chat",
...     input_cost=0.28,
...     output_cost=0.42,
... )
>>> response = await llm.get_response("Hello, DeepSeek!")

Source code in src/majordomo_llm/providers/deepseek.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
class DeepSeek(LLM):
    """DeepSeek LLM provider.

    Implements the LLM interface for DeepSeek's models using the OpenAI-compatible
    API. Supports both DeepSeek-V3 (chat) and DeepSeek-R1 (reasoner) models.

    The API key is read from the ``DEEPSEEK_API_KEY`` environment variable.

    Attributes:
        client: The async OpenAI client instance configured for DeepSeek.

    Example:
        >>> llm = DeepSeek(
        ...     model="deepseek-chat",
        ...     input_cost=0.28,
        ...     output_cost=0.42,
        ... )
        >>> response = await llm.get_response("Hello, DeepSeek!")
    """

    DEEPSEEK_BASE_URL = "https://api.deepseek.com"

    def __init__(
        self,
        model: str,
        input_cost: float,
        output_cost: float,
        supports_temperature_top_p: bool = True,
        *,
        api_key: str | None = None,
        api_key_alias: str | None = None,
    ) -> None:
        """Initialize the DeepSeek provider.

        Args:
            model: The DeepSeek model identifier (e.g., "deepseek-chat", "deepseek-reasoner").
            input_cost: Cost per million input tokens in USD.
            output_cost: Cost per million output tokens in USD.
            supports_temperature_top_p: Whether temperature/top_p are supported.
            api_key: Optional API key. Defaults to ``DEEPSEEK_API_KEY`` env var.
            api_key_alias: Optional human-readable name for the API key.

        Raises:
            ConfigurationError: If no API key is provided and env var is not set.
        """
        key = resolve_api_key(api_key, "DEEPSEEK_API_KEY", "DeepSeek")
        super().__init__(
            provider="deepseek",
            model=model,
            input_cost=input_cost,
            output_cost=output_cost,
            supports_temperature_top_p=supports_temperature_top_p,
            api_key=key,
            api_key_alias=api_key_alias,
        )
        # DeepSeek exposes an OpenAI-compatible endpoint, so the stock
        # AsyncOpenAI client is reused with a custom base URL.
        self.client = openai.AsyncOpenAI(api_key=key, base_url=self.DEEPSEEK_BASE_URL)

    @retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
    async def get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Get a plain text response from DeepSeek."""
        return await self._get_response(user_prompt, system_prompt, temperature, top_p)

    async def _get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Internal method to get a response from DeepSeek."""
        messages = [{"role": "user", "content": user_prompt}]
        if system_prompt:
            messages.insert(0, {"role": "system", "content": system_prompt})

        # Build the request once; sampling knobs are included only when the
        # model supports them (e.g. deepseek-reasoner ignores/rejects them).
        request = {"model": self.model, "messages": messages}
        if self.supports_temperature_top_p:
            request["temperature"] = temperature
            request["top_p"] = top_p

        started = time.time()
        try:
            response = await self.client.chat.completions.create(**request)
        except openai.APIError as e:
            raise ProviderError(
                f"DeepSeek API error: {e}",
                provider="deepseek",
                original_error=e,
            ) from e
        elapsed = time.time() - started

        usage = response.usage
        details = getattr(usage, "prompt_tokens_details", None)
        cached = getattr(details, "cached_tokens", 0) or 0
        in_cost, out_cost, total = self._calculate_costs(
            usage.prompt_tokens, usage.completion_tokens
        )

        return LLMResponse(
            content=response.choices[0].message.content,
            input_tokens=usage.prompt_tokens,
            output_tokens=usage.completion_tokens,
            cached_tokens=cached,
            input_cost=in_cost,
            output_cost=out_cost,
            total_cost=total,
            response_time=elapsed,
        )

    async def _get_structured_response(
        self,
        response_model: type[T],
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMJSONResponse:
        """DeepSeek-specific implementation using JSON mode for structured outputs."""
        # The target schema is injected into the system prompt; JSON mode only
        # guarantees syntactically valid JSON, not schema conformance.
        schema = response_model.model_json_schema()
        combined_system_prompt = build_schema_prompt(schema, system_prompt)

        request = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": combined_system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "response_format": {"type": "json_object"},
        }
        if self.supports_temperature_top_p:
            request["temperature"] = temperature
            request["top_p"] = top_p

        started = time.time()
        try:
            response = await self.client.chat.completions.create(**request)
        except openai.APIError as e:
            raise ProviderError(
                f"DeepSeek API error: {e}",
                provider="deepseek",
                original_error=e,
            ) from e
        elapsed = time.time() - started

        raw = response.choices[0].message.content
        try:
            content = json.loads(raw)
        except json.JSONDecodeError as e:
            raise ResponseParsingError(
                f"Failed to parse JSON response: {e}",
                raw_content=raw,
            ) from e

        usage = response.usage
        details = getattr(usage, "prompt_tokens_details", None)
        cached = getattr(details, "cached_tokens", 0) or 0
        in_cost, out_cost, total = self._calculate_costs(
            usage.prompt_tokens, usage.completion_tokens
        )

        return LLMJSONResponse(
            content=content,
            input_tokens=usage.prompt_tokens,
            output_tokens=usage.completion_tokens,
            cached_tokens=cached,
            input_cost=in_cost,
            output_cost=out_cost,
            total_cost=total,
            response_time=elapsed,
        )

__init__

__init__(
    model,
    input_cost,
    output_cost,
    supports_temperature_top_p=True,
    *,
    api_key=None,
    api_key_alias=None,
)

Initialize the DeepSeek provider.

Parameters:

Name Type Description Default
model str

The DeepSeek model identifier (e.g., "deepseek-chat", "deepseek-reasoner").

required
input_cost float

Cost per million input tokens in USD.

required
output_cost float

Cost per million output tokens in USD.

required
supports_temperature_top_p bool

Whether temperature/top_p are supported.

True
api_key str | None

Optional API key. Defaults to DEEPSEEK_API_KEY env var.

None
api_key_alias str | None

Optional human-readable name for the API key.

None

Raises:

Type Description
ConfigurationError

If no API key is provided and env var is not set.

Source code in src/majordomo_llm/providers/deepseek.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def __init__(
    self,
    model: str,
    input_cost: float,
    output_cost: float,
    supports_temperature_top_p: bool = True,
    *,
    api_key: str | None = None,
    api_key_alias: str | None = None,
) -> None:
    """Initialize the DeepSeek provider.

    Args:
        model: The DeepSeek model identifier (e.g., "deepseek-chat", "deepseek-reasoner").
        input_cost: Cost per million input tokens in USD.
        output_cost: Cost per million output tokens in USD.
        supports_temperature_top_p: Whether temperature/top_p are supported.
        api_key: Optional API key. Defaults to ``DEEPSEEK_API_KEY`` env var.
        api_key_alias: Optional human-readable name for the API key.

    Raises:
        ConfigurationError: If no API key is provided and env var is not set.
    """
    resolved_api_key = resolve_api_key(api_key, "DEEPSEEK_API_KEY", "DeepSeek")
    super().__init__(
        provider="deepseek",
        model=model,
        input_cost=input_cost,
        output_cost=output_cost,
        supports_temperature_top_p=supports_temperature_top_p,
        api_key=resolved_api_key,
        api_key_alias=api_key_alias,
    )
    self.client = openai.AsyncOpenAI(
        api_key=resolved_api_key,
        base_url=self.DEEPSEEK_BASE_URL,
    )

get_response async

get_response(
    user_prompt,
    system_prompt=None,
    temperature=0.3,
    top_p=1.0,
)

Get a plain text response from DeepSeek.

Source code in src/majordomo_llm/providers/deepseek.py
84
85
86
87
88
89
90
91
92
93
@retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
async def get_response(
    self,
    user_prompt: str,
    system_prompt: str | None = None,
    temperature: float = 0.3,
    top_p: float = 1.0,
) -> LLMResponse:
    """Get a plain text response from DeepSeek."""
    return await self._get_response(user_prompt, system_prompt, temperature, top_p)

Bases: LLM

Cohere LLM provider.

Implements the LLM interface for Cohere's models using the V2 API. Supports Command A, Command R+, Command R, and Command R7B models.

The API key is read from the CO_API_KEY environment variable.

Attributes:

Name Type Description
client

The async Cohere client instance.

Example

>>> llm = Cohere(
...     model="command-a-03-2025",
...     input_cost=2.50,
...     output_cost=10.00,
... )
>>> response = await llm.get_response("Hello, Cohere!")

Source code in src/majordomo_llm/providers/cohere.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
class Cohere(LLM):
    """Cohere LLM provider.

    Implements the LLM interface for Cohere's models using the V2 API.
    Supports Command A, Command R+, Command R, and Command R7B models.

    The API key is read from the ``CO_API_KEY`` environment variable.

    Attributes:
        client: The async Cohere client instance.

    Example:
        >>> llm = Cohere(
        ...     model="command-a-03-2025",
        ...     input_cost=2.50,
        ...     output_cost=10.00,
        ... )
        >>> response = await llm.get_response("Hello, Cohere!")
    """

    def __init__(
        self,
        model: str,
        input_cost: float,
        output_cost: float,
        supports_temperature_top_p: bool = True,
        *,
        api_key: str | None = None,
        api_key_alias: str | None = None,
    ) -> None:
        """Initialize the Cohere provider.

        Args:
            model: The Cohere model identifier (e.g., "command-a-03-2025").
            input_cost: Cost per million input tokens in USD.
            output_cost: Cost per million output tokens in USD.
            supports_temperature_top_p: Whether temperature/top_p are supported.
            api_key: Optional API key. Defaults to ``CO_API_KEY`` env var.
            api_key_alias: Optional human-readable name for the API key.

        Raises:
            ConfigurationError: If no API key is provided and env var is not set.
        """
        key = resolve_api_key(api_key, "CO_API_KEY", "Cohere")
        super().__init__(
            provider="cohere",
            model=model,
            input_cost=input_cost,
            output_cost=output_cost,
            supports_temperature_top_p=supports_temperature_top_p,
            api_key=key,
            api_key_alias=api_key_alias,
        )
        self.client = cohere.AsyncClientV2(api_key=key)

    @retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
    async def get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Get a plain text response from Cohere."""
        return await self._get_response(user_prompt, system_prompt, temperature, top_p)

    async def _get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Internal method to get a response from Cohere."""
        messages = [{"role": "user", "content": user_prompt}]
        if system_prompt:
            messages.insert(0, {"role": "system", "content": system_prompt})

        # Assemble the call once; Cohere's V2 chat API names nucleus
        # sampling ``p`` rather than ``top_p``.
        request = {"model": self.model, "messages": messages}
        if self.supports_temperature_top_p:
            request["temperature"] = temperature
            request["p"] = top_p

        started = time.time()
        try:
            response = await self.client.chat(**request)
        except cohere.core.api_error.ApiError as e:
            raise ProviderError(
                f"Cohere API error: {e}",
                provider="cohere",
                original_error=e,
            ) from e
        elapsed = time.time() - started

        tokens = response.usage.tokens
        in_cost, out_cost, total = self._calculate_costs(
            tokens.input_tokens, tokens.output_tokens
        )

        return LLMResponse(
            content=response.message.content[0].text,
            input_tokens=tokens.input_tokens,
            output_tokens=tokens.output_tokens,
            cached_tokens=0,
            input_cost=in_cost,
            output_cost=out_cost,
            total_cost=total,
            response_time=elapsed,
        )

    async def _get_structured_response(
        self,
        response_model: type[T],
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMJSONResponse:
        """Cohere-specific implementation using JSON mode for structured outputs.

        Uses prompt-based schema injection with json_object mode since Cohere's
        json_schema validation doesn't support all JSON Schema constraints
        (e.g., minimum/maximum for numbers, enum values).

        The schema is flattened via inline_schema_refs() to remove $defs/$ref
        which Cohere's model handles poorly.
        """
        # Inline $refs to flatten the schema - Cohere struggles with $defs/$ref
        schema = inline_schema_refs(response_model.model_json_schema())
        combined_system_prompt = build_schema_prompt(schema, system_prompt)

        request = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": combined_system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "response_format": {"type": "json_object"},
        }
        if self.supports_temperature_top_p:
            request["temperature"] = temperature
            request["p"] = top_p

        started = time.time()
        try:
            response = await self.client.chat(**request)
        except cohere.core.api_error.ApiError as e:
            raise ProviderError(
                f"Cohere API error: {e}",
                provider="cohere",
                original_error=e,
            ) from e
        elapsed = time.time() - started

        raw = response.message.content[0].text
        try:
            content = json.loads(raw)
        except json.JSONDecodeError as e:
            raise ResponseParsingError(
                f"Failed to parse JSON response: {e}",
                raw_content=raw,
            ) from e

        tokens = response.usage.tokens
        in_cost, out_cost, total = self._calculate_costs(
            tokens.input_tokens, tokens.output_tokens
        )

        return LLMJSONResponse(
            content=content,
            input_tokens=tokens.input_tokens,
            output_tokens=tokens.output_tokens,
            cached_tokens=0,
            input_cost=in_cost,
            output_cost=out_cost,
            total_cost=total,
            response_time=elapsed,
        )

__init__

__init__(
    model,
    input_cost,
    output_cost,
    supports_temperature_top_p=True,
    *,
    api_key=None,
    api_key_alias=None,
)

Initialize the Cohere provider.

Parameters:

Name Type Description Default
model str

The Cohere model identifier (e.g., "command-a-03-2025").

required
input_cost float

Cost per million input tokens in USD.

required
output_cost float

Cost per million output tokens in USD.

required
supports_temperature_top_p bool

Whether temperature/top_p are supported.

True
api_key str | None

Optional API key. Defaults to CO_API_KEY env var.

None
api_key_alias str | None

Optional human-readable name for the API key.

None

Raises:

Type Description
ConfigurationError

If no API key is provided and env var is not set.

Source code in src/majordomo_llm/providers/cohere.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def __init__(
    self,
    model: str,
    input_cost: float,
    output_cost: float,
    supports_temperature_top_p: bool = True,
    *,
    api_key: str | None = None,
    api_key_alias: str | None = None,
) -> None:
    """Initialize the Cohere provider.

    Args:
        model: The Cohere model identifier (e.g., "command-a-03-2025").
        input_cost: Cost per million input tokens in USD.
        output_cost: Cost per million output tokens in USD.
        supports_temperature_top_p: Whether temperature/top_p are supported.
        api_key: Optional API key. Defaults to ``CO_API_KEY`` env var.
        api_key_alias: Optional human-readable name for the API key.

    Raises:
        ConfigurationError: If no API key is provided and env var is not set.
    """
    resolved_api_key = resolve_api_key(api_key, "CO_API_KEY", "Cohere")
    super().__init__(
        provider="cohere",
        model=model,
        input_cost=input_cost,
        output_cost=output_cost,
        supports_temperature_top_p=supports_temperature_top_p,
        api_key=resolved_api_key,
        api_key_alias=api_key_alias,
    )
    self.client = cohere.AsyncClientV2(api_key=resolved_api_key)

get_response async

get_response(
    user_prompt,
    system_prompt=None,
    temperature=0.3,
    top_p=1.0,
)

Get a plain text response from Cohere.

Source code in src/majordomo_llm/providers/cohere.py
80
81
82
83
84
85
86
87
88
89
@retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
async def get_response(
    self,
    user_prompt: str,
    system_prompt: str | None = None,
    temperature: float = 0.3,
    top_p: float = 1.0,
) -> LLMResponse:
    """Get a plain text response from Cohere."""
    return await self._get_response(user_prompt, system_prompt, temperature, top_p)