Skip to content

Factory & Cascade

Create an LLM instance for the specified provider and model.

This is the primary factory function for creating LLM instances. It handles provider-specific initialization and configuration lookup.

Parameters:

Name Type Description Default
provider str

The LLM provider name. One of: "openai", "anthropic", "gemini", "deepseek", "cohere".

required
model str

The model identifier (e.g., "gpt-4o", "claude-sonnet-4-20250514").

required

Returns:

Type Description
LLM

An LLM instance configured for the specified provider and model.

Raises:

Type Description
ConfigurationError

If the provider or model is not recognized.

Example

>>> llm = get_llm_instance("anthropic", "claude-sonnet-4-20250514")
>>> response = await llm.get_response("Hello!")

Source code in src/majordomo_llm/factory.py
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def get_llm_instance(provider: str, model: str) -> LLM:
    """Create an LLM instance for the specified provider and model.

    This is the primary factory function for creating LLM instances. It handles
    provider-specific initialization and configuration lookup.

    Args:
        provider: The LLM provider name. One of: "openai", "anthropic", "gemini",
            "deepseek", "cohere".
        model: The model identifier (e.g., "gpt-4o", "claude-sonnet-4-20250514").

    Returns:
        An LLM instance configured for the specified provider and model.

    Raises:
        ConfigurationError: If the provider or model is not recognized.

    Example:
        >>> llm = get_llm_instance("anthropic", "claude-sonnet-4-20250514")
        >>> response = await llm.get_response("Hello!")
    """
    llm_config_entry = LLM_CONFIG.get(provider)
    if llm_config_entry is None:
        available = ", ".join(LLM_CONFIG.keys())
        raise ConfigurationError(f"Unknown LLM provider '{provider}'. Available: {available}")

    llm_models = llm_config_entry["models"]
    model_attributes = llm_models.get(model)
    if model_attributes is None:
        available = ", ".join(llm_models.keys())
        raise ConfigurationError(
            f"Unknown model '{model}' for provider '{provider}'. Available: {available}"
        )

    # Dispatch table replaces the previous five-way if/elif chain that repeated
    # the same constructor call per provider.
    provider_classes: dict[str, type[LLM]] = {
        "openai": OpenAI,
        "anthropic": Anthropic,
        "gemini": Gemini,
        "deepseek": DeepSeek,
        "cohere": Cohere,
    }
    llm_class = provider_classes.get(provider)
    if llm_class is None:
        # Defensive: provider exists in LLM_CONFIG but has no registered class.
        raise ConfigurationError(f"Unknown LLM provider '{provider}'")

    kwargs = {
        "model": model,
        "input_cost": model_attributes["input_cost"],
        "output_cost": model_attributes["output_cost"],
    }
    # Gemini's constructor does not accept this flag; every other provider does,
    # defaulting to True when the config entry omits it.
    if provider != "gemini":
        kwargs["supports_temperature_top_p"] = model_attributes.get(
            "supports_temperature_top_p", True
        )
    return llm_class(**kwargs)

Create LLM instances for all configured providers and models.

Yields LLM instances one at a time, which is useful for initialization or testing all available models.

Yields:

Type Description
LLM

LLM instances for each configured provider/model combination.

Example

>>> for llm in get_all_llm_instances():
...     print(llm.get_full_model_name())

Source code in src/majordomo_llm/factory.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
def get_all_llm_instances() -> Iterator[LLM]:
    """Create LLM instances for all configured providers and models.

    Lazily produces one LLM instance per configured provider/model pair,
    which is convenient for warm-up, smoke tests, or enumerating everything
    that is available.

    Yields:
        LLM instances for each configured provider/model combination.

    Example:
        >>> for llm in get_all_llm_instances():
        ...     print(llm.get_full_model_name())
    """
    for provider_name, provider_entry in LLM_CONFIG.items():
        for model_name in provider_entry.get("models", {}):
            logger.debug("Creating LLM instance: %s/%s", provider_name, model_name)
            yield get_llm_instance(provider_name, model_name)

Bases: LLM

LLM wrapper that tries multiple providers in priority order.

When a provider fails with a ProviderError, the next provider in the cascade is tried. This provides automatic failover for resilience.

The providers list defines priority order - first provider is tried first.

Attributes:

Name Type Description
llms

List of LLM instances in priority order.

Example

>>> cascade = LLMCascade([
...     ("anthropic", "claude-sonnet-4-20250514"),  # Primary
...     ("openai", "gpt-4o"),                       # First fallback
...     ("gemini", "gemini-2.5-flash"),             # Last resort
... ])
>>> response = await cascade.get_response("Hello!")

Source code in src/majordomo_llm/cascade.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
class LLMCascade(LLM):
    """LLM wrapper that tries multiple providers in priority order.

    When a provider fails with a ProviderError, the next provider in the
    cascade is tried. This provides automatic failover for resilience.

    The providers list defines priority order - first provider is tried first.

    Attributes:
        llms: List of LLM instances in priority order.

    Example:
        >>> cascade = LLMCascade([
        ...     ("anthropic", "claude-sonnet-4-20250514"),  # Primary
        ...     ("openai", "gpt-4o"),                       # First fallback
        ...     ("gemini", "gemini-2.5-flash"),             # Last resort
        ... ])
        >>> response = await cascade.get_response("Hello!")
    """

    def __init__(self, providers: list[tuple[str, str]]) -> None:
        """Initialize the cascade with a list of providers.

        Args:
            providers: List of (provider, model) tuples in priority order.
                First provider is tried first.

        Raises:
            ValueError: If providers list is empty.
        """
        if not providers:
            raise ValueError("LLMCascade requires at least one provider")

        self.llms = [get_llm_instance(name, model_id) for name, model_id in providers]

        # The cascade reports the primary (first) provider's metadata: model
        # name, per-token costs, and sampling-capability flag.
        head = self.llms[0]
        super().__init__(
            provider="cascade",
            model=head.model,
            input_cost=head.input_cost,
            output_cost=head.output_cost,
            supports_temperature_top_p=head.supports_temperature_top_p,
        )

    async def get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Get a response, falling back to next provider on failure."""
        call_kwargs = {
            "user_prompt": user_prompt,
            "system_prompt": system_prompt,
            "temperature": temperature,
            "top_p": top_p,
        }
        return await self._cascade_call("get_response", **call_kwargs)

    async def get_json_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMJSONResponse:
        """Get a JSON response, falling back to next provider on failure."""
        call_kwargs = {
            "user_prompt": user_prompt,
            "system_prompt": system_prompt,
            "temperature": temperature,
            "top_p": top_p,
        }
        return await self._cascade_call("get_json_response", **call_kwargs)

    async def _get_structured_response(
        self,
        response_model: type[T],
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMJSONResponse:
        """Get a structured response, falling back to next provider on failure."""
        call_kwargs = {
            "response_model": response_model,
            "user_prompt": user_prompt,
            "system_prompt": system_prompt,
            "temperature": temperature,
            "top_p": top_p,
        }
        # Dispatches to each underlying LLM's public structured-response entry
        # point rather than its private hook.
        return await self._cascade_call("get_structured_json_response", **call_kwargs)

    async def _cascade_call(self, method_name: str, **kwargs) -> LLMResponse | LLMJSONResponse:
        """Try each provider in order until one succeeds.

        Args:
            method_name: The LLM method to call.
            **kwargs: Arguments to pass to the method.

        Returns:
            The response from the first successful provider.

        Raises:
            ProviderError: If all providers fail.
        """
        last_error: ProviderError | None = None

        for candidate in self.llms:
            bound_method = getattr(candidate, method_name)
            try:
                return await bound_method(**kwargs)
            except ProviderError as exc:
                logger.warning(
                    "Provider %s/%s failed: %s. Trying next provider.",
                    candidate.provider,
                    candidate.model,
                    exc,
                )
                last_error = exc

        # Every provider raised ProviderError; surface the most recent cause.
        raise ProviderError(
            f"All providers in cascade failed. Last error: {last_error}",
            provider="cascade",
            original_error=last_error,
        )

__init__

__init__(providers)

Initialize the cascade with a list of providers.

Parameters:

Name Type Description Default
providers list[tuple[str, str]]

List of (provider, model) tuples in priority order. First provider is tried first.

required

Raises:

Type Description
ValueError

If providers list is empty.

Source code in src/majordomo_llm/cascade.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def __init__(self, providers: list[tuple[str, str]]) -> None:
    """Initialize the cascade with a list of providers.

    Args:
        providers: List of (provider, model) tuples in priority order.
            First provider is tried first.

    Raises:
        ValueError: If providers list is empty.
    """
    if not providers:
        raise ValueError("LLMCascade requires at least one provider")

    self.llms = [get_llm_instance(name, model_id) for name, model_id in providers]

    # Metadata (model name, costs, capability flag) mirrors the primary
    # (first-priority) provider.
    head = self.llms[0]
    super().__init__(
        provider="cascade",
        model=head.model,
        input_cost=head.input_cost,
        output_cost=head.output_cost,
        supports_temperature_top_p=head.supports_temperature_top_p,
    )

get_json_response async

get_json_response(
    user_prompt,
    system_prompt=None,
    temperature=0.3,
    top_p=1.0,
)

Get a JSON response, falling back to next provider on failure.

Source code in src/majordomo_llm/cascade.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
async def get_json_response(
    self,
    user_prompt: str,
    system_prompt: str | None = None,
    temperature: float = 0.3,
    top_p: float = 1.0,
) -> LLMJSONResponse:
    """Get a JSON response, falling back to next provider on failure."""
    call_kwargs = {
        "user_prompt": user_prompt,
        "system_prompt": system_prompt,
        "temperature": temperature,
        "top_p": top_p,
    }
    return await self._cascade_call("get_json_response", **call_kwargs)

get_response async

get_response(
    user_prompt,
    system_prompt=None,
    temperature=0.3,
    top_p=1.0,
)

Get a response, falling back to next provider on failure.

Source code in src/majordomo_llm/cascade.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
async def get_response(
    self,
    user_prompt: str,
    system_prompt: str | None = None,
    temperature: float = 0.3,
    top_p: float = 1.0,
) -> LLMResponse:
    """Get a response, falling back to next provider on failure."""
    call_kwargs = {
        "user_prompt": user_prompt,
        "system_prompt": system_prompt,
        "temperature": temperature,
        "top_p": top_p,
    }
    return await self._cascade_call("get_response", **call_kwargs)