Skip to content

Providers

Bases: LLM

OpenAI LLM provider.

Implements the LLM interface for OpenAI's GPT models, including support for structured outputs via the responses.parse API.

The API key is read from the OPENAI_API_KEY environment variable.

Attributes:

Name Type Description
client

The async OpenAI client instance.

Example

>>> llm = OpenAI(
...     model="gpt-4o",
...     input_cost=2.5,
...     output_cost=10.0,
... )
>>> response = await llm.get_response("Hello, GPT!")

Source code in src/majordomo_llm/providers/openai.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
class OpenAI(LLM):
    """OpenAI LLM provider.

    Implements the LLM interface for OpenAI's GPT models, including
    support for structured outputs via the responses.parse API.

    The API key is read from the ``OPENAI_API_KEY`` environment variable.

    Attributes:
        client: The async OpenAI client instance.

    Example:
        >>> llm = OpenAI(
        ...     model="gpt-4o",
        ...     input_cost=2.5,
        ...     output_cost=10.0,
        ... )
        >>> response = await llm.get_response("Hello, GPT!")
    """

    def __init__(
        self,
        model: str,
        input_cost: float,
        output_cost: float,
        supports_temperature_top_p: bool = True,
        *,
        api_key: str | None = None,
        api_key_alias: str | None = None,
    ) -> None:
        """Initialize the OpenAI provider.

        Args:
            model: The GPT model identifier (e.g., "gpt-4o", "gpt-5").
            input_cost: Cost per million input tokens in USD.
            output_cost: Cost per million output tokens in USD.
            supports_temperature_top_p: Whether temperature/top_p are supported.
            api_key: Optional API key. Defaults to ``OPENAI_API_KEY`` env var.
            api_key_alias: Optional human-readable name for the API key.

        Raises:
            ConfigurationError: If no API key is provided and env var is not set.
        """
        resolved_api_key = resolve_api_key(api_key, "OPENAI_API_KEY", "OpenAI")
        super().__init__(
            provider="openai",
            model=model,
            input_cost=input_cost,
            output_cost=output_cost,
            supports_temperature_top_p=supports_temperature_top_p,
            api_key=resolved_api_key,
            api_key_alias=api_key_alias,
        )
        self.client = openai.AsyncOpenAI(api_key=resolved_api_key)

    def _request_kwargs(
        self,
        user_prompt: str,
        system_prompt: str | None,
        temperature: float,
        top_p: float,
    ) -> dict:
        """Build keyword arguments shared by responses.create/parse calls.

        Omits ``temperature``/``top_p`` for models that do not support them,
        so callers need only a single call site instead of a duplicated branch.
        """
        kwargs: dict = {
            "model": self.model,
            "instructions": system_prompt,
            "input": user_prompt,
        }
        if self.supports_temperature_top_p:
            kwargs["temperature"] = temperature
            kwargs["top_p"] = top_p
        return kwargs

    @retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
    async def get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Get a plain text response from OpenAI.

        Retried up to 3 times with jittered exponential backoff.
        """
        return await self._get_response(user_prompt, system_prompt, temperature, top_p)

    async def _get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Internal method to get a response from OpenAI.

        Raises:
            ProviderError: If the OpenAI API call fails.
        """
        start_time = time.time()
        try:
            response = await self.client.responses.create(
                **self._request_kwargs(user_prompt, system_prompt, temperature, top_p)
            )
        except openai.APIError as e:
            raise ProviderError(
                f"OpenAI API error: {e}",
                provider="openai",
                original_error=e,
            ) from e

        execution_time = time.time() - start_time
        input_tokens = response.usage.input_tokens
        output_tokens = response.usage.output_tokens
        input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

        return LLMResponse(
            content=response.output_text,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cached_tokens=response.usage.input_tokens_details.cached_tokens,
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            response_time=execution_time,
        )

    # Consistency fix: the Anthropic provider retries _get_structured_response;
    # apply the same policy here so structured calls are equally resilient.
    @retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
    async def _get_structured_response(
        self,
        response_model: type[T],
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMJSONResponse:
        """OpenAI-specific implementation using structured outputs with JSON Schema.

        Raises:
            ProviderError: If the OpenAI API call fails.
        """
        start_time = time.time()

        try:
            response = await self.client.responses.parse(
                **self._request_kwargs(user_prompt, system_prompt, temperature, top_p),
                text_format=response_model,
            )
        except openai.APIError as e:
            raise ProviderError(
                f"OpenAI API error: {e}",
                provider="openai",
                original_error=e,
            ) from e

        execution_time = time.time() - start_time
        input_tokens = response.usage.input_tokens
        output_tokens = response.usage.output_tokens
        input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

        return LLMJSONResponse(
            content=response.output_parsed,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cached_tokens=response.usage.input_tokens_details.cached_tokens,
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            response_time=execution_time,
        )

__init__

__init__(
    model,
    input_cost,
    output_cost,
    supports_temperature_top_p=True,
    *,
    api_key=None,
    api_key_alias=None,
)

Initialize the OpenAI provider.

Parameters:

Name Type Description Default
model str

The GPT model identifier (e.g., "gpt-4o", "gpt-5").

required
input_cost float

Cost per million input tokens in USD.

required
output_cost float

Cost per million output tokens in USD.

required
supports_temperature_top_p bool

Whether temperature/top_p are supported.

True
api_key str | None

Optional API key. Defaults to OPENAI_API_KEY env var.

None
api_key_alias str | None

Optional human-readable name for the API key.

None

Raises:

Type Description
ConfigurationError

If no API key is provided and env var is not set.

Source code in src/majordomo_llm/providers/openai.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def __init__(
    self,
    model: str,
    input_cost: float,
    output_cost: float,
    supports_temperature_top_p: bool = True,
    *,
    api_key: str | None = None,
    api_key_alias: str | None = None,
) -> None:
    """Construct an OpenAI provider and its async client.

    The key is taken from ``api_key`` when given, otherwise from the
    ``OPENAI_API_KEY`` environment variable.

    Args:
        model: GPT model identifier such as "gpt-4o" or "gpt-5".
        input_cost: USD cost per million input tokens.
        output_cost: USD cost per million output tokens.
        supports_temperature_top_p: Whether the model accepts temperature/top_p.
        api_key: Explicit API key; falls back to the environment variable.
        api_key_alias: Human-readable label for the key, if any.

    Raises:
        ConfigurationError: When no key is supplied and the env var is unset.
    """
    key = resolve_api_key(api_key, "OPENAI_API_KEY", "OpenAI")
    super().__init__(
        provider="openai",
        model=model,
        input_cost=input_cost,
        output_cost=output_cost,
        supports_temperature_top_p=supports_temperature_top_p,
        api_key=key,
        api_key_alias=api_key_alias,
    )
    self.client = openai.AsyncOpenAI(api_key=key)

get_response async

get_response(
    user_prompt,
    system_prompt=None,
    temperature=0.3,
    top_p=1.0,
)

Get a plain text response from OpenAI.

Source code in src/majordomo_llm/providers/openai.py
71
72
73
74
75
76
77
78
79
80
@retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
async def get_response(
    self,
    user_prompt: str,
    system_prompt: str | None = None,
    temperature: float = 0.3,
    top_p: float = 1.0,
) -> LLMResponse:
    """Return a plain-text completion from OpenAI.

    Thin retrying wrapper; the actual API call lives in ``_get_response``.
    """
    return await self._get_response(
        user_prompt,
        system_prompt,
        temperature,
        top_p,
    )

Bases: LLM

Anthropic (Claude) LLM provider.

Implements the LLM interface for Anthropic's Claude models, including support for tool calling for structured outputs and optional web search.

The API key is read from the ANTHROPIC_API_KEY environment variable.

Attributes:

Name Type Description
client

The async Anthropic client instance.

Example

>>> llm = Anthropic(
...     model="claude-sonnet-4-20250514",
...     input_cost=3.0,
...     output_cost=15.0,
... )
>>> response = await llm.get_response("Hello, Claude!")

Source code in src/majordomo_llm/providers/anthropic.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
class Anthropic(LLM):
    """Anthropic (Claude) LLM provider.

    Implements the LLM interface for Anthropic's Claude models, including
    support for tool calling for structured outputs and optional web search.

    The API key is read from the ``ANTHROPIC_API_KEY`` environment variable.

    Attributes:
        client: The async Anthropic client instance.

    Example:
        >>> llm = Anthropic(
        ...     model="claude-sonnet-4-20250514",
        ...     input_cost=3.0,
        ...     output_cost=15.0,
        ... )
        >>> response = await llm.get_response("Hello, Claude!")
    """

    def __init__(
        self,
        model: str,
        input_cost: float,
        output_cost: float,
        supports_temperature_top_p: bool = True,
        use_web_search: bool = False,
        *,
        api_key: str | None = None,
        api_key_alias: str | None = None,
    ) -> None:
        """Initialize the Anthropic provider.

        Args:
            model: The Claude model identifier (e.g., "claude-sonnet-4-20250514").
            input_cost: Cost per million input tokens in USD.
            output_cost: Cost per million output tokens in USD.
            supports_temperature_top_p: Whether temperature/top_p are supported.
            use_web_search: Enable web search (requires claude-sonnet-4-5-20250929).
            api_key: Optional API key. Defaults to ``ANTHROPIC_API_KEY`` env var.
            api_key_alias: Optional human-readable name for the API key.

        Raises:
            ConfigurationError: If no API key is provided and env var is not set.
        """
        resolved_api_key = resolve_api_key(api_key, "ANTHROPIC_API_KEY", "Anthropic")
        super().__init__(
            provider="anthropic",
            model=model,
            input_cost=input_cost,
            output_cost=output_cost,
            supports_temperature_top_p=supports_temperature_top_p,
            use_web_search=use_web_search,
            api_key=resolved_api_key,
            api_key_alias=api_key_alias,
        )
        self.client = anthropic.AsyncAnthropic(api_key=resolved_api_key)

    @retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
    async def get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Get a plain text response from Anthropic.

        Raises:
            ProviderError: If the Anthropic API call fails.
        """
        if system_prompt is None:
            system_prompt = "You are a helpful assistant"
        start_time = time.time()

        messages = _anthropic_user_message(user_prompt)
        system_message = _anthropic_system_prompt(system_prompt)

        tools: list = []
        if self.use_web_search:
            # BUG FIX: "type" and "name" were swapped. The versioned tool type
            # is "web_search_20250305" and the tool name is "web_search",
            # matching WebSearchTool20250305Param used elsewhere in this class.
            tools.append({"type": "web_search_20250305", "name": "web_search"})

        try:
            if self.supports_temperature_top_p:
                response_message = await self.client.messages.create(
                    model=self.model,
                    max_tokens=1024,
                    system=system_message,
                    messages=messages,
                    temperature=temperature,
                    top_p=top_p,
                    tools=tools,
                    tool_choice=ToolChoiceAutoParam(type="auto"),
                )
            else:
                response_message = await self.client.messages.create(
                    model=self.model,
                    max_tokens=1024,
                    system=system_message,
                    messages=messages,
                    tools=tools,
                    tool_choice=ToolChoiceAutoParam(type="auto"),
                )
        except anthropic.APIError as e:
            raise ProviderError(
                f"Anthropic API error: {e}",
                provider="anthropic",
                original_error=e,
            ) from e

        execution_time = time.time() - start_time
        # Claude replies as a list of content blocks; keep only text blocks.
        final_response = [c.text for c in response_message.content if c.type == "text"]

        input_tokens = response_message.usage.input_tokens
        output_tokens = response_message.usage.output_tokens
        input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

        return LLMResponse(
            content="\n".join(final_response),
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cached_tokens=response_message.usage.cache_read_input_tokens or 0,
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            response_time=execution_time,
        )

    @retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
    async def _get_structured_response(
        self,
        response_model: type[T],
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMJSONResponse:
        """Anthropic-specific implementation using tool calling for structured outputs.

        Raises:
            ProviderError: If the Anthropic API call fails.
            ResponseParsingError: If no structured_response tool use is returned.
        """
        # Web search requires the dedicated multi-turn flow (and a model that
        # supports it); everything else uses a single forced tool call.
        if self.model == "claude-sonnet-4-5-20250929" and self.use_web_search:
            return await self._get_structured_response_with_web_search(
                response_model=response_model,
                user_prompt=user_prompt,
                system_prompt=system_prompt,
            )

        schema = response_model.model_json_schema()

        tool_instruction = "Use the structured_response tool to provide your answer."
        if system_prompt is None:
            system_prompt = f"You are a helpful assistant. {tool_instruction}"
        else:
            system_prompt = f"{system_prompt}\n\n{tool_instruction}"

        messages = _anthropic_user_message(user_prompt)
        system_message = _anthropic_system_prompt(system_prompt)
        tool_desc = f"Provide a structured response using the {response_model.__name__} format"
        tools = [
            ToolParam(
                name="structured_response",
                description=tool_desc,
                input_schema=schema,
            )
        ]

        start_time = time.time()
        try:
            if self.supports_temperature_top_p:
                response_message = await self.client.messages.create(
                    model=self.model,
                    max_tokens=4096,
                    system=system_message,
                    messages=messages,
                    temperature=temperature,
                    top_p=top_p,
                    tools=tools,
                    tool_choice=ToolChoiceToolParam(type="tool", name="structured_response"),
                )
            else:
                # NOTE(review): this branch uses max_tokens=8192 vs 4096 above —
                # looks intentional (reasoning models may need more room), but
                # worth confirming the asymmetry is deliberate.
                response_message = await self.client.messages.create(
                    model=self.model,
                    max_tokens=8192,
                    system=system_message,
                    messages=messages,
                    tools=tools,
                    tool_choice=ToolChoiceToolParam(type="tool", name="structured_response"),
                )
        except anthropic.APIError as e:
            raise ProviderError(
                f"Anthropic API error: {e}",
                provider="anthropic",
                original_error=e,
            ) from e

        execution_time = time.time() - start_time

        # Extract the tool use content
        content = None
        for block in response_message.content:
            if block.type == "tool_use" and block.name == "structured_response":
                content = block.input
                break

        if content is None:
            raise ResponseParsingError(
                "No structured response tool use found in Anthropic response",
                raw_content=str(response_message.content),
            )

        input_tokens = response_message.usage.input_tokens
        output_tokens = response_message.usage.output_tokens
        input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

        return LLMJSONResponse(
            content=content,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cached_tokens=response_message.usage.cache_read_input_tokens or 0,
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            response_time=execution_time,
        )

    async def _get_structured_response_with_web_search(
        self,
        response_model: type[T],
        user_prompt: str,
        system_prompt: str | None = None,
    ) -> LLMJSONResponse:
        """Get structured response with web search enabled.

        Raises:
            ProviderError: If the Anthropic API call fails.
            ResponseParsingError: If no structured_response tool use is returned.
        """
        response, execution_time = await self._structured_response_with_web_search_helper(
            response_model=response_model,
            user_prompt=user_prompt,
            system_prompt=system_prompt,
        )

        content = None
        for block in response.content:
            if block.type == "tool_use" and block.name == "structured_response":
                content = block.input
                break

        if content is None:
            raise ResponseParsingError(
                "No structured response tool use found in Anthropic response",
                raw_content=str(response.content),
            )

        input_tokens = response.usage.input_tokens
        output_tokens = response.usage.output_tokens
        input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

        return LLMJSONResponse(
            content=content,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cached_tokens=response.usage.cache_read_input_tokens or 0,
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            response_time=execution_time,
        )

    async def _structured_response_with_web_search_helper(
        self,
        response_model: type[T],
        user_prompt: str,
        system_prompt: str | None = None,
    ) -> tuple:
        """Helper for web search with structured response.

        Runs up to three search turns; if the model has not produced a
        structured_response by then, forces one final tool call.

        Returns:
            A ``(response, execution_time)`` tuple.

        Raises:
            ProviderError: If the Anthropic API call fails.
        """
        schema = response_model.model_json_schema()
        structured_response_tool = ToolParam(
            name="structured_response",
            description=f"Provide a structured response using the {response_model.__name__} format",
            input_schema=schema,
        )
        web_search_tool = WebSearchTool20250305Param(
            name="web_search",
            type="web_search_20250305",
        )
        tools = [structured_response_tool, web_search_tool]

        tool_instruction = "Use the structured_response tool to provide your answer."
        if system_prompt is None:
            system_prompt = f"You are a helpful assistant. {tool_instruction}"
        else:
            system_prompt = f"{system_prompt}\n\n{tool_instruction}"

        messages = _anthropic_user_message(user_prompt)
        system_message = _anthropic_system_prompt(system_prompt)

        start_time = time.time()
        current_messages = messages.copy()
        search_count = 0

        try:
            while search_count < 3:
                response = await self.client.messages.create(
                    model=self.model,
                    max_tokens=8192,
                    system=system_message,
                    messages=current_messages,
                    tools=tools,
                    tool_choice=ToolChoiceAutoParam(type="auto"),
                )

                # Check what tool was used
                if response.stop_reason == "tool_use":
                    tool_uses = [b for b in response.content if b.type == "tool_use"]

                    # If structured_response was used, we're done!
                    if any(t.name == "structured_response" for t in tool_uses):
                        execution_time = time.time() - start_time
                        return response, execution_time

                    # If web_search was used, continue conversation
                    if any(t.name == "web_search" for t in tool_uses):
                        logger.info("Web search initiated (turn %d)", search_count + 1)
                        search_count += 1

                        # Add assistant response
                        current_messages.append({
                            "role": "assistant",
                            "content": response.content,
                        })

                        # Add continuation prompt
                        current_messages.append({
                            "role": "user",
                            "content": (
                                "Continue with your analysis. Use the structured_response "
                                "tool when ready to generate the final output."
                            ),
                        })
                        continue
                break

            # Search budget exhausted (or no tool use): force a final
            # structured_response by constraining the tool choice.
            final_response = await self.client.messages.create(
                model=self.model,
                max_tokens=4096,
                system=_anthropic_system_prompt(system_prompt),
                messages=current_messages,
                tools=[structured_response_tool],
                tool_choice=ToolChoiceToolParam(type="tool", name="structured_response"),
            )
        except anthropic.APIError as e:
            raise ProviderError(
                f"Anthropic API error: {e}",
                provider="anthropic",
                original_error=e,
            ) from e

        execution_time = time.time() - start_time
        return final_response, execution_time

__init__

__init__(
    model,
    input_cost,
    output_cost,
    supports_temperature_top_p=True,
    use_web_search=False,
    *,
    api_key=None,
    api_key_alias=None,
)

Initialize the Anthropic provider.

Parameters:

Name Type Description Default
model str

The Claude model identifier (e.g., "claude-sonnet-4-20250514").

required
input_cost float

Cost per million input tokens in USD.

required
output_cost float

Cost per million output tokens in USD.

required
supports_temperature_top_p bool

Whether temperature/top_p are supported.

True
use_web_search bool

Enable web search (requires claude-sonnet-4-5-20250929).

False
api_key str | None

Optional API key. Defaults to ANTHROPIC_API_KEY env var.

None
api_key_alias str | None

Optional human-readable name for the API key.

None

Raises:

Type Description
ConfigurationError

If no API key is provided and env var is not set.

Source code in src/majordomo_llm/providers/anthropic.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def __init__(
    self,
    model: str,
    input_cost: float,
    output_cost: float,
    supports_temperature_top_p: bool = True,
    use_web_search: bool = False,
    *,
    api_key: str | None = None,
    api_key_alias: str | None = None,
) -> None:
    """Construct an Anthropic provider and its async client.

    The key is taken from ``api_key`` when given, otherwise from the
    ``ANTHROPIC_API_KEY`` environment variable.

    Args:
        model: Claude model identifier such as "claude-sonnet-4-20250514".
        input_cost: USD cost per million input tokens.
        output_cost: USD cost per million output tokens.
        supports_temperature_top_p: Whether the model accepts temperature/top_p.
        use_web_search: Enable web search (requires claude-sonnet-4-5-20250929).
        api_key: Explicit API key; falls back to the environment variable.
        api_key_alias: Human-readable label for the key, if any.

    Raises:
        ConfigurationError: When no key is supplied and the env var is unset.
    """
    key = resolve_api_key(api_key, "ANTHROPIC_API_KEY", "Anthropic")
    super().__init__(
        provider="anthropic",
        model=model,
        input_cost=input_cost,
        output_cost=output_cost,
        supports_temperature_top_p=supports_temperature_top_p,
        use_web_search=use_web_search,
        api_key=key,
        api_key_alias=api_key_alias,
    )
    self.client = anthropic.AsyncAnthropic(api_key=key)

get_response async

get_response(
    user_prompt,
    system_prompt=None,
    temperature=0.3,
    top_p=1.0,
)

Get a plain text response from Anthropic.

Source code in src/majordomo_llm/providers/anthropic.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
@retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
async def get_response(
    self,
    user_prompt: str,
    system_prompt: str | None = None,
    temperature: float = 0.3,
    top_p: float = 1.0,
) -> LLMResponse:
    """Get a plain text response from Anthropic.

    Raises:
        ProviderError: If the Anthropic API call fails.
    """
    if system_prompt is None:
        system_prompt = "You are a helpful assistant"
    start_time = time.time()

    messages = _anthropic_user_message(user_prompt)
    system_message = _anthropic_system_prompt(system_prompt)

    tools: list = []
    if self.use_web_search:
        # BUG FIX: "type" and "name" were swapped. The versioned tool type is
        # "web_search_20250305" and the tool name is "web_search", matching
        # WebSearchTool20250305Param used in the structured-response path.
        tools.append({"type": "web_search_20250305", "name": "web_search"})

    try:
        if self.supports_temperature_top_p:
            response_message = await self.client.messages.create(
                model=self.model,
                max_tokens=1024,
                system=system_message,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                tools=tools,
                tool_choice=ToolChoiceAutoParam(type="auto"),
            )
        else:
            response_message = await self.client.messages.create(
                model=self.model,
                max_tokens=1024,
                system=system_message,
                messages=messages,
                tools=tools,
                tool_choice=ToolChoiceAutoParam(type="auto"),
            )
    except anthropic.APIError as e:
        raise ProviderError(
            f"Anthropic API error: {e}",
            provider="anthropic",
            original_error=e,
        ) from e

    execution_time = time.time() - start_time
    # Claude replies as a list of content blocks; keep only text blocks.
    final_response = [c.text for c in response_message.content if c.type == "text"]

    input_tokens = response_message.usage.input_tokens
    output_tokens = response_message.usage.output_tokens
    input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

    return LLMResponse(
        content="\n".join(final_response),
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        cached_tokens=response_message.usage.cache_read_input_tokens or 0,
        input_cost=input_cost,
        output_cost=output_cost,
        total_cost=total_cost,
        response_time=execution_time,
    )

Bases: LLM

Google Gemini LLM provider.

Implements the LLM interface for Google's Gemini models, including support for structured outputs via response schemas.

The API key is read from the GEMINI_API_KEY environment variable.

Attributes:

Name Type Description
client

The Google GenAI client instance.

Example

>>> llm = Gemini(
...     model="gemini-2.5-flash",
...     input_cost=0.30,
...     output_cost=2.50,
... )
>>> response = await llm.get_response("Hello, Gemini!")

Source code in src/majordomo_llm/providers/gemini.py
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
class Gemini(LLM):
    """Google Gemini LLM provider.

    Implements the LLM interface for Google's Gemini models, including
    support for structured outputs via response schemas.

    The API key is read from the ``GEMINI_API_KEY`` environment variable.

    Attributes:
        client: The Google GenAI client instance.

    Example:
        >>> llm = Gemini(
        ...     model="gemini-2.5-flash",
        ...     input_cost=0.30,
        ...     output_cost=2.50,
        ... )
        >>> response = await llm.get_response("Hello, Gemini!")
    """

    def __init__(
        self,
        model: str,
        input_cost: float,
        output_cost: float,
        *,
        api_key: str | None = None,
        api_key_alias: str | None = None,
    ) -> None:
        """Initialize the Gemini provider.

        Args:
            model: The Gemini model identifier (e.g., "gemini-2.5-flash").
            input_cost: Cost per million input tokens in USD.
            output_cost: Cost per million output tokens in USD.
            api_key: Optional API key. Defaults to ``GEMINI_API_KEY`` env var.
            api_key_alias: Optional human-readable name for the API key.

        Raises:
            ConfigurationError: If no API key is provided and env var is not set.
        """
        resolved_api_key = resolve_api_key(api_key, "GEMINI_API_KEY", "Gemini")
        super().__init__(
            provider="gemini",
            model=model,
            input_cost=input_cost,
            output_cost=output_cost,
            supports_temperature_top_p=True,
            api_key=resolved_api_key,
            api_key_alias=api_key_alias,
        )
        self.client = genai.Client(api_key=resolved_api_key)

    @retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
    async def get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Get a plain text response from Gemini."""
        return await self._get_response(user_prompt, system_prompt, temperature, top_p)

    @staticmethod
    def _extract_usage(usage_metadata) -> tuple[int, int, int]:
        """Extract (input, output, cached) token counts from usage metadata.

        The GenAI SDK may report ``None`` for absent counts, so every field
        is coerced to 0.  ``cached_content_token_count`` is read defensively
        via ``getattr`` in case the installed SDK version predates it.
        """
        input_tokens = usage_metadata.prompt_token_count or 0
        output_tokens = usage_metadata.candidates_token_count or 0
        cached_tokens = getattr(usage_metadata, "cached_content_token_count", 0) or 0
        return input_tokens, output_tokens, cached_tokens

    async def _get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Internal method to get a response from Gemini."""
        start_time = time.time()
        try:
            response = await self.client.aio.models.generate_content(
                model=self.model,
                config=types.GenerateContentConfig(
                    system_instruction=system_prompt,
                    temperature=temperature,
                    top_p=top_p,
                ),
                contents=user_prompt,
            )
        except genai_errors.APIError as e:
            raise ProviderError(
                f"Gemini API error: {e}",
                provider="gemini",
                original_error=e,
            ) from e
        execution_time = time.time() - start_time

        # Report actual cache hits instead of hard-coding 0, mirroring the
        # cached-token accounting used by the other providers.
        input_tokens, output_tokens, cached_tokens = self._extract_usage(
            response.usage_metadata
        )
        input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

        return LLMResponse(
            content=response.text,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cached_tokens=cached_tokens,
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            response_time=execution_time,
        )

    async def _get_structured_response(
        self,
        response_model: type[T],
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMJSONResponse:
        """Gemini-specific implementation using response schema for structured outputs."""
        schema = response_model.model_json_schema()

        start_time = time.time()
        try:
            response = await self.client.aio.models.generate_content(
                model=self.model,
                config=types.GenerateContentConfig(
                    system_instruction=system_prompt,
                    temperature=temperature,
                    top_p=top_p,
                    response_schema=schema,
                    response_mime_type="application/json",
                ),
                contents=user_prompt,
            )
        except genai_errors.APIError as e:
            raise ProviderError(
                f"Gemini API error: {e}",
                provider="gemini",
                original_error=e,
            ) from e
        execution_time = time.time() - start_time

        try:
            content = json.loads(response.text)
        except json.JSONDecodeError as e:
            raise ResponseParsingError(
                f"Failed to parse JSON response: {e}",
                raw_content=response.text,
            ) from e

        # Same usage extraction as _get_response: None-safe and cache-aware.
        input_tokens, output_tokens, cached_tokens = self._extract_usage(
            response.usage_metadata
        )
        input_cost, output_cost, total_cost = self._calculate_costs(input_tokens, output_tokens)

        return LLMJSONResponse(
            content=content,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cached_tokens=cached_tokens,
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            response_time=execution_time,
        )

__init__

__init__(
    model,
    input_cost,
    output_cost,
    *,
    api_key=None,
    api_key_alias=None,
)

Initialize the Gemini provider.

Parameters:

Name Type Description Default
model str

The Gemini model identifier (e.g., "gemini-2.5-flash").

required
input_cost float

Cost per million input tokens in USD.

required
output_cost float

Cost per million output tokens in USD.

required
api_key str | None

Optional API key. Defaults to GEMINI_API_KEY env var.

None
api_key_alias str | None

Optional human-readable name for the API key.

None

Raises:

Type Description
ConfigurationError

If no API key is provided and env var is not set.

Source code in src/majordomo_llm/providers/gemini.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def __init__(
    self,
    model: str,
    input_cost: float,
    output_cost: float,
    *,
    api_key: str | None = None,
    api_key_alias: str | None = None,
) -> None:
    """Initialize the Gemini provider.

    Args:
        model: The Gemini model identifier (e.g., "gemini-2.5-flash").
        input_cost: Cost per million input tokens in USD.
        output_cost: Cost per million output tokens in USD.
        api_key: Optional API key. Defaults to ``GEMINI_API_KEY`` env var.
        api_key_alias: Optional human-readable name for the API key.

    Raises:
        ConfigurationError: If no API key is provided and env var is not set.
    """
    resolved_api_key = resolve_api_key(api_key, "GEMINI_API_KEY", "Gemini")
    super().__init__(
        provider="gemini",
        model=model,
        input_cost=input_cost,
        output_cost=output_cost,
        supports_temperature_top_p=True,
        api_key=resolved_api_key,
        api_key_alias=api_key_alias,
    )
    self.client = genai.Client(api_key=resolved_api_key)

get_response async

get_response(
    user_prompt,
    system_prompt=None,
    temperature=0.3,
    top_p=1.0,
)

Get a plain text response from Gemini.

Source code in src/majordomo_llm/providers/gemini.py
72
73
74
75
76
77
78
79
80
81
@retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
async def get_response(
    self,
    user_prompt: str,
    system_prompt: str | None = None,
    temperature: float = 0.3,
    top_p: float = 1.0,
) -> LLMResponse:
    """Get a plain text response from Gemini."""
    return await self._get_response(user_prompt, system_prompt, temperature, top_p)

Bases: LLM

DeepSeek LLM provider.

Implements the LLM interface for DeepSeek's models using the OpenAI-compatible API. Supports both DeepSeek-V3 (chat) and DeepSeek-R1 (reasoner) models.

The API key is read from the DEEPSEEK_API_KEY environment variable.

Attributes:

Name Type Description
client

The async OpenAI client instance configured for DeepSeek.

Example

>>> llm = DeepSeek(
...     model="deepseek-chat",
...     input_cost=0.28,
...     output_cost=0.42,
... )
>>> response = await llm.get_response("Hello, DeepSeek!")

Source code in src/majordomo_llm/providers/deepseek.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
class DeepSeek(LLM):
    """DeepSeek LLM provider.

    Implements the LLM interface for DeepSeek's models using the OpenAI-compatible
    API. Supports both DeepSeek-V3 (chat) and DeepSeek-R1 (reasoner) models.

    The API key is read from the ``DEEPSEEK_API_KEY`` environment variable.

    Attributes:
        client: The async OpenAI client instance configured for DeepSeek.

    Example:
        >>> llm = DeepSeek(
        ...     model="deepseek-chat",
        ...     input_cost=0.28,
        ...     output_cost=0.42,
        ... )
        >>> response = await llm.get_response("Hello, DeepSeek!")
    """

    DEEPSEEK_BASE_URL = "https://api.deepseek.com"

    def __init__(
        self,
        model: str,
        input_cost: float,
        output_cost: float,
        supports_temperature_top_p: bool = True,
        *,
        api_key: str | None = None,
        api_key_alias: str | None = None,
    ) -> None:
        """Initialize the DeepSeek provider.

        Args:
            model: The DeepSeek model identifier (e.g., "deepseek-chat", "deepseek-reasoner").
            input_cost: Cost per million input tokens in USD.
            output_cost: Cost per million output tokens in USD.
            supports_temperature_top_p: Whether temperature/top_p are supported.
            api_key: Optional API key. Defaults to ``DEEPSEEK_API_KEY`` env var.
            api_key_alias: Optional human-readable name for the API key.

        Raises:
            ConfigurationError: If no API key is provided and env var is not set.
        """
        key = resolve_api_key(api_key, "DEEPSEEK_API_KEY", "DeepSeek")
        super().__init__(
            provider="deepseek",
            model=model,
            input_cost=input_cost,
            output_cost=output_cost,
            supports_temperature_top_p=supports_temperature_top_p,
            api_key=key,
            api_key_alias=api_key_alias,
        )
        # DeepSeek exposes an OpenAI-compatible endpoint, so the stock
        # AsyncOpenAI client is reused with a custom base URL.
        self.client = openai.AsyncOpenAI(api_key=key, base_url=self.DEEPSEEK_BASE_URL)

    @retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
    async def get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Get a plain text response from DeepSeek."""
        return await self._get_response(user_prompt, system_prompt, temperature, top_p)

    async def _get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Internal method to get a response from DeepSeek."""
        messages = [{"role": "user", "content": user_prompt}]
        if system_prompt:
            messages.insert(0, {"role": "system", "content": system_prompt})

        # Build the request once; sampling knobs are included only when the
        # model supports them (e.g. deepseek-reasoner ignores/rejects them).
        request = {"model": self.model, "messages": messages}
        if self.supports_temperature_top_p:
            request["temperature"] = temperature
            request["top_p"] = top_p

        started = time.time()
        try:
            response = await self.client.chat.completions.create(**request)
        except openai.APIError as e:
            raise ProviderError(
                f"DeepSeek API error: {e}",
                provider="deepseek",
                original_error=e,
            ) from e
        elapsed = time.time() - started

        usage = response.usage
        details = getattr(usage, "prompt_tokens_details", None)
        cached = getattr(details, "cached_tokens", 0) or 0
        in_cost, out_cost, total = self._calculate_costs(
            usage.prompt_tokens, usage.completion_tokens
        )

        return LLMResponse(
            content=response.choices[0].message.content,
            input_tokens=usage.prompt_tokens,
            output_tokens=usage.completion_tokens,
            cached_tokens=cached,
            input_cost=in_cost,
            output_cost=out_cost,
            total_cost=total,
            response_time=elapsed,
        )

    async def _get_structured_response(
        self,
        response_model: type[T],
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMJSONResponse:
        """DeepSeek-specific implementation using JSON mode for structured outputs."""
        # The target schema is injected into the system prompt; JSON mode only
        # guarantees syntactically valid JSON, not schema conformance.
        schema = response_model.model_json_schema()
        combined_system_prompt = build_schema_prompt(schema, system_prompt)

        request = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": combined_system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "response_format": {"type": "json_object"},
        }
        if self.supports_temperature_top_p:
            request["temperature"] = temperature
            request["top_p"] = top_p

        started = time.time()
        try:
            response = await self.client.chat.completions.create(**request)
        except openai.APIError as e:
            raise ProviderError(
                f"DeepSeek API error: {e}",
                provider="deepseek",
                original_error=e,
            ) from e
        elapsed = time.time() - started

        raw = response.choices[0].message.content
        try:
            content = json.loads(raw)
        except json.JSONDecodeError as e:
            raise ResponseParsingError(
                f"Failed to parse JSON response: {e}",
                raw_content=raw,
            ) from e

        usage = response.usage
        details = getattr(usage, "prompt_tokens_details", None)
        cached = getattr(details, "cached_tokens", 0) or 0
        in_cost, out_cost, total = self._calculate_costs(
            usage.prompt_tokens, usage.completion_tokens
        )

        return LLMJSONResponse(
            content=content,
            input_tokens=usage.prompt_tokens,
            output_tokens=usage.completion_tokens,
            cached_tokens=cached,
            input_cost=in_cost,
            output_cost=out_cost,
            total_cost=total,
            response_time=elapsed,
        )

__init__

__init__(
    model,
    input_cost,
    output_cost,
    supports_temperature_top_p=True,
    *,
    api_key=None,
    api_key_alias=None,
)

Initialize the DeepSeek provider.

Parameters:

Name Type Description Default
model str

The DeepSeek model identifier (e.g., "deepseek-chat", "deepseek-reasoner").

required
input_cost float

Cost per million input tokens in USD.

required
output_cost float

Cost per million output tokens in USD.

required
supports_temperature_top_p bool

Whether temperature/top_p are supported.

True
api_key str | None

Optional API key. Defaults to DEEPSEEK_API_KEY env var.

None
api_key_alias str | None

Optional human-readable name for the API key.

None

Raises:

Type Description
ConfigurationError

If no API key is provided and env var is not set.

Source code in src/majordomo_llm/providers/deepseek.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def __init__(
    self,
    model: str,
    input_cost: float,
    output_cost: float,
    supports_temperature_top_p: bool = True,
    *,
    api_key: str | None = None,
    api_key_alias: str | None = None,
) -> None:
    """Initialize the DeepSeek provider.

    Args:
        model: The DeepSeek model identifier (e.g., "deepseek-chat", "deepseek-reasoner").
        input_cost: Cost per million input tokens in USD.
        output_cost: Cost per million output tokens in USD.
        supports_temperature_top_p: Whether temperature/top_p are supported.
        api_key: Optional API key. Defaults to ``DEEPSEEK_API_KEY`` env var.
        api_key_alias: Optional human-readable name for the API key.

    Raises:
        ConfigurationError: If no API key is provided and env var is not set.
    """
    resolved_api_key = resolve_api_key(api_key, "DEEPSEEK_API_KEY", "DeepSeek")
    super().__init__(
        provider="deepseek",
        model=model,
        input_cost=input_cost,
        output_cost=output_cost,
        supports_temperature_top_p=supports_temperature_top_p,
        api_key=resolved_api_key,
        api_key_alias=api_key_alias,
    )
    self.client = openai.AsyncOpenAI(
        api_key=resolved_api_key,
        base_url=self.DEEPSEEK_BASE_URL,
    )

get_response async

get_response(
    user_prompt,
    system_prompt=None,
    temperature=0.3,
    top_p=1.0,
)

Get a plain text response from DeepSeek.

Source code in src/majordomo_llm/providers/deepseek.py
84
85
86
87
88
89
90
91
92
93
@retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
async def get_response(
    self,
    user_prompt: str,
    system_prompt: str | None = None,
    temperature: float = 0.3,
    top_p: float = 1.0,
) -> LLMResponse:
    """Get a plain text response from DeepSeek."""
    return await self._get_response(user_prompt, system_prompt, temperature, top_p)

Bases: LLM

Cohere LLM provider.

Implements the LLM interface for Cohere's models using the V2 API. Supports Command A, Command R+, Command R, and Command R7B models.

The API key is read from the CO_API_KEY environment variable.

Attributes:

Name Type Description
client

The async Cohere client instance.

Example

>>> llm = Cohere(
...     model="command-a-03-2025",
...     input_cost=2.50,
...     output_cost=10.00,
... )
>>> response = await llm.get_response("Hello, Cohere!")

Source code in src/majordomo_llm/providers/cohere.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
class Cohere(LLM):
    """Cohere LLM provider.

    Implements the LLM interface for Cohere's models using the V2 API.
    Supports Command A, Command R+, Command R, and Command R7B models.

    The API key is read from the ``CO_API_KEY`` environment variable.

    Attributes:
        client: The async Cohere client instance.

    Example:
        >>> llm = Cohere(
        ...     model="command-a-03-2025",
        ...     input_cost=2.50,
        ...     output_cost=10.00,
        ... )
        >>> response = await llm.get_response("Hello, Cohere!")
    """

    def __init__(
        self,
        model: str,
        input_cost: float,
        output_cost: float,
        supports_temperature_top_p: bool = True,
        *,
        api_key: str | None = None,
        api_key_alias: str | None = None,
    ) -> None:
        """Initialize the Cohere provider.

        Args:
            model: The Cohere model identifier (e.g., "command-a-03-2025").
            input_cost: Cost per million input tokens in USD.
            output_cost: Cost per million output tokens in USD.
            supports_temperature_top_p: Whether temperature/top_p are supported.
            api_key: Optional API key. Defaults to ``CO_API_KEY`` env var.
            api_key_alias: Optional human-readable name for the API key.

        Raises:
            ConfigurationError: If no API key is provided and env var is not set.
        """
        key = resolve_api_key(api_key, "CO_API_KEY", "Cohere")
        super().__init__(
            provider="cohere",
            model=model,
            input_cost=input_cost,
            output_cost=output_cost,
            supports_temperature_top_p=supports_temperature_top_p,
            api_key=key,
            api_key_alias=api_key_alias,
        )
        self.client = cohere.AsyncClientV2(api_key=key)

    @retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
    async def get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Get a plain text response from Cohere."""
        return await self._get_response(user_prompt, system_prompt, temperature, top_p)

    async def _get_response(
        self,
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMResponse:
        """Internal method to get a response from Cohere."""
        messages = [{"role": "user", "content": user_prompt}]
        if system_prompt:
            messages.insert(0, {"role": "system", "content": system_prompt})

        # Assemble the call once; Cohere's V2 chat API names nucleus
        # sampling ``p`` rather than ``top_p``.
        request = {"model": self.model, "messages": messages}
        if self.supports_temperature_top_p:
            request["temperature"] = temperature
            request["p"] = top_p

        started = time.time()
        try:
            response = await self.client.chat(**request)
        except cohere.core.api_error.ApiError as e:
            raise ProviderError(
                f"Cohere API error: {e}",
                provider="cohere",
                original_error=e,
            ) from e
        elapsed = time.time() - started

        tokens = response.usage.tokens
        in_cost, out_cost, total = self._calculate_costs(
            tokens.input_tokens, tokens.output_tokens
        )

        return LLMResponse(
            content=response.message.content[0].text,
            input_tokens=tokens.input_tokens,
            output_tokens=tokens.output_tokens,
            cached_tokens=0,
            input_cost=in_cost,
            output_cost=out_cost,
            total_cost=total,
            response_time=elapsed,
        )

    async def _get_structured_response(
        self,
        response_model: type[T],
        user_prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.3,
        top_p: float = 1.0,
    ) -> LLMJSONResponse:
        """Cohere-specific implementation using JSON mode for structured outputs.

        Uses prompt-based schema injection with json_object mode since Cohere's
        json_schema validation doesn't support all JSON Schema constraints
        (e.g., minimum/maximum for numbers, enum values).

        The schema is flattened via inline_schema_refs() to remove $defs/$ref
        which Cohere's model handles poorly.
        """
        # Inline $refs to flatten the schema - Cohere struggles with $defs/$ref
        schema = inline_schema_refs(response_model.model_json_schema())
        combined_system_prompt = build_schema_prompt(schema, system_prompt)

        request = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": combined_system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "response_format": {"type": "json_object"},
        }
        if self.supports_temperature_top_p:
            request["temperature"] = temperature
            request["p"] = top_p

        started = time.time()
        try:
            response = await self.client.chat(**request)
        except cohere.core.api_error.ApiError as e:
            raise ProviderError(
                f"Cohere API error: {e}",
                provider="cohere",
                original_error=e,
            ) from e
        elapsed = time.time() - started

        raw = response.message.content[0].text
        try:
            content = json.loads(raw)
        except json.JSONDecodeError as e:
            raise ResponseParsingError(
                f"Failed to parse JSON response: {e}",
                raw_content=raw,
            ) from e

        tokens = response.usage.tokens
        in_cost, out_cost, total = self._calculate_costs(
            tokens.input_tokens, tokens.output_tokens
        )

        return LLMJSONResponse(
            content=content,
            input_tokens=tokens.input_tokens,
            output_tokens=tokens.output_tokens,
            cached_tokens=0,
            input_cost=in_cost,
            output_cost=out_cost,
            total_cost=total,
            response_time=elapsed,
        )

__init__

__init__(
    model,
    input_cost,
    output_cost,
    supports_temperature_top_p=True,
    *,
    api_key=None,
    api_key_alias=None,
)

Initialize the Cohere provider.

Parameters:

Name Type Description Default
model str

The Cohere model identifier (e.g., "command-a-03-2025").

required
input_cost float

Cost per million input tokens in USD.

required
output_cost float

Cost per million output tokens in USD.

required
supports_temperature_top_p bool

Whether temperature/top_p are supported.

True
api_key str | None

Optional API key. Defaults to CO_API_KEY env var.

None
api_key_alias str | None

Optional human-readable name for the API key.

None

Raises:

Type Description
ConfigurationError

If no API key is provided and env var is not set.

Source code in src/majordomo_llm/providers/cohere.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def __init__(
    self,
    model: str,
    input_cost: float,
    output_cost: float,
    supports_temperature_top_p: bool = True,
    *,
    api_key: str | None = None,
    api_key_alias: str | None = None,
) -> None:
    """Initialize the Cohere provider.

    Args:
        model: The Cohere model identifier (e.g., "command-a-03-2025").
        input_cost: Cost per million input tokens in USD.
        output_cost: Cost per million output tokens in USD.
        supports_temperature_top_p: Whether temperature/top_p are supported.
        api_key: Optional API key. Defaults to ``CO_API_KEY`` env var.
        api_key_alias: Optional human-readable name for the API key.

    Raises:
        ConfigurationError: If no API key is provided and env var is not set.
    """
    resolved_api_key = resolve_api_key(api_key, "CO_API_KEY", "Cohere")
    super().__init__(
        provider="cohere",
        model=model,
        input_cost=input_cost,
        output_cost=output_cost,
        supports_temperature_top_p=supports_temperature_top_p,
        api_key=resolved_api_key,
        api_key_alias=api_key_alias,
    )
    self.client = cohere.AsyncClientV2(api_key=resolved_api_key)

get_response async

get_response(
    user_prompt,
    system_prompt=None,
    temperature=0.3,
    top_p=1.0,
)

Get a plain text response from Cohere.

Source code in src/majordomo_llm/providers/cohere.py
80
81
82
83
84
85
86
87
88
89
@retry(wait=wait_random_exponential(min=0.2, max=1), stop=stop_after_attempt(3))
async def get_response(
    self,
    user_prompt: str,
    system_prompt: str | None = None,
    temperature: float = 0.3,
    top_p: float = 1.0,
) -> LLMResponse:
    """Get a plain text response from Cohere."""
    return await self._get_response(user_prompt, system_prompt, temperature, top_p)