"""Ollama Cloud provider profile with reasoning_effort forwarding. Ollama Cloud (ollama.com) provides an OpenAI-compatible API that accepts a top-level ``reasoning_effort`` parameter with values: high, medium, low, max, none. The API rejects undocumented values like "xhigh". This profile overrides :meth:`build_api_kwargs_extras` to forward the configured effort level, clamping xhigh → high (ollama.com has no xhigh; its "max" is heavier than "high" and typically overkill). Deploy to: /opt/data/plugins/model-providers/ollama-cloud/__init__.py User plugins load after bundled ones with last-writer-wins, so this overrides the bare ProviderProfile stub that ships with the gateway image. """ from __future__ import annotations from typing import Any from providers import register_provider from providers.base import ProviderProfile # Valid reasoning_effort values per ollama.com API _OLLAMA_VALID_EFFORTS = frozenset({"high", "medium", "low", "max", "none"}) class OllamaCloudProfile(ProviderProfile): """Ollama Cloud — top-level reasoning_effort forwarding.""" def build_api_kwargs_extras( self, *, reasoning_config: dict | None = None, model: str | None = None, **context ) -> tuple[dict[str, Any], dict[str, Any]]: top_level: dict[str, Any] = {} if not isinstance(reasoning_config, dict): return {}, top_level effort = (reasoning_config.get("effort") or "").strip().lower() # xhigh → high (not max; max is too heavy for routine use) if effort == "xhigh": effort = "high" if effort in _OLLAMA_VALID_EFFORTS: top_level["reasoning_effort"] = effort return {}, top_level ollama_cloud = OllamaCloudProfile( name="ollama-cloud", aliases=("ollama_cloud",), default_aux_model="nemotron-3-nano:30b", env_vars=("OLLAMA_API_KEY",), base_url="https://ollama.com/v1", ) register_provider(ollama_cloud)