diff --git a/README.md b/README.md deleted file mode 100644 index b526823..0000000 --- a/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# skill-ollama-cloud-provider - -Ollama Cloud provider profile plugin with reasoning_effort forwarding for Hermes \ No newline at end of file diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..6f73450 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,42 @@ +# Ollama Cloud Provider Plugin + +Overrides the bundled `ollama-cloud` provider profile to properly forward +`reasoning_effort` to the ollama.com API. + +## Problem + +The bundled profile is a bare `ProviderProfile` with no `build_api_kwargs_extras` +override. The gateway parses `reasoning_effort` from config but the base class +returns `({}, {})` — nothing reaches the API. Agents start every session at +the provider's default reasoning level (medium) regardless of their config. + +## Fix + +This plugin provides `OllamaCloudProfile` which overrides +`build_api_kwargs_extras` to forward `reasoning_effort` as a top-level API +parameter. + +**Value mapping:** `xhigh → high` (ollama.com has no xhigh; its `max` is heavier +than `high` and typically overkill). + +## Deploy + +```bash +hermes-plugin-deploy --plugin ollama-cloud --restart +``` + +Or manually: copy `__init__.py` to +`/opt/data/plugins/model-providers/ollama-cloud/__init__.py` and restart the gateway. + +User plugins load after bundled ones with last-writer-wins, so this overrides +the stub without modifying the bundled file. + +## Requires + +- `model.reasoning_effort: high` (or `medium`/`low`/`max`/`none`) in `config.yaml` + under the `model:` section. + +## Applies to + +Any agent using `provider: ollama-cloud` (Bastion, Atlas, and any future fleet +agent on this provider). 
\ No newline at end of file
diff --git a/model-providers/ollama-cloud/__init__.py b/model-providers/ollama-cloud/__init__.py
new file mode 100644
index 0000000..7e2b1bb
--- /dev/null
+++ b/model-providers/ollama-cloud/__init__.py
@@ -0,0 +1,82 @@
+"""Ollama Cloud provider profile with reasoning_effort forwarding.
+
+Ollama Cloud (ollama.com) provides an OpenAI-compatible API that accepts
+a top-level ``reasoning_effort`` parameter with values: high, medium, low,
+max, none. The API rejects undocumented values like "xhigh".
+
+This profile overrides :meth:`build_api_kwargs_extras` to forward the
+configured effort level, clamping xhigh → high (ollama.com has no xhigh;
+its "max" is heavier than "high" and typically overkill).
+
+Deploy to: /opt/data/plugins/model-providers/ollama-cloud/__init__.py
+User plugins load after bundled ones with last-writer-wins, so this
+overrides the bare ProviderProfile stub that ships with the gateway image.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+# Valid reasoning_effort values per ollama.com API
+_OLLAMA_VALID_EFFORTS = frozenset({"high", "medium", "low", "max", "none"})
+
+
+class OllamaCloudProfile(ProviderProfile):
+    """Ollama Cloud — top-level reasoning_effort forwarding."""
+
+    def build_api_kwargs_extras(
+        self, *, reasoning_config: dict | None = None, model: str | None = None, **context
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Build the extra request kwargs carrying ``reasoning_effort``.
+
+        Args:
+            reasoning_config: Reasoning settings; only the ``"effort"`` key
+                is read. Anything other than a dict yields no extras.
+            model: Not used by this implementation.
+            **context: Not used by this implementation.
+
+        Returns:
+            A two-tuple of dicts. The first is always empty; the second
+            holds ``{"reasoning_effort": <value>}`` when a supported effort
+            is configured and is empty otherwise.
+        """
+        top_level: dict[str, Any] = {}
+
+        if not isinstance(reasoning_config, dict):
+            return {}, top_level
+
+        # NOTE(review): only the "effort" key is consulted. If the gateway
+        # signals "reasoning disabled" some other way, it is not translated
+        # to "none" here — confirm against the caller.
+        raw_effort = reasoning_config.get("effort")
+
+        # A non-string effort (e.g. a bool or int from a hand-edited config)
+        # is dropped like any other unsupported value; calling .strip() on
+        # it would raise AttributeError while building the request.
+        if not isinstance(raw_effort, str):
+            return {}, top_level
+
+        effort = raw_effort.strip().lower()
+
+        # xhigh → high (not max; max is too heavy for routine use)
+        if effort == "xhigh":
+            effort = "high"
+
+        if effort in _OLLAMA_VALID_EFFORTS:
+            top_level["reasoning_effort"] = effort
+
+        return {}, top_level
+
+
+ollama_cloud = OllamaCloudProfile(
+    name="ollama-cloud",
+    aliases=("ollama_cloud",),
+    default_aux_model="nemotron-3-nano:30b",
+    env_vars=("OLLAMA_API_KEY",),
+    base_url="https://ollama.com/v1",
+)
+
+register_provider(ollama_cloud)
\ No newline at end of file
diff --git a/plugin.yaml b/plugin.yaml
new file mode 100644
index 0000000..b30d2ee
--- /dev/null
+++ b/plugin.yaml
@@ -0,0 +1,5 @@
+name: ollama-cloud
+version: 1.0.0
+description: "Ollama Cloud provider profile with reasoning_effort forwarding. Overrides the bundled stub to properly send reasoning_effort to the ollama.com API."
+type: model-provider
+deploy_path: model-providers/ollama-cloud
\ No newline at end of file