feat: ollama-cloud provider profile with reasoning_effort forwarding

2026-05-30 12:32:33 -07:00 · 2026-05-30 12:32:33 -07:00 · 086e188372
commit 086e188372
parent 911b8d045e
4 changed files with 105 additions and 3 deletions
--- a/README.md
+++ b/README.md
@ -1,3 +0,0 @@
-# skill-ollama-cloud-provider
-
-Ollama Cloud provider profile plugin with reasoning_effort forwarding for Hermes
--- a/SKILL.md
+++ b/SKILL.md
@ -0,0 +1,42 @@
+# Ollama Cloud Provider Plugin
+
+Overrides the bundled `ollama-cloud` provider profile to properly forward
+`reasoning_effort` to the ollama.com API.
+
+## Problem
+
+The bundled profile is a bare `ProviderProfile` with no `build_api_kwargs_extras`
+override. The gateway parses `reasoning_effort` from config but the base class
+returns `({}, {})` — nothing reaches the API. Agents start every session at
+the provider's default reasoning level (medium) regardless of their config.
+
+## Fix
+
+This plugin provides `OllamaCloudProfile` which overrides
+`build_api_kwargs_extras` to forward `reasoning_effort` as a top-level API
+parameter.
+
+**Value mapping:** `xhigh → high` (ollama.com has no `xhigh`; its `max` is heavier
+than `high` and typically overkill). Any other unrecognized value is not
+forwarded at all.
+
+## Deploy
+
+```bash
+hermes-plugin-deploy <agent> --plugin ollama-cloud --restart
+```
+
+Or manually: copy `__init__.py` to
+`/opt/data/plugins/model-providers/ollama-cloud/__init__.py` and restart the gateway.
+
+User plugins load after bundled ones with last-writer-wins, so this overrides
+the stub without modifying the bundled file.
+
+## Requires
+
+- `reasoning_effort` set under the `model:` section of `config.yaml` to one of
+  `high`, `medium`, `low`, `max`, `none`, or `xhigh` (forwarded as `high`).
+
+## Applies to
+
+Any agent using `provider: ollama-cloud` (Bastion, Atlas, and any future fleet
+agent on this provider).
--- a/model-providers/ollama-cloud/init.py
+++ b/model-providers/ollama-cloud/init.py
@ -0,0 +1,58 @@
+"""Ollama Cloud provider profile with reasoning_effort forwarding.
+
+Ollama Cloud (ollama.com) provides an OpenAI-compatible API that accepts
+a top-level ``reasoning_effort`` parameter with values: high, medium, low,
+max, none.  The API rejects undocumented values like "xhigh".
+
+This profile overrides :meth:`build_api_kwargs_extras` to forward the
+configured effort level, clamping xhigh → high (ollama.com has no xhigh;
+its "max" is heavier than "high" and typically overkill).
+
+Deploy to: /opt/data/plugins/model-providers/ollama-cloud/__init__.py
+User plugins load after bundled ones with last-writer-wins, so this
+overrides the bare ProviderProfile stub that ships with the gateway image.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+# Valid reasoning_effort values per ollama.com API
+_OLLAMA_VALID_EFFORTS = frozenset({"high", "medium", "low", "max", "none"})
+
+
+class OllamaCloudProfile(ProviderProfile):
+    """Ollama Cloud — top-level reasoning_effort forwarding."""
+
+    def build_api_kwargs_extras(
+        self, *, reasoning_config: dict | None = None, model: str | None = None, **context
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        top_level: dict[str, Any] = {}
+
+        if not isinstance(reasoning_config, dict):
+            return {}, top_level
+
+        effort = (reasoning_config.get("effort") or "").strip().lower()
+
+        # xhigh → high (not max; max is too heavy for routine use)
+        if effort == "xhigh":
+            effort = "high"
+
+        if effort in _OLLAMA_VALID_EFFORTS:
+            top_level["reasoning_effort"] = effort
+
+        return {}, top_level
+
+
+# Module-level profile instance for the "ollama-cloud" provider.
+ollama_cloud = OllamaCloudProfile(
+    name="ollama-cloud",
+    aliases=("ollama_cloud",),
+    default_aux_model="nemotron-3-nano:30b",
+    env_vars=("OLLAMA_API_KEY",),
+    base_url="https://ollama.com/v1",
+)
+
+# Registration runs at import time, i.e. as soon as this plugin module is loaded.
+register_provider(ollama_cloud)
--- a/plugin.yaml
+++ b/plugin.yaml
@ -0,0 +1,5 @@
+name: ollama-cloud
+version: 1.0.0
+description: "Ollama Cloud provider profile with reasoning_effort forwarding. Overrides the bundled stub to properly send reasoning_effort to the ollama.com API."
+type: model-provider
+deploy_path: model-providers/ollama-cloud