feat(reusable): add the 4090 Ti (qwen3.6-27b via llama-swap) to the default swarm (#13)
Build & push image / build-and-push (push) Successful in 7s
Build & push image / build-and-push (push) Successful in 7s
This commit was merged in pull request #13.
This commit is contained in:
@@ -13,7 +13,8 @@
|
||||
# with: { allowed_users: "..." } # config inputs are optional (see below)
|
||||
#
|
||||
# Inputs ship the DEFAULT swarm (see the inputs block): 3 cloud models + the
|
||||
# Claude Code engine, 5-lens suite (3 claude models concurrent, 5 lenses each). A consumer
|
||||
# Claude Code engine + a local 4090 Ti (qwen3.6-27b via llama-swap), 5-lens suite
|
||||
# (3 claude models concurrent / 5 lenses each; the 4090 Ti runs 1 model × 1 lens). A consumer
|
||||
# inherits it by omitting `with:` entirely, or overrides any field (e.g.
|
||||
# `models:` for a cloud-only / different-provider setup; "" falls back to the
|
||||
# image's built-in default). Secrets are DECLARED below (workflow_call.secrets) so a
|
||||
@@ -35,9 +36,11 @@ on:
|
||||
workflow_call:
|
||||
# Inputs ship the DEFAULT Gadfly swarm so a consumer can just call this
|
||||
# workflow (no `with:` block) and inherit it. The default is opinionated —
|
||||
# 3 strong cloud models + the Claude Code engine (sonnet/opus/opus:max), the
|
||||
# 3 strong cloud models + the Claude Code engine (sonnet/opus/opus:max) + a
|
||||
# local 4090 Ti (qwen3.6-27b via llama-swap at GADFLY_ENDPOINT_RAGNAROS), the
|
||||
# 5-lens suite, with all 3 claude models concurrent and each running its 5
|
||||
# lenses at once. It needs OLLAMA_CLOUD_API_KEY and CLAUDE_CODE_OAUTH_TOKEN; a consumer
|
||||
# lenses at once (the 4090 Ti runs 1 model × 1 lens — a single local GPU). It
|
||||
# needs OLLAMA_CLOUD_API_KEY and CLAUDE_CODE_OAUTH_TOKEN; a consumer
|
||||
# with only one (or a different provider) overrides `models:` (and forwards
|
||||
# just the secrets it uses). Set any input to "" to fall back to the
|
||||
# image/entrypoint built-in default.
|
||||
@@ -46,12 +49,12 @@ on:
|
||||
# (3 models × 5 lenses = up to 15 concurrent `claude -p` per pass). If you hit
|
||||
# subscription rate limits or runner load, dial claude-code down in either knob.
|
||||
inputs:
|
||||
models: { type: string, default: "minimax-m3:cloud,glm-5.2:cloud,deepseek-v4-pro:cloud,claude-code/sonnet,claude-code/opus,claude-code/opus:max" } # GADFLY_MODELS (csv)
|
||||
models: { type: string, default: "minimax-m3:cloud,glm-5.2:cloud,deepseek-v4-pro:cloud,claude-code/sonnet,claude-code/opus,claude-code/opus:max,ragnaros/qwen3.6-27b" } # GADFLY_MODELS (csv); ragnaros/* = the 4090 Ti via llama-swap (see GADFLY_ENDPOINT_RAGNAROS)
|
||||
specialists: { type: string, default: "security,correctness,maintainability,performance,error-handling" } # GADFLY_SPECIALISTS (5-lens default suite)
|
||||
provider: { type: string, default: "" } # GADFLY_PROVIDER
|
||||
base_url: { type: string, default: "" } # GADFLY_BASE_URL
|
||||
provider_concurrency: { type: string, default: "ollama-cloud=3,claude-code=3" } # GADFLY_PROVIDER_CONCURRENCY (all 3 claude models at once)
|
||||
provider_lens_concurrency: { type: string, default: "ollama-cloud=3,claude-code=5" } # GADFLY_PROVIDER_LENS_CONCURRENCY (each claude runs all 5 lenses at once)
|
||||
provider_concurrency: { type: string, default: "ollama-cloud=3,claude-code=3,ragnaros=1" } # GADFLY_PROVIDER_CONCURRENCY (claude all 3 at once; ragnaros 4090 Ti one model at a time)
|
||||
provider_lens_concurrency: { type: string, default: "ollama-cloud=3,claude-code=5,ragnaros=1" } # GADFLY_PROVIDER_LENS_CONCURRENCY (claude 5 lenses at once; ragnaros 1 lens at a time)
|
||||
timeout_secs: { type: string, default: "600" } # GADFLY_TIMEOUT_SECS (per lens)
|
||||
max_steps: { type: string, default: "14" } # GADFLY_MAX_STEPS
|
||||
worker_model: { type: string, default: "" } # GADFLY_WORKER_MODEL
|
||||
@@ -119,6 +122,12 @@ jobs:
|
||||
# reusable workflow can't enumerate arbitrary names.
|
||||
GADFLY_ENDPOINT_M1: ${{ secrets.GADFLY_ENDPOINT_M1 }}
|
||||
GADFLY_ENDPOINT_M5: ${{ secrets.GADFLY_ENDPOINT_M5 }}
|
||||
# ragnaros = the 4090 Ti, reached over the LAN through its llama-swap
|
||||
# proxy (lazy-loads models on demand). Plain HTTPS URL, no credential —
|
||||
# set here so the default `ragnaros/qwen3.6-27b` model resolves for all
|
||||
# consumers. Registers provider "ragnaros". NB: use the un-hyphenated
|
||||
# `llamaswap` provider spelling — the pinned image accepts that form.
|
||||
GADFLY_ENDPOINT_RAGNAROS: "llamaswap|https://llama-swap.ragnaros.dudenhoeffer.casa"
|
||||
# --- findings telemetry (optional) --------------------------------
|
||||
GADFLY_FINDINGS_URL: ${{ secrets.GADFLY_FINDINGS_URL }}
|
||||
GADFLY_FINDINGS_TOKEN: ${{ secrets.GADFLY_FINDINGS_TOKEN }}
|
||||
|
||||
Reference in New Issue
Block a user