chore: add deployment docs, model script, and finalize env config
Phase 6 deployment infrastructure: finalize Dockerfile with OCI labels, improve .env.example with grouped config keys, add scripts/pull-models.sh for Mac-side model setup, and add docs/deploy.md covering the full deployment topology, prerequisites, security model, and troubleshooting.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Executable
+34
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env bash
# Pull the recommended model roster on the Mac.
# Run this ON the Mac where Ollama is installed.
#
# Environment:
#   OLLAMA_HOST  Base URL of the Ollama API (default: http://localhost:11434).
#
# Requires: curl, jq.
# Exits non-zero as soon as a pull fails (transport error, HTTP error, or an
# {"error": ...} object in the streamed response).
set -euo pipefail

OLLAMA_HOST="${OLLAMA_HOST:-http://localhost:11434}"
# Drop a trailing slash so "${OLLAMA_HOST}/api/pull" never contains "//".
OLLAMA_HOST="${OLLAMA_HOST%/}"

# Print a message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Pull one model through the Ollama HTTP API, echoing each streamed status.
# Globals:   OLLAMA_HOST (read)
# Arguments: $1 - human-readable label printed in the section banner
#            $2 - model name sent to /api/pull
# Outputs:   banner and status lines on stdout; errors on stderr
# Returns:   0 on success; aborts the script via die on any failure
#######################################
pull_model() {
  local label=$1
  local model=$2
  local payload

  echo "--- ${label} ---"

  # Build the request body with jq so the model name is always valid JSON.
  payload=$(jq -cn --arg name "${model}" '{name: $name}')

  # -f/-S surface HTTP and transport failures that a bare -s would hide.
  # The jq filter turns an in-stream {"error": ...} object into a non-zero
  # exit instead of silently dropping it (it has no .status field).
  curl -fsS --no-buffer "${OLLAMA_HOST}/api/pull" -d "${payload}" \
    | jq -r --unbuffered '
        if .error
        then ("error: \(.error)\n" | halt_error(1))
        else .status // empty
        end' \
    || die "failed to pull ${model} from ${OLLAMA_HOST}"
}

# Print the Ollama environment settings recommended for the Mac.
print_env_hints() {
  cat <<'EOF'

=== Mac-side Ollama environment (set via launchctl or .zshrc) ===
 OLLAMA_MAX_LOADED_MODELS=2
 OLLAMA_KEEP_ALIVE=-1 # for the embedder slot
 OLLAMA_CONTEXT_LENGTH=8192 # minimum recommended

 Example: launchctl setenv OLLAMA_MAX_LOADED_MODELS 2
 Then restart Ollama.
EOF
}

main() {
  local tool
  for tool in curl jq; do
    command -v "${tool}" >/dev/null 2>&1 \
      || die "required tool not found: ${tool}"
  done

  echo "=== Pulling models to ${OLLAMA_HOST} ==="

  # Embedder (always resident, slot 1)
  pull_model "Embedder: nomic-embed-text" "nomic-embed-text"

  # Worker models (rotate through slot 2)
  pull_model "Worker: qwen3:14b (parse/data)" "qwen3:14b"
  pull_model "Worker: qwen3:30b (agent+code, default)" "qwen3:30b"

  # Optional — uncomment if needed:
  # pull_model "Worker: gpt-oss:20b (fast coder)" "gpt-oss:20b"
  # pull_model "Worker: qwen2.5-coder:32b (quality coder, slow)" "qwen2.5-coder:32b"

  print_env_hints
}

main "$@"
|
||||
Reference in New Issue
Block a user