e119ed325b
Phase 6 deployment infrastructure: finalize Dockerfile with OCI labels, improve .env.example with grouped config keys, add scripts/pull-models.sh for Mac-side model setup, and add docs/deploy.md covering the full deployment topology, prerequisites, security model, and troubleshooting. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
35 lines
1.4 KiB
Bash
Executable File
35 lines
1.4 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Pull the recommended model roster on the Mac.
# Run this ON the Mac where Ollama is installed.
#
# Usage: run with no arguments.
# Environment:
#   OLLAMA_HOST  Base URL of the Ollama HTTP API
#                (default: http://localhost:11434).
# Requires: curl, jq
#
# Exits non-zero as soon as a pull fails (server unreachable, error object
# in the response stream, or unparseable response).
set -euo pipefail

OLLAMA_HOST="${OLLAMA_HOST:-http://localhost:11434}"
# Drop a trailing slash so "${OLLAMA_HOST}/api/pull" never contains "//".
OLLAMA_HOST="${OLLAMA_HOST%/}"

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Ask the Ollama server to pull one model and print its status messages.
# Globals:   OLLAMA_HOST (read)
# Arguments: $1 - model name/tag to pull (e.g. "qwen3:14b")
#            $2 - human-readable label printed as the section banner
# Outputs:   banner and each "status" value from the response on stdout;
#            diagnostics on stderr
# Returns:   0 on success; exits the script via die() on failure
#######################################
pull_model() {
  local model=$1
  local label=$2
  local payload

  # Build the request body with jq so the model name is always valid JSON.
  payload=$(jq -cn --arg name "${model}" '{name: $name}') \
    || die "could not build request body for ${model}"

  echo "--- ${label} ---"

  # -sS: no progress meter, but still report transport errors.
  # The jq filter prints each status line and raises on an "error" key,
  # which makes jq exit non-zero; pipefail then surfaces either side's
  # failure to the `if`.
  if ! curl -sS -H 'Content-Type: application/json' \
      "${OLLAMA_HOST}/api/pull" -d "${payload}" \
    | jq -r 'if .error != null
             then error(.error | tostring)
             else .status // empty
             end'; then
    die "failed to pull ${model} from ${OLLAMA_HOST}"
  fi
}

main() {
  local tool
  for tool in curl jq; do
    command -v "${tool}" >/dev/null 2>&1 \
      || die "required tool not found in PATH: ${tool}"
  done

  echo "=== Pulling models to ${OLLAMA_HOST} ==="

  # Embedder (always resident, slot 1)
  pull_model "nomic-embed-text" "Embedder: nomic-embed-text"

  # Worker models (rotate through slot 2)
  pull_model "qwen3:14b" "Worker: qwen3:14b (parse/data)"
  pull_model "qwen3:30b" "Worker: qwen3:30b (agent+code, default)"

  # Optional — uncomment if needed:
  # pull_model "gpt-oss:20b" "Worker: gpt-oss:20b (fast coder)"
  # pull_model "qwen2.5-coder:32b" "Worker: qwen2.5-coder:32b (quality coder, slow)"

  # Reminder of the server-side settings; these are only printed, not applied.
  echo ""
  echo "=== Mac-side Ollama environment (set via launchctl or .zshrc) ==="
  echo " OLLAMA_MAX_LOADED_MODELS=2"
  echo " OLLAMA_KEEP_ALIVE=-1 # for the embedder slot"
  echo " OLLAMA_CONTEXT_LENGTH=8192 # minimum recommended"
  echo ""
  echo " Example: launchctl setenv OLLAMA_MAX_LOADED_MODELS 2"
  echo " Then restart Ollama."
}

main "$@"