.github,docker/unified: improve caching and fix bugs (#598)

- set up a GHA scheduled job to build the container nightly
- enable pushing a llama-swap:unified and a llama-swap:unified-Y-M-D image to ghcr.io
- tidy up Dockerfile to use a non-root user and llama-swap as an entry point
2026-03-23 22:24:40 +09:00
parent 916d13f5bd
commit 2c282dccad
5 changed files with 100 additions and 34 deletions
@@ -0,0 +1,33 @@
+# placeholder example configuration: global settings first, then one entry per model
+healthCheckTimeout: 300  # NOTE(review): presumably seconds to wait for a model to become healthy - confirm
+logRequests: true  # NOTE(review): presumably turns on per-request logging - confirm
+
+models:  # mapping of model name -> launch settings; every entry below supplies a cmd
+  "llama":  # text model example, fetched via -hf from the repo named in cmd
+    cmd: >  # folded scalar: the lines below are joined with spaces into one command line
+      llama-server
+      -hf bartowski/Qwen2.5-0.5B-Instruct-GGUF:Q4_K_M
+      --port ${PORT}
+
+  "whisper":  # audio transcription example; expects the model file at /models/whisper.bin
+    checkEndpoint: /v1/audio/transcriptions/  # NOTE(review): presumably the path probed for readiness - confirm
+    cmd: >  # folded scalar: joined into one command line; the model path uses whisper-server's --model flag
+      whisper-server
+      --port ${PORT}
+      --model /models/whisper.bin
+      --flash-attn
+      --request-path /v1/audio/transcriptions --inference-path ""
+
+  "image":  # image generation example; expects its model files under /models
+    checkEndpoint: /  # NOTE(review): presumably the path probed for readiness - confirm
+    cmd: >  # folded scalar, matching the other entries; listens on ${PORT} like them instead of a fixed port
+      /app/sd-server
+      --listen-port ${PORT}
+      --diffusion-fa
+      --diffusion-model /models/z_image_turbo-Q8_0.gguf
+      --vae /models/ae.safetensors
+      --llm /models/qwen3-4b-instruct-2507-q8_0.gguf
+      --offload-to-cpu
+      --cfg-scale 1.0
+      --height 512 --width 512
+      --steps 8