2c282dccad
- set up a GHA scheduled job to build the container nightly
- enable pushing a llama-swap:unified and a llama-swap:unified-Y-M-D image to ghcr.io
- tidy up Dockerfile to use a non-root user and llama-swap as an entry point
34 lines
777 B
YAML
34 lines
777 B
YAML
# placeholder example configuration
#
# Layout: two top-level settings, then one entry per model under `models`.
# Each model entry supplies the command line (`cmd`) used to start its server
# and, optionally, the path (`checkEndpoint`) used for the health check.

# NOTE(review): presumably seconds — confirm against the llama-swap docs.
healthCheckTimeout: 300
logRequests: true

models:
  "llama":
    # `>` folds the lines below into a single space-separated command line.
    cmd: >
      llama-server
      -hf bartowski/Qwen2.5-0.5B-Instruct-GGUF:Q4_K_M
      --port ${PORT}

  "whisper":
    # Same path as --request-path below, with a trailing slash.
    checkEndpoint: /v1/audio/transcriptions/
    # The model file is passed with `--model`; `--m` is not an option that
    # whisper-server documents.
    cmd: >
      whisper-server
      --port ${PORT}
      --model /models/whisper.bin
      --flash-attn
      --request-path /v1/audio/transcriptions --inference-path ""

  "image":
    checkEndpoint: /
    # Listens on ${PORT} like the other models. A fixed port here would also
    # need a matching `proxy:` entry, since no `proxy:` is configured.
    cmd: |
      /app/sd-server
      --listen-port ${PORT}
      --diffusion-fa
      --diffusion-model /models/z_image_turbo-Q8_0.gguf
      --vae /models/ae.safetensors
      --llm /models/qwen3-4b-instruct-2507-q8_0.gguf
      --offload-to-cpu
      --cfg-scale 1.0
      --height 512 --width 512
      --steps 8