services:
  tts:
    build:
      context: .
      dockerfile: TTS.dockerfile
    ports:
      - "8000:8000" # TTS server.
    command: ["bash", "-c", "source venv/bin/activate && cd RealtimeTTS/example_fast_api && python server.py"]
    env_file: .env
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  stt:
    build:
      context: .
      dockerfile: STT.dockerfile
    ports:
      - "8011:8011" # STT server control.
      - "8012:8012" # STT server data.
    command: ["bash", "-c", "source venv/bin/activate && stt-server --silero_deactivity_detection"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  llm:
    build:
      context: .
      dockerfile: LLM.dockerfile
    ports:
      - "8013:8012" # STT server data.
      - "65432:65432" # TTS client server.
    volumes:
      - .:/app
    command: ["bash", "-c", "source venv/bin/activate && python /app/main.py"]
    depends_on:
      - tts
      - stt
      - ollama
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  ollama:
    image: ollama/ollama:latest
    volumes:
      - ./ollama/ollama:/root/.ollama
    ports:
      - "7869:11434"
    environment:
      - OLLAMA_KEEP_ALIVE=24h
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  ollama-webui:
    image: ghcr.io/open-webui/open-webui:main
    volumes:
      - ./ollama/ollama-webui:/app/backend/data
    depends_on:
      - ollama
    ports:
      - "8080:8080"
    environment:
      - OLLAMA_BASE_URLS=http://host.docker.internal:7869
      - ENV=dev
      - WEBUI_AUTH=False
      - WEBUI_NAME=WebUI
      - WEBUI_URL=http://localhost:8080
      - WEBUI_SECRET_KEY=t0p-s3cr3t
    extra_hosts:
      - host.docker.internal:host-gateway
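# Bring-up sketch (an assumption, not part of the original file: it presumes
# Docker Compose v2 and the NVIDIA Container Toolkit are installed on the
# host; service names and ports are taken from the definitions above):
#
#   docker compose up -d --build     # build the images and start every service
#   docker compose logs -f llm      # follow the orchestrating llm service
#
# Note: inside the compose network, services can reach ollama directly at
# http://ollama:11434; the "7869:11434" host mapping is only needed from
# outside the network, which is how ollama-webui uses it via
# host.docker.internal.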