# Flake-parts module exporting a NixOS module that runs llama-swap as a
# model-switching proxy in front of llama.cpp (chat) and
# stable-diffusion.cpp (image generation), both built with CUDA.
{ self, inputs, ... }: {
  flake.nixosModules.llama = { pkgs, ... }:
    # NOTE(review): dropped the unused `lib` module argument; `...` still
    # absorbs any other module arguments.
    let
      # llama-swap substitutes the literal ${PORT} macro at launch time,
      # hence the escaped \${PORT} (kept out of Nix interpolation).
      llama_cmd = "${pkgs.llama-cpp-cuda}/bin/llama-server --port \${PORT} --mmap";
      sd_cmd = "${pkgs.stable-diffusion-cpp-cuda}/bin/sd-server --listen-port \${PORT}";
      # On-disk model locations (host paths, not managed by Nix).
      chat_models = "/var/AI/Models/Chat/ggufs";
      sd_models = "/var/AI/Models/Art";
    in {
      # Build llama.cpp with CUDA and BLAS enabled, exposed as
      # `pkgs.llama-cpp-cuda` so the service and systemPackages agree.
      #
      # NOTE(review): `stable-diffusion-cpp-cuda` is referenced below but is
      # NOT defined by this overlay — presumably it comes from another
      # overlay or flake input; verify it exists or evaluation will fail.
      nixpkgs.overlays = [
        (final: prev: {
          llama-cpp-cuda = prev.llama-cpp.override {
            cudaSupport = true;
            blasSupport = true;
          };
        })
      ];

      services.llama-swap = {
        enable = true;
        port = 9001;
        # Listen on all interfaces and open the firewall port so LAN
        # clients can reach the proxy.
        openFirewall = true;
        listenAddress = "0.0.0.0";
        settings = {
          models = {
            # Chat model: unload after 1h idle (ttl is seconds), full
            # 128k context window.
            "magidonia" = {
              ttl = 3600;
              cmd = "${llama_cmd} -m ${chat_models}/Magidonia-24B-v4.3-Q4_K_M.gguf --ctx-size 131072";
            };
            # Image model served via stable-diffusion.cpp's server.
            "cyberrealistic" = {
              ttl = 3600;
              cmd = "${sd_cmd} --model ${sd_models}/cyberrealisticPony_semiRealV45.safetensors";
            };
          };
        };
      };

      # Expose both CLIs system-wide for manual invocation/debugging.
      environment.systemPackages = with pkgs; [
        llama-cpp-cuda
        stable-diffusion-cpp-cuda
      ];
    };
}