{ self, inputs, ... }:
{
  flake.nixosModules.llama = { lib, pkgs, ... }: {
    # Build llama.cpp with CUDA and BLAS support enabled.
    nixpkgs.overlays = [
      (final: prev: {
        llama-cpp-cuda = prev.llama-cpp.override {
          cudaSupport = true;
          blasSupport = true;
        };
      })
    ];

    # llama-swap proxies requests and starts/stops llama-server instances on demand.
    services.llama-swap = {
      enable = true;
      port = 9001;
      openFirewall = true;
      listenAddress = "0.0.0.0";
      settings = {
        models = {
          "magidonia" = {
            # Unload the model after an hour of inactivity.
            ttl = 3600;
            # \${PORT} is escaped so llama-swap substitutes it at runtime.
            cmd = "${pkgs.llama-cpp-cuda}/bin/llama-server --port \${PORT} -m /var/AI/Models/Chat/ggufs/Magidonia-24B-v4.3-Q4_K_M.gguf";
          };
        };
      };
    };

    environment.systemPackages = with pkgs; [ llama-cpp-cuda ];
  };
}
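
# A minimal sketch of how this module might be consumed from the same flake's
# nixosConfigurations. The host name "gpu-host" and the hardware configuration
# path are placeholders, not part of the config above:
#
#   flake.nixosConfigurations.gpu-host = inputs.nixpkgs.lib.nixosSystem {
#     system = "x86_64-linux";
#     modules = [
#       self.nixosModules.llama
#       ./hosts/gpu-host/configuration.nix
#     ];
#   };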