ctx size and mmap

This commit is contained in:
2026-02-14 20:28:57 -06:00
parent c37a25dddc
commit e8e2284d7f

View File

@@ -1,6 +1,6 @@
{self, inputs, ...}:{
flake.nixosModules.llama = { lib, pkgs, ... }: let
-llama_cmd = "${pkgs.llama-cpp-cuda}/bin/llama-server --port \${PORT}";
+llama_cmd = "${pkgs.llama-cpp-cuda}/bin/llama-server --port \${PORT} --mmap";
sd_cmd = "${pkgs.stable-diffusion-cpp-cuda}/bin/sd-server --listen-port \${PORT}";
chat_models = "/var/AI/Models/Chat/ggufs";
sd_models = "/var/AI/Models/Art";
@@ -22,7 +22,7 @@ in {
models = {
"magidonia" = {
ttl = 3600;
-cmd = "${llama_cmd} -m ${chat_models}/Magidonia-24B-v4.3-Q4_K_M.gguf";
+cmd = "${llama_cmd} -m ${chat_models}/Magidonia-24B-v4.3-Q4_K_M.gguf --ctx-size 131072";
};
"cyberrealistic" = {
ttl = 3600;