Add --mmap to the llama-server command and set --ctx-size 131072 for magidonia
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
{self, inputs, ...}:{
|
||||
flake.nixosModules.llama = { lib, pkgs, ... }: let
|
||||
llama_cmd = "${pkgs.llama-cpp-cuda}/bin/llama-server --port \${PORT}";
|
||||
llama_cmd = "${pkgs.llama-cpp-cuda}/bin/llama-server --port \${PORT} --mmap";
|
||||
sd_cmd = "${pkgs.stable-diffusion-cpp-cuda}/bin/sd-server --listen-port \${PORT}";
|
||||
chat_models = "/var/AI/Models/Chat/ggufs";
|
||||
sd_models = "/var/AI/Models/Art";
|
||||
@@ -22,7 +22,7 @@ in {
|
||||
models = {
|
||||
"magidonia" = {
|
||||
ttl = 3600;
|
||||
cmd = "${llama_cmd} -m ${chat_models}/Magidonia-24B-v4.3-Q4_K_M.gguf";
|
||||
cmd = "${llama_cmd} -m ${chat_models}/Magidonia-24B-v4.3-Q4_K_M.gguf --ctx-size 131072";
|
||||
};
|
||||
"cyberrealistic" = {
|
||||
ttl = 3600;
|
||||
|
||||
Reference in New Issue
Block a user