Set magidonia --ctx-size to 131072 and pass --mmap to llama-server
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
{self, inputs, ...}:{
|
{self, inputs, ...}:{
|
||||||
flake.nixosModules.llama = { lib, pkgs, ... }: let
|
flake.nixosModules.llama = { lib, pkgs, ... }: let
|
||||||
llama_cmd = "${pkgs.llama-cpp-cuda}/bin/llama-server --port \${PORT}";
|
llama_cmd = "${pkgs.llama-cpp-cuda}/bin/llama-server --port \${PORT} --mmap";
|
||||||
sd_cmd = "${pkgs.stable-diffusion-cpp-cuda}/bin/sd-server --listen-port \${PORT}";
|
sd_cmd = "${pkgs.stable-diffusion-cpp-cuda}/bin/sd-server --listen-port \${PORT}";
|
||||||
chat_models = "/var/AI/Models/Chat/ggufs";
|
chat_models = "/var/AI/Models/Chat/ggufs";
|
||||||
sd_models = "/var/AI/Models/Art";
|
sd_models = "/var/AI/Models/Art";
|
||||||
@@ -22,7 +22,7 @@ in {
|
|||||||
models = {
|
models = {
|
||||||
"magidonia" = {
|
"magidonia" = {
|
||||||
ttl = 3600;
|
ttl = 3600;
|
||||||
cmd = "${llama_cmd} -m ${chat_models}/Magidonia-24B-v4.3-Q4_K_M.gguf";
|
cmd = "${llama_cmd} -m ${chat_models}/Magidonia-24B-v4.3-Q4_K_M.gguf --ctx-size 131072";
|
||||||
};
|
};
|
||||||
"cyberrealistic" = {
|
"cyberrealistic" = {
|
||||||
ttl = 3600;
|
ttl = 3600;
|
||||||
|
|||||||
Reference in New Issue
Block a user