diff --git a/model.cpp b/model.cpp
index 71b3c1bb..3adbec9f 100644
--- a/model.cpp
+++ b/model.cpp
@@ -1296,7 +1296,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
         if (backend == NULL || ggml_backend_is_cpu(backend)) {
             // for the CPU and Metal backend, we can copy directly into the tensor
             if (tensor_storage.type == dst_tensor->type) {
-                GGML_ASSERT(ggml_nbytes(dst_tensor) == nbytes_to_read);
+                GGML_ASSERT(ggml_nbytes(dst_tensor) == tensor_storage.nbytes());
                 read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
 
                 if (tensor_storage.is_bf16) {
@@ -1349,16 +1349,23 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
     return success;
 }
 
-int64_t ModelLoader::cal_mem_size() {
+int64_t ModelLoader::cal_mem_size(ggml_backend_t backend) {
+    size_t alignment = 128;
+    if (backend != NULL) {
+        alignment = ggml_backend_get_alignment(backend);
+    }
     int64_t mem_size = 0;
+    std::vector<TensorStorage> processed_tensor_storages;
     for (auto& tensor_storage : tensor_storages) {
         if (is_unused_tensor(tensor_storage.name)) {
             continue;
         }
+        preprocess_tensor(tensor_storage, processed_tensor_storages);
+    }
 
-        mem_size += tensor_storage.nbytes();
-        mem_size += GGML_MEM_ALIGN * 2;  // for lora alphas
+    for (auto& tensor_storage : processed_tensor_storages) {
+        mem_size += tensor_storage.nbytes() + alignment;
     }
 
-    return mem_size + 10 * 1024 * 1024;
+    return mem_size;
 }
diff --git a/model.h b/model.h
index 349a0279..6f27cdbf 100644
--- a/model.h
+++ b/model.h
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "ggml/ggml.h"
+#include "ggml/ggml-backend.h"
 #include "json.hpp"
 #include "zip.h"
 
@@ -116,7 +117,7 @@ class ModelLoader {
     ggml_type get_sd_wtype();
     bool load_vocab(on_new_token_cb_t on_new_token_cb);
     bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb);
-    int64_t cal_mem_size();
+    int64_t cal_mem_size(ggml_backend_t backend);
     ~ModelLoader() = default;
 };
 #endif  // __MODEL_H__
\ No newline at end of file
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 6d97cbfa..53609c87 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -4016,7 +4016,7 @@ struct LoraModel {
         ggml_type wtype = model_loader.get_sd_wtype();
 
         LOG_DEBUG("calculating buffer size");
-        int64_t memory_buffer_size = model_loader.cal_mem_size();
+        int64_t memory_buffer_size = model_loader.cal_mem_size(backend);
         LOG_DEBUG("lora params backend buffer size = % 6.2f MB", memory_buffer_size / (1024.0 * 1024.0));
 
         params_buffer_lora = ggml_backend_alloc_buffer(backend, memory_buffer_size);
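
For context, a minimal standalone sketch (not part of the patch; tensor sizes and helper names are hypothetical) of why the new estimate reserves nbytes() + alignment per tensor instead of a flat GGML_MEM_ALIGN * 2 plus a 10 MB fudge factor: when tensors are packed into a single backend buffer, each tensor's offset is rounded up to the backend's alignment, so one full alignment step per tensor is a safe worst-case overhead.

// Sketch only: compares the exact packed size with the upper bound
// that cal_mem_size() now reserves. All sizes below are made up.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Round x up to the next multiple of align (a power of two),
// analogous to ggml's GGML_PAD macro.
static size_t pad_to(size_t x, size_t align) {
    return (x + align - 1) & ~(align - 1);
}

int main() {
    const size_t alignment = 128;  // fallback used when backend == NULL
    const std::vector<size_t> tensor_nbytes = {819200, 640, 491520};  // hypothetical

    int64_t exact = 0, reserved = 0;
    for (size_t nbytes : tensor_nbytes) {
        exact += (int64_t)pad_to(nbytes, alignment);  // what packing actually consumes
        reserved += (int64_t)(nbytes + alignment);    // upper bound used by cal_mem_size
    }
    printf("exact packed size: %lld bytes, reserved: %lld bytes\n",
           (long long)exact, (long long)reserved);
    return 0;
}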