
Commit 689bc26: fix cal mem size

leejet committed Dec 4, 2023
1 parent 40f1c4a commit 689bc26
Showing 3 changed files with 15 additions and 7 deletions.
model.cpp (17 changes: 12 additions & 5 deletions)
@@ -1296,7 +1296,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
         if (backend == NULL || ggml_backend_is_cpu(backend)) {
             // for the CPU and Metal backend, we can copy directly into the tensor
             if (tensor_storage.type == dst_tensor->type) {
-                GGML_ASSERT(ggml_nbytes(dst_tensor) == nbytes_to_read);
+                GGML_ASSERT(ggml_nbytes(dst_tensor) == tensor_storage.nbytes());
                 read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
 
                 if (tensor_storage.is_bf16) {
@@ -1349,16 +1349,23 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
     return success;
 }
 
-int64_t ModelLoader::cal_mem_size() {
+int64_t ModelLoader::cal_mem_size(ggml_backend_t backend) {
+    size_t alignment = 128;
+    if (backend != NULL) {
+        alignment = ggml_backend_get_alignment(backend);
+    }
     int64_t mem_size = 0;
+    std::vector<TensorStorage> processed_tensor_storages;
     for (auto& tensor_storage : tensor_storages) {
         if (is_unused_tensor(tensor_storage.name)) {
             continue;
         }
+        preprocess_tensor(tensor_storage, processed_tensor_storages);
+    }
 
-        mem_size += tensor_storage.nbytes();
-        mem_size += GGML_MEM_ALIGN * 2;  // for lora alphas
+    for (auto& tensor_storage : processed_tensor_storages) {
+        mem_size += tensor_storage.nbytes() + alignment;
     }
 
-    return mem_size + 10 * 1024 * 1024;
+    return mem_size;
 }
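
Why padding each tensor by the backend alignment is a safe budget (my reading of the change, not part of the commit): when tensors are packed into a backend buffer, each tensor's offset is rounded up to the alignment, wasting at most alignment - 1 bytes per tensor, so nbytes() + alignment over-approximates every placement. A minimal, self-contained sketch of that round-up arithmetic, using a hypothetical align_up helper:

#include <cassert>
#include <cstddef>

// Round `offset` up to the next multiple of `alignment` (assumes alignment
// is a power of two, which backend buffer alignments are in practice).
static size_t align_up(size_t offset, size_t alignment) {
    return (offset + alignment - 1) & ~(alignment - 1);
}

int main() {
    const size_t alignment = 128;  // the fallback used when backend == NULL
    // A tensor ending at offset 1000: the next tensor starts at the aligned
    // offset 1024, so only 24 bytes of padding are lost -- always less than
    // the full `alignment` budgeted per tensor in the new estimate.
    assert(align_up(1000, alignment) == 1024);
    assert(align_up(1000, alignment) - 1000 < alignment);
    return 0;
}

Unlike the old flat GGML_MEM_ALIGN * 2 per tensor plus a fixed 10 MB of slack, the padding now scales with the backend's actual alignment (typically wider on GPU backends than on the CPU), which is presumably what the old estimate got wrong.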
model.h (3 changes: 2 additions & 1 deletion)
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "ggml/ggml.h"
+#include "ggml/ggml-backend.h"
 #include "json.hpp"
 #include "zip.h"

@@ -116,7 +117,7 @@ class ModelLoader {
     ggml_type get_sd_wtype();
     bool load_vocab(on_new_token_cb_t on_new_token_cb);
     bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb);
-    int64_t cal_mem_size();
+    int64_t cal_mem_size(ggml_backend_t backend);
     ~ModelLoader() = default;
 };
 #endif  // __MODEL_H__
stable-diffusion.cpp (2 changes: 1 addition & 1 deletion)
@@ -4016,7 +4016,7 @@ struct LoraModel {
         ggml_type wtype = model_loader.get_sd_wtype();
 
         LOG_DEBUG("calculating buffer size");
-        int64_t memory_buffer_size = model_loader.cal_mem_size();
+        int64_t memory_buffer_size = model_loader.cal_mem_size(backend);
         LOG_DEBUG("lora params backend buffer size = % 6.2f MB", memory_buffer_size / (1024.0 * 1024.0));
 
         params_buffer_lora = ggml_backend_alloc_buffer(backend, memory_buffer_size);
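
For context, the call-site pattern this change enables: estimate with the same backend the buffer will live on, then allocate once. A hedged sketch, not the project's code; ggml_backend_cpu_init stands in for whatever backend LoraModel actually uses, and alloc_lora_params_sketch is a hypothetical name:

#include <cstdint>

#include "ggml/ggml.h"
#include "ggml/ggml-backend.h"
#include "model.h"

void alloc_lora_params_sketch(ModelLoader& model_loader) {
    ggml_backend_t backend = ggml_backend_cpu_init();  // stand-in backend
    // Estimate with the target backend so the per-tensor padding inside
    // cal_mem_size matches this backend's real alignment.
    int64_t memory_buffer_size = model_loader.cal_mem_size(backend);
    ggml_backend_buffer_t buffer = ggml_backend_alloc_buffer(backend, memory_buffer_size);
    // ... create tensors inside `buffer` and call model_loader.load_tensors(...) ...
    ggml_backend_buffer_free(buffer);
    ggml_backend_free(backend);
}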
