
Commit 689bc26: fix cal mem size

leejet committed Dec 4, 2023
1 parent 40f1c4a commit 689bc26
Showing 3 changed files with 15 additions and 7 deletions.
model.cpp (17 changes: 12 additions & 5 deletions)
@@ -1296,7 +1296,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
         if (backend == NULL || ggml_backend_is_cpu(backend)) {
             // for the CPU and Metal backend, we can copy directly into the tensor
             if (tensor_storage.type == dst_tensor->type) {
-                GGML_ASSERT(ggml_nbytes(dst_tensor) == nbytes_to_read);
+                GGML_ASSERT(ggml_nbytes(dst_tensor) == tensor_storage.nbytes());
                 read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
 
                 if (tensor_storage.is_bf16) {
@@ -1349,16 +1349,23 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
     return success;
 }
 
-int64_t ModelLoader::cal_mem_size() {
+int64_t ModelLoader::cal_mem_size(ggml_backend_t backend) {
+    size_t alignment = 128;
+    if (backend != NULL) {
+        alignment = ggml_backend_get_alignment(backend);
+    }
     int64_t mem_size = 0;
+    std::vector<TensorStorage> processed_tensor_storages;
     for (auto& tensor_storage : tensor_storages) {
         if (is_unused_tensor(tensor_storage.name)) {
             continue;
         }
+        preprocess_tensor(tensor_storage, processed_tensor_storages);
+    }
 
-        mem_size += tensor_storage.nbytes();
-        mem_size += GGML_MEM_ALIGN * 2;  // for lora alphas
+    for (auto& tensor_storage : processed_tensor_storages) {
+        mem_size += tensor_storage.nbytes() + alignment;
     }
 
-    return mem_size + 10 * 1024 * 1024;
+    return mem_size;
 }
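
Why padding each tensor by the backend alignment is a safe budget (my reading of the change, not part of the commit): when tensors are packed into a backend buffer, each tensor's offset is rounded up to the alignment, wasting at most alignment - 1 bytes per tensor, so nbytes() + alignment over-approximates every placement. A minimal, self-contained sketch of that round-up arithmetic, using a hypothetical align_up helper:

#include <cassert>
#include <cstddef>

// Round `offset` up to the next multiple of `alignment` (assumes alignment
// is a power of two, which backend buffer alignments are in practice).
static size_t align_up(size_t offset, size_t alignment) {
    return (offset + alignment - 1) & ~(alignment - 1);
}

int main() {
    const size_t alignment = 128;  // the fallback used when backend == NULL
    // A tensor ending at offset 1000: the next tensor starts at the aligned
    // offset 1024, so only 24 bytes of padding are lost -- always less than
    // the full `alignment` budgeted per tensor in the new estimate.
    assert(align_up(1000, alignment) == 1024);
    assert(align_up(1000, alignment) - 1000 < alignment);
    return 0;
}

Unlike the old flat GGML_MEM_ALIGN * 2 per tensor plus a fixed 10 MB of slack, the padding now scales with the backend's actual alignment (typically wider on GPU backends than on the CPU), which is presumably what the old estimate got wrong.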
model.h (3 changes: 2 additions & 1 deletion)
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "ggml/ggml.h"
+#include "ggml/ggml-backend.h"
 #include "json.hpp"
 #include "zip.h"

@@ -116,7 +117,7 @@ class ModelLoader {
     ggml_type get_sd_wtype();
     bool load_vocab(on_new_token_cb_t on_new_token_cb);
     bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb);
-    int64_t cal_mem_size();
+    int64_t cal_mem_size(ggml_backend_t backend);
     ~ModelLoader() = default;
 };
 #endif  // __MODEL_H__
stable-diffusion.cpp (2 changes: 1 addition & 1 deletion)
@@ -4016,7 +4016,7 @@ struct LoraModel {
         ggml_type wtype = model_loader.get_sd_wtype();
 
         LOG_DEBUG("calculating buffer size");
-        int64_t memory_buffer_size = model_loader.cal_mem_size();
+        int64_t memory_buffer_size = model_loader.cal_mem_size(backend);
         LOG_DEBUG("lora params backend buffer size = % 6.2f MB", memory_buffer_size / (1024.0 * 1024.0));
 
         params_buffer_lora = ggml_backend_alloc_buffer(backend, memory_buffer_size);
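
For context, the call-site pattern this change enables: estimate with the same backend the buffer will live on, then allocate once. A hedged sketch, not the project's code; ggml_backend_cpu_init stands in for whatever backend LoraModel actually uses, and alloc_lora_params_sketch is a hypothetical name:

#include <cstdint>

#include "ggml/ggml.h"
#include "ggml/ggml-backend.h"
#include "model.h"

void alloc_lora_params_sketch(ModelLoader& model_loader) {
    ggml_backend_t backend = ggml_backend_cpu_init();  // stand-in backend
    // Estimate with the target backend so the per-tensor padding inside
    // cal_mem_size matches this backend's real alignment.
    int64_t memory_buffer_size = model_loader.cal_mem_size(backend);
    ggml_backend_buffer_t buffer = ggml_backend_alloc_buffer(backend, memory_buffer_size);
    // ... create tensors inside `buffer` and call model_loader.load_tensors(...) ...
    ggml_backend_buffer_free(buffer);
    ggml_backend_free(backend);
}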
