Skip to content

Commit

Permalink
Merge pull request #89 from noskill/ref
Browse files (browse the repository at this point in the history)
use sequential cpu offload for limited gpu ram
  • Loading branch information
Necr0x0Der authored Dec 17, 2024
2 parents f112432 + 7154d5a commit 05f2c07
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 5 deletions.
12 changes: 9 additions & 3 deletions multigen/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,15 @@ def _get_pipeline(self, pipe_class, model_id, model_type, cnet=None, quantize_dt
# use quantisation by default for now
cls = pipe_class._classflux
if device.type == 'cuda':
quantize_dtype = qfloat8
# offload_device = device.index
# device = torch.device('cpu')
mb = torch.cuda.get_device_properties(device.index).total_memory / 1024 / 1024
# quantize when the GPU has more than 23000 MiB (roughly 23 GB) of total memory;
# otherwise fall back to sequential CPU offload
if 23000 < mb:
self.logger.debug(f"set quantisation for the pipe on cuda:{device.index} has {mb}Mb")
quantize_dtype = qfloat8
else:
offload_device = device.index
device = torch.device('cpu')
else:
cls = pipe_class._class
pipeline = self._loader.load_pipeline(cls, model_id, torch_dtype=torch.bfloat16,
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ psutil
sentencepiece
protobuf
accelerate
optimum-quanto
optimum-quanto==0.2.4
2 changes: 1 addition & 1 deletion tests/test_worker_flux.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def on_new_image(*args, **kwargs):
nonlocal c
c += 1

num_runs = 25
num_runs = 15
for i in range(num_runs):
if len(sessions) - 1 < i:
i %= len(sessions)
Expand Down

0 comments on commit 05f2c07

Please sign in to comment.