From dae7041a401c48984863db622919d84603469848 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Rodr=C3=ADguez=20Salamanca?=
Date: Mon, 15 May 2023 22:33:16 +0200
Subject: [PATCH 01/18] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 727a86cb5..5cf1e6ee0 100644
--- a/README.md
+++ b/README.md
@@ -102,7 +102,7 @@ For straight Int8 matrix multiplication with mixed precision decomposition you c
 bnb.matmul(..., threshold=6.0)
 ```
 
-For instructions how to use LLM.int8() inference layers in your own code, see the TL;DR above or for extended instruction see [this blog post](https://github.com/huggingface/transformers).
+For instructions how to use LLM.int8() inference layers in your own code, see the TL;DR above or for extended instruction see [this blog post](https://huggingface.co/blog/hf-bitsandbytes-integration).
 
 ### Using the 8-bit Optimizers

From 6b26402b793525ec317fadff260548f5b36d936f Mon Sep 17 00:00:00 2001
From: Aarni Koskela
Date: Thu, 25 May 2023 15:25:58 +0300
Subject: [PATCH 02/18] Fix typo "quanitze"

---
 benchmarking/switchback/speed_benchmark.py            |  6 +++---
 bitsandbytes/nn/triton_based_modules.py               | 12 ++++++------
 ...dequanitze.py => int8_matmul_mixed_dequantize.py}  |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)
 rename bitsandbytes/triton/{int8_matmul_mixed_dequanitze.py => int8_matmul_mixed_dequantize.py} (98%)

diff --git a/benchmarking/switchback/speed_benchmark.py b/benchmarking/switchback/speed_benchmark.py
index 9ad991194..b0983d0b8 100644
--- a/benchmarking/switchback/speed_benchmark.py
+++ b/benchmarking/switchback/speed_benchmark.py
@@ -8,7 +8,7 @@
 from bitsandbytes.triton.quantize_columnwise_and_transpose import quantize_columnwise_and_transpose
 from bitsandbytes.triton.int8_matmul_rowwise_dequantize import int8_matmul_rowwise_dequantize
 from bitsandbytes.triton.quantize_global import quantize_global, quantize_global_transpose
-from bitsandbytes.triton.int8_matmul_mixed_dequanitze import int8_matmul_mixed_dequanitze
+from bitsandbytes.triton.int8_matmul_mixed_dequantize import int8_matmul_mixed_dequantize
 
 
 # KNOW ISSUE: need to optimize "w_quantize_colwise_transpose" when embeddim is too large.
@@ -72,8 +72,8 @@ def get_time(k, fn, info_dict):
     get_time('standard_gx', lambda : g.matmul(w), info)
     get_time('rowwise_fwd', lambda : int8_matmul_rowwise_dequantize(x_int8, w_int8.t(), state_x_rowwise, state_w_columnwise, None), info)
     get_time('rowwise_bwd', lambda : int8_matmul_rowwise_dequantize(g_int8, wt_int8.t(), state_x_rowwise, state_w_rowwise, None), info)
-    get_time('global_fwd', lambda : int8_matmul_mixed_dequanitze(x_int8, w_int8.t(), state_x_rowwise, state_w_global, None), info)
-    get_time('global_bwd', lambda : int8_matmul_mixed_dequanitze(g_int8, wt_int8.t(), state_x_rowwise, state_w_global, None), info)
+    get_time('global_fwd', lambda : int8_matmul_mixed_dequantize(x_int8, w_int8.t(), state_x_rowwise, state_w_global, None), info)
+    get_time('global_bwd', lambda : int8_matmul_mixed_dequantize(g_int8, wt_int8.t(), state_x_rowwise, state_w_global, None), info)
     get_time('x_quantize_rowwise', lambda : quantize_rowwise(x), info)
     get_time('g_quantize_rowwise', lambda : quantize_rowwise(g), info)
     get_time('w_quantize_rowwise', lambda : quantize_rowwise(w), info)
diff --git a/bitsandbytes/nn/triton_based_modules.py b/bitsandbytes/nn/triton_based_modules.py
index 6fbf583b9..de07ac647 100644
--- a/bitsandbytes/nn/triton_based_modules.py
+++ b/bitsandbytes/nn/triton_based_modules.py
@@ -10,7 +10,7 @@
 from bitsandbytes.triton.quantize_columnwise_and_transpose import quantize_columnwise_and_transpose
 from bitsandbytes.triton.int8_matmul_rowwise_dequantize import int8_matmul_rowwise_dequantize
 from bitsandbytes.triton.quantize_global import quantize_global, quantize_global_transpose
-from bitsandbytes.triton.int8_matmul_mixed_dequanitze import int8_matmul_mixed_dequanitze
+from bitsandbytes.triton.int8_matmul_mixed_dequantize import int8_matmul_mixed_dequantize
 
 
 class _switchback_global(torch.autograd.Function):
@@ -29,7 +29,7 @@ def forward(ctx, X_3D, W, bias):
 
         # matmult, fused dequant and add bias
         # call "mixed" because we are mixing rowwise quantized and global quantized
-        return int8_matmul_mixed_dequanitze(
+        return int8_matmul_mixed_dequantize(
             X_int8, W_int8.t(), state_X, state_W, bias
         ).view(*X_3D.size()[:-1], -1)
@@ -47,7 +47,7 @@ def backward(ctx, G_3D):
             # so we transpose once then call .t() in the matmul
             G_int8, state_G = quantize_rowwise(G)
             W_int8, state_W = quantize_global_transpose(W)
-            grad_X = int8_matmul_mixed_dequanitze(G_int8, W_int8.t(), state_G, state_W, None).view(
+            grad_X = int8_matmul_mixed_dequantize(G_int8, W_int8.t(), state_G, state_W, None).view(
                 *G_3D.size()[:-1], -1
             )
         if ctx.needs_input_grad[1]:
@@ -119,7 +119,7 @@ def forward(ctx, X_3D, W, bias):
 
         # matmult, fused dequant and add bias
         # call "mixed" because we are mixing rowwise quantized and global quantized
-        return int8_matmul_mixed_dequanitze(
+        return int8_matmul_mixed_dequantize(
             X_int8, W_int8.t(), state_X, state_W, bias
         ).view(*X_3D_sz[:-1], -1)
@@ -143,7 +143,7 @@ def backward(ctx, G_3D):
             G_int8, state_G = quantize_rowwise(G)
             del G
             W_int8 = W_int8.t().contiguous()
-            grad_X = int8_matmul_mixed_dequanitze(G_int8, W_int8.t(), state_G, state_W, None).view(
+            grad_X = int8_matmul_mixed_dequantize(G_int8, W_int8.t(), state_G, state_W, None).view(
                 *G_3D_sz[:-1], -1
             )
@@ -215,7 +215,7 @@ def forward(self, x):
                 X_int8, self.W_int8.t(), state_X, self.state_W, self.bias
             ).view(*x.size()[:-1], -1)
         else:
-            return int8_matmul_mixed_dequanitze(
+            return int8_matmul_mixed_dequantize(
                 X_int8, self.W_int8.t(), state_X, self.state_W, self.bias
             ).view(*x.size()[:-1], -1)
diff --git a/bitsandbytes/triton/int8_matmul_mixed_dequanitze.py b/bitsandbytes/triton/int8_matmul_mixed_dequantize.py
similarity index 98%
rename from bitsandbytes/triton/int8_matmul_mixed_dequanitze.py
rename to bitsandbytes/triton/int8_matmul_mixed_dequantize.py
index 60a56e698..b0961f558 100644
--- a/bitsandbytes/triton/int8_matmul_mixed_dequanitze.py
+++ b/bitsandbytes/triton/int8_matmul_mixed_dequantize.py
@@ -2,7 +2,7 @@
 from bitsandbytes.triton.triton_utils import is_triton_available
 
 if not is_triton_available():
-    def int8_matmul_mixed_dequanitze(a, b, state_x, state_w, bias): return None
+    def int8_matmul_mixed_dequantize(a, b, state_x, state_w, bias): return None
 else:
 
     import triton
@@ -136,7 +136,7 @@ def _int8_matmul_mixed_dequantize(A, B, C, bias, state_x_ptr, state_w_ptr, M, N,
             tl.atomic_add(C, acc, mask=mask)
 
 
-    def int8_matmul_mixed_dequanitze(a, b, state_x, state_w, bias):
+    def int8_matmul_mixed_dequantize(a, b, state_x, state_w, bias):
         device = a.device
         divfactor = 1. / (127. * 127.)
         has_bias = 0 if bias is None else 1

From ea0f7935717aa33aee56c4e5c7f149d35e08724a Mon Sep 17 00:00:00 2001
From: Pranav
Date: Wed, 7 Jun 2023 12:51:31 -0700
Subject: [PATCH 03/18] Update README.md

Changed misleading Hardware requirements from "2018 or older" to "2018 or newer"
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 727a86cb5..0488007b5 100644
--- a/README.md
+++ b/README.md
@@ -80,7 +80,7 @@ out = linear(x.to(torch.float16))
 Requirements: anaconda, cudatoolkit, pytorch
 
 Hardware requirements:
- - LLM.int8(): NVIDIA Turing (RTX 20xx; T4) or Ampere GPU (RTX 30xx; A4-A100); (a GPU from 2018 or older).
+ - LLM.int8(): NVIDIA Turing (RTX 20xx; T4) or Ampere GPU (RTX 30xx; A4-A100); (a GPU from 2018 or newer).
  - 8-bit optimizers and quantization: NVIDIA Kepler GPU or newer (>=GTX 78X).
 
 Supported CUDA versions: 10.2 - 12.0

From 237ad4920552dc04cbb4e6eac27aa57ad2c4456e Mon Sep 17 00:00:00 2001
From: dulalbert <113252426+dulalbert@users.noreply.github.com>
Date: Tue, 20 Jun 2023 10:53:30 +0800
Subject: [PATCH 04/18] Added scipy to requirements.txt

Added scipy to requirements.txt as it is used but not added to requirements
---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 883b2e42e..3bde2dc6a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 lion-pytorch
 pytest
+scipy

From c2494a61a3810b307873e6bab08402c4d463c42a Mon Sep 17 00:00:00 2001
From: Neel Gupta
Date: Tue, 4 Jul 2023 14:54:15 +0100
Subject: [PATCH 05/18] Fixed missing `Embedding` export

---
 bitsandbytes/nn/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bitsandbytes/nn/__init__.py b/bitsandbytes/nn/__init__.py
index 49d7b5ced..6fa6d1183 100644
--- a/bitsandbytes/nn/__init__.py
+++ b/bitsandbytes/nn/__init__.py
@@ -2,5 +2,5 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
-from .modules import Int8Params, Linear8bitLt, StableEmbedding, Linear4bit, LinearNF4, LinearFP4, Params4bit, OutlierAwareLinear, SwitchBackLinearBnb
+from .modules import Int8Params, Linear8bitLt, StableEmbedding, Linear4bit, LinearNF4, LinearFP4, Params4bit, OutlierAwareLinear, SwitchBackLinearBnb, Embedding
 from .triton_based_modules import SwitchBackLinear, SwitchBackLinearGlobal, SwitchBackLinearVectorwise, StandardLinear

From 5e266532a97f69b799c72a596d0ee4333aaae0c8 Mon Sep 17 00:00:00 2001
From: dulalbert <113252426+dulalbert@users.noreply.github.com>
Date: Wed, 12 Jul 2023 10:22:02 +0800
Subject: [PATCH 06/18] Added scipy to install_requires

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 51e747c65..91a63fddf 100644
--- a/setup.py
+++ b/setup.py
@@ -25,6 +25,7 @@ def read(fname):
     license="MIT",
     keywords="gpu optimizers optimization 8-bit quantization compression",
     url="https://github.com/TimDettmers/bitsandbytes",
+    install_requires=['scipy'],
     packages=find_packages(),
     package_data={"": libs},
     long_description=read("README.md"),

From 87816e4e9c90cfd559d42ad8bdcfe3b5fde18407 Mon Sep 17 00:00:00 2001
From: Ikko Eltociear Ashimine
Date: Tue, 18 Jul 2023 00:44:17 +0900
Subject: [PATCH 07/18] Fix typo in test_optim.py

paramters -> parameters
---
 tests/test_optim.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_optim.py b/tests/test_optim.py
index 9e90083a9..49d4f442a 100644
--- a/tests/test_optim.py
+++ b/tests/test_optim.py
@@ -169,7 +169,7 @@ def test_optimizer32bit(dim1, dim2, gtype, optim_name):
 
         if gtype != torch.float32:
             # the adam buffers should also be close because they are 32-bit
-            # but the paramters can diverge because they are 16-bit
+            # but the parameters can diverge because they are 16-bit
             # the difference grow larger and larger with each update
             # --> copy the state to keep weights close
             p1.data = p1.data.to(p2.dtype).float()

From 4b0e401c6575dd521374d1da4dcfc977f4d056e8 Mon Sep 17 00:00:00 2001
From: Cebtenzzre
Date: Thu, 20 Jul 2023 21:33:51 -0400
Subject: [PATCH 08/18] improve `make clean` target

Make `make clean` remove all build artifacts, and do not warn about
CUDA_VERSION when simply running 'clean'.

Fixes #532
---
 Makefile | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index 7ccbcb191..5f997a122 100644
--- a/Makefile
+++ b/Makefile
@@ -8,9 +8,11 @@ ifeq ($(CUDA_HOME),)
 endif
 
 ifndef CUDA_VERSION
+ifneq ($(MAKECMDGOALS),clean)
 $(warning WARNING: CUDA_VERSION not set. Call make with CUDA string, for example: make cuda11x CUDA_VERSION=115 or make cpuonly CUDA_VERSION=CPU)
 CUDA_VERSION:=
 endif
+endif
 
 
 
@@ -135,10 +137,5 @@ $(ROOT_DIR)/dependencies/cub:
 	cd dependencies/cub; git checkout 1.11.0
 
 clean:
-	rm build/*
-
-cleaneggs:
-	rm -rf *.egg*
-
-cleanlibs:
-	rm ./bitsandbytes/libbitsandbytes*.so
+	rm -rf build/* *.egg*
+	rm -f bitsandbytes/libbitsandbytes*.so

From b4bc3369a0831269e82051ecbefec6b9262bba66 Mon Sep 17 00:00:00 2001
From: Keith Stevens
Date: Fri, 21 Jul 2023 14:30:03 +0900
Subject: [PATCH 09/18] Make sure bitsandbytes handles permission errors in the right order

---
 bitsandbytes/cuda_setup/main.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/bitsandbytes/cuda_setup/main.py b/bitsandbytes/cuda_setup/main.py
index 3b0097100..3dd16ff9f 100644
--- a/bitsandbytes/cuda_setup/main.py
+++ b/bitsandbytes/cuda_setup/main.py
@@ -196,11 +196,13 @@ def remove_non_existent_dirs(candidate_paths: Set[Path]) -> Set[Path]:
         try:
            if path.exists():
                existent_directories.add(path)
+        except PermissionError as pex:
+            # Handle the PermissionError first as it is a subtype of OSError
+            # https://docs.python.org/3/library/exceptions.html#exception-hierarchy
+            pass
         except OSError as exc:
             if exc.errno != errno.ENAMETOOLONG:
                 raise exc
-        except PermissionError as pex:
-            pass
 
     non_existent_directories: Set[Path] = candidate_paths - existent_directories
     if non_existent_directories:
@@ -361,4 +363,4 @@ def evaluate_cuda_setup():
         "if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt.so"
         binary_name = f"libbitsandbytes_cuda{cuda_version_string}_nocublaslt.so"
 
-    return binary_name, cudart_path, cc, cuda_version_string
\ No newline at end of file
+    return binary_name, cudart_path, cc, cuda_version_string

From d76b6ca91b827b5c522bb794d96628d290ee29f6 Mon Sep 17 00:00:00 2001
From: Apoorva Kulkarni
Date: Fri, 21 Jul 2023 23:20:44 -0700
Subject: [PATCH 10/18] doc: Fix typo in how_to_use_nonpytorch_cuda.md

---
 how_to_use_nonpytorch_cuda.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/how_to_use_nonpytorch_cuda.md b/how_to_use_nonpytorch_cuda.md
index 7b717f41c..09dcac7c7 100644
--- a/how_to_use_nonpytorch_cuda.md
+++ b/how_to_use_nonpytorch_cuda.md
@@ -21,7 +21,7 @@ wget https://raw.githubusercontent.com/TimDettmers/bitsandbytes/main/cuda_instal
 # EXPORT_TO_BASH in {0, 1} with 0=False and 1=True
 
 # For example, the following installs CUDA 11.7 to ~/local/cuda-11.7 and exports the path to your .bashrc
-bash cuda install 117 ~/local 1
+bash cuda_install.sh 117 ~/local 1
 ```
 
 ## Setting the environmental variables BNB_CUDA_VERSION, and LD_LIBRARY_PATH

From a51840faa4c305951eb83c088f3e9c484eff6e87 Mon Sep 17 00:00:00 2001
From: Osma Suominen
Date: Fri, 4 Aug 2023 10:32:05 +0300
Subject: [PATCH 11/18] Robustness fix: don't break in case of directories without read permission

---
 bitsandbytes/cuda_setup/main.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/bitsandbytes/cuda_setup/main.py b/bitsandbytes/cuda_setup/main.py
index f3edf4c73..0a0a6defa 100644
--- a/bitsandbytes/cuda_setup/main.py
+++ b/bitsandbytes/cuda_setup/main.py
@@ -214,8 +214,11 @@ def get_cuda_runtime_lib_paths(candidate_paths: Set[Path]) -> Set[Path]:
     paths = set()
     for libname in CUDA_RUNTIME_LIBS:
         for path in candidate_paths:
-            if (path / libname).is_file():
-                paths.add(path / libname)
+            try:
+                if (path / libname).is_file():
+                    paths.add(path / libname)
+            except PermissionError:
+                pass
     return paths
 

From 91c7518dd8a18572dbb8cc471784c24ac1c0e351 Mon Sep 17 00:00:00 2001
From: rasbt
Date: Tue, 15 Aug 2023 19:14:09 -0500
Subject: [PATCH 12/18] add version attribute as per Python convention

---
 bitsandbytes/__init__.py | 2 ++
 setup.py                 | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/bitsandbytes/__init__.py b/bitsandbytes/__init__.py
index f35a3b582..3608de1ec 100644
--- a/bitsandbytes/__init__.py
+++ b/bitsandbytes/__init__.py
@@ -24,4 +24,6 @@
     "optim.optimizer.MockArgs": False,
 }
 
+__version__ = "0.41.1"
+
 PACKAGE_GITHUB_URL = "https://github.com/TimDettmers/bitsandbytes"
diff --git a/setup.py b/setup.py
index d6267088e..405c161a7 100644
--- a/setup.py
+++ b/setup.py
@@ -6,6 +6,9 @@ import os
 
 from setuptools import find_packages, setup
 
+import bitsandbytes as bnb
+
+VERSION = bnb.__version__
 
 libs = list(glob.glob("./bitsandbytes/libbitsandbytes*.so"))
 libs = [os.path.basename(p) for p in libs]
@@ -18,7 +21,7 @@ def read(fname):
 
 setup(
     name=f"bitsandbytes",
-    version=f"0.41.1",
+    version=VERSION,
     author="Tim Dettmers",
     author_email="dettmers@cs.washington.edu",
     description="k-bit optimizers and matrix multiplication routines.",

From 09cd03d70032f9c0b8319d9eaa79b45602d81912 Mon Sep 17 00:00:00 2001
From: Alex Pilon
Date: Thu, 17 Aug 2023 12:51:23 -0400
Subject: [PATCH 13/18] Add env var related to google systems to ignored list

---
 bitsandbytes/cuda_setup/env_vars.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bitsandbytes/cuda_setup/env_vars.py b/bitsandbytes/cuda_setup/env_vars.py
index 4fcb643ee..214ae9c58 100644
--- a/bitsandbytes/cuda_setup/env_vars.py
+++ b/bitsandbytes/cuda_setup/env_vars.py
@@ -19,6 +19,7 @@ def to_be_ignored(env_var: str, value: str) -> bool:
         "PATH",  # this is for finding binaries, not libraries
         "LESSOPEN",  # related to the `less` command
         "LESSCLOSE",
+        "GOOGLE_VM_CONFIG_LOCK_FILE",  # Google Cloud stuff, contains root only paths
         "_",  # current Python interpreter
     }
     return env_var in ignorable

From 9f293ffc92400fffc6da38b84328cbaca2ebc40a Mon Sep 17 00:00:00 2001
From: Merve Noyan
Date: Thu, 24 Aug 2023 13:10:45 +0300
Subject: [PATCH 14/18] Small fix to README

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 727a86cb5..63cf3c1f4 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ python setup.py install
 ```python
 from transformers import AutoModelForCausalLM
 model = AutoModelForCausalLM.from_pretrained(
-  'decapoda-research/llama-7b-hf,
+  'decapoda-research/llama-7b-hf',
   device_map='auto',
   load_in_8bit=True,
   max_memory=f'{int(torch.cuda.mem_get_info()[0]/1024**3)-2}GB')
@@ -119,7 +119,7 @@ torch.nn.Embedding(...) -> bnb.nn.StableEmbedding(...) # recommended for NLP mo
 ```
 Note that by default all parameter tensors with less than 4096 elements are kept at 32-bit even if you initialize those parameters with 8-bit optimizers. This is done since such small tensors do not save much memory and often contain highly variable parameters (biases) or parameters that require high precision (batch norm, layer norm). You can change this behavior like so:
-```
+```python
 # parameter tensors with less than 16384 values are optimized in 32-bit
 # it is recommended to use multiplies of 4096
 adam = bnb.optim.Adam8bit(model.parameters(), min_8bit_size=16384)

From fea5bc7b83deb971f9aa520d0fcc93fbaa588110 Mon Sep 17 00:00:00 2001
From: Donato Riccio <71320919+reese3222@users.noreply.github.com>
Date: Sat, 26 Aug 2023 09:42:19 +0200
Subject: [PATCH 15/18] Fixed wget link for installing cuda

Wget now downloads the correct raw file from github
---
 bitsandbytes/cuda_setup/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bitsandbytes/cuda_setup/main.py b/bitsandbytes/cuda_setup/main.py
index f3edf4c73..1ffcde352 100644
--- a/bitsandbytes/cuda_setup/main.py
+++ b/bitsandbytes/cuda_setup/main.py
@@ -64,7 +64,7 @@ def generate_instructions(self):
         self.add_log_entry('CUDA SETUP: Solution 1b): Once the library is found add it to the LD_LIBRARY_PATH: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:FOUND_PATH_FROM_1a')
         self.add_log_entry('CUDA SETUP: Solution 1c): For a permanent solution add the export from 1b into your .bashrc file, located at ~/.bashrc')
         self.add_log_entry('CUDA SETUP: Solution 2: If no library was found in step 1a) you need to install CUDA.')
-        self.add_log_entry('CUDA SETUP: Solution 2a): Download CUDA install script: wget https://github.com/TimDettmers/bitsandbytes/blob/main/cuda_install.sh')
+        self.add_log_entry('CUDA SETUP: Solution 2a): Download CUDA install script: wget https://raw.githubusercontent.com/TimDettmers/bitsandbytes/main/cuda_install.sh')
         self.add_log_entry('CUDA SETUP: Solution 2b): Install desired CUDA version to desired location. The syntax is bash cuda_install.sh CUDA_VERSION PATH_TO_INSTALL_INTO.')
         self.add_log_entry('CUDA SETUP: Solution 2b): For example, "bash cuda_install.sh 113 ~/local/" will download CUDA 11.3 and install into the folder ~/local')
         return

From 57cfbcb710aa752c08c7617f873cc5ba89005ca7 Mon Sep 17 00:00:00 2001
From: John B Nelson
Date: Sun, 3 Sep 2023 12:09:26 -0700
Subject: [PATCH 16/18] FIX missing closing quote in README example

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 727a86cb5..a2b1b53d6 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ python setup.py install
 ```python
 from transformers import AutoModelForCausalLM
 model = AutoModelForCausalLM.from_pretrained(
-  'decapoda-research/llama-7b-hf,
+  'decapoda-research/llama-7b-hf',
   device_map='auto',
   load_in_8bit=True,
   max_memory=f'{int(torch.cuda.mem_get_info()[0]/1024**3)-2}GB')

From 04e20899a9cf4cc10915d89f651ed9f5df976ccc Mon Sep 17 00:00:00 2001
From: Michael Mior
Date: Fri, 20 Oct 2023 14:48:40 -0400
Subject: [PATCH 17/18] Fix parameter name in error message

---
 bitsandbytes/nn/modules.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bitsandbytes/nn/modules.py b/bitsandbytes/nn/modules.py
index 3d34bb45f..0e0e816be 100644
--- a/bitsandbytes/nn/modules.py
+++ b/bitsandbytes/nn/modules.py
@@ -218,10 +218,10 @@ def set_compute_type(self, x):
             if self.compute_dtype == torch.float32 and (x.numel() == x.shape[-1]):
                 # single batch inference with input torch.float16 and compute_dtype float32 -> slow inference when it could be fast
                 # warn the user about this
-                warnings.warn(f'Input type into Linear4bit is torch.float16, but bnb_4bit_compute_type=torch.float32 (default). This will lead to slow inference.')
+                warnings.warn(f'Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference.')
                 warnings.filterwarnings('ignore', message='.*inference.')
             if self.compute_dtype == torch.float32 and (x.numel() != x.shape[-1]):
-                warnings.warn(f'Input type into Linear4bit is torch.float16, but bnb_4bit_compute_type=torch.float32 (default). This will lead to slow inference or training speed.')
+                warnings.warn(f'Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference or training speed.')
                 warnings.filterwarnings('ignore', message='.*inference or training')

From b5800d35775eed3696326b5307b257fd5066873e Mon Sep 17 00:00:00 2001
From: Sebastian Raschka
Date: Mon, 11 Dec 2023 16:39:33 -0600
Subject: [PATCH 18/18] Update __init__.py

---
 bitsandbytes/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bitsandbytes/__init__.py b/bitsandbytes/__init__.py
index 3608de1ec..d77116849 100644
--- a/bitsandbytes/__init__.py
+++ b/bitsandbytes/__init__.py
@@ -24,6 +24,6 @@
     "optim.optimizer.MockArgs": False,
 }
 
-__version__ = "0.41.1"
+__version__ = "0.41.3.post1"
 
 PACKAGE_GITHUB_URL = "https://github.com/TimDettmers/bitsandbytes"
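
Note on PATCH 09/18 (exception-handler ordering): Python matches `except` clauses top to bottom, and PermissionError is a subclass of OSError, so the subclass handler must be listed first or it can never run. A minimal runnable sketch of the behavior the patch relies on; it is not part of the patch series, and the function names are illustrative only:

```python
# Sketch only: why PATCH 09/18 moves the PermissionError handler above the
# OSError handler. Python dispatches to the first matching except clause.

def wrong_order() -> str:
    # Mirrors the pre-patch layout: the OSError handler is listed first.
    try:
        raise PermissionError("no read access")
    except OSError:
        # A PermissionError is also an OSError, so it is caught here and
        # the dedicated handler below is unreachable.
        return "handled as OSError"
    except PermissionError:
        return "handled as PermissionError"


def right_order() -> str:
    # Mirrors the patched layout: the subclass handler comes first.
    try:
        raise PermissionError("no read access")
    except PermissionError:
        return "handled as PermissionError"
    except OSError:
        return "handled as OSError"


assert issubclass(PermissionError, OSError)
assert wrong_order() == "handled as OSError"
assert right_order() == "handled as PermissionError"
```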
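
Note on PATCH 12/18 and PATCH 18/18 (`__version__` attribute): once these patches are applied, the installed package exposes its version at runtime in the conventional Python way. A small usage sketch, assuming bitsandbytes is importable in the current environment:

```python
# Usage sketch only: reading the version attribute added by PATCH 12/18
# and bumped by PATCH 18/18. Assumes bitsandbytes is installed.
import bitsandbytes as bnb

print(bnb.__version__)  # "0.41.3.post1" after PATCH 18/18
```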