Improve throughput of computing embeddings with BetterTransformer #15

Open · wants to merge 6 commits into base: main
3 changes: 1 addition & 2 deletions .github/workflows/base.yml
@@ -28,5 +28,4 @@ jobs:
          pip install -e '.[dev]'
      - name: Run tests
        run: |
-         pytest --cov="crossfit"
-
+         pytest -m "not (singlegpu or multigpu)" --cov="crossfit"
4 changes: 2 additions & 2 deletions .github/workflows/cf_backends.yml
@@ -37,7 +37,7 @@ jobs:
          python setup.py develop
      - name: Run Pytorch tests
        run: |
-         pytest --cov="crossfit/array" -m "pytorch"
+         pytest --cov="crossfit/array" -m "pytorch and not (singlegpu or multigpu)"

  # jax:
  #   runs-on: ${{ matrix.os }}
@@ -66,4 +66,4 @@ jobs:
  #       python setup.py develop
  #     - name: Run Jax tests
  #       run: |
- #       pytest --cov="crossfit/array" -m "jax"
+ #       pytest --cov="crossfit/array" -m "jax"
31 changes: 28 additions & 3 deletions .github/workflows/gpu-ci.yml
@@ -25,8 +25,33 @@ jobs:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
+     - name: Install dependencies
+       run: |
+         python -m pip install "torch${{ matrix.torch-version }}"
+         python -m pip install .[pytorch-dev]
      - name: Run tests
        run: |
-         python -m pip install -r requirements/base.txt
-         python -m pip install -r requirements/pytorch.txt
-         pytest tests/
+         pytest -m singlegpu tests/
+
+#multi-gpu-ci:
+#  runs-on: linux-amd64-gpu-p100-latest-2
+#  container:
+#    image: nvcr.io/nvidia/pytorch:23.09-py3
+#    env:
+#      NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
+#    options: --shm-size=1G
+#    credentials:
+#      username: $oauthtoken
+#      password: ${{ secrets.NGC_TOKEN }}
+#
+#  steps:
+#    - uses: actions/checkout@v3
+#      with:
+#        fetch-depth: 0
+#    - name: Install dependencies
+#      run: |
+#        python -m pip install "torch${{ matrix.torch-version }}"
+#        python -m pip install .[pytorch-dev]
+#    - name: Run tests
+#      run: |
+#        pytest -m multigpu tests/
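Note for reviewers: the job selection above leans on custom pytest markers (`-m singlegpu` here, `-m "not (singlegpu or multigpu)"` on the CPU runners). Pytest warns on unknown markers unless they are registered; below is a minimal sketch of one way to register them, assuming a `conftest.py` at the repo root (the project may instead declare them in `setup.cfg` or `pyproject.toml`).

```python
# conftest.py -- hypothetical marker registration, not part of this diff.
def pytest_configure(config):
    config.addinivalue_line("markers", "singlegpu: test needs exactly one GPU")
    config.addinivalue_line("markers", "multigpu: test needs multiple GPUs")
```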
37 changes: 33 additions & 4 deletions crossfit/backend/torch/hf/model.py
@@ -1,16 +1,20 @@
 from functools import lru_cache
 import gc
+import logging
 import os
 from crossfit.dataset.home import CF_HOME
 import joblib

 import numpy as np
 import torch
 import torch.nn.functional as F
 from tqdm import tqdm
 from transformers import AutoConfig, AutoModel, AutoTokenizer
 from sklearn.linear_model import LinearRegression
 from crossfit.backend.torch.model import Model

+logger = logging.getLogger(__name__)
+

 class HFModel(Model):
     def __init__(self, path_or_name: str, max_mem_gb: int = 16, training=False):
@@ -67,7 +71,7 @@ def fit_memory_estimate_curve(self, model=None):
             }

             try:
-                outputs = model(batch)
+                outputs = model(**batch)
                 memory_used = torch.cuda.max_memory_allocated() / (1024**2)  # Convert to MB
                 X.append([batch_size, seq_len, seq_len**2])
                 y.append(memory_used)
@@ -100,9 +104,34 @@ def max_seq_length(self) -> int:

 class SentenceTransformerModel(HFModel):
     def load_model(self, device="cuda"):
-        from sentence_transformers import SentenceTransformer
-
-        return SentenceTransformer(self.path_or_name, device="cuda").to(device)
+        model = AutoModel.from_pretrained(self.path_or_name).to(device)
+        if device == "cuda":
+            try:
+                from optimum.bettertransformer import BetterTransformer
+
+                model = BetterTransformer.transform(model.to(torch.float16))
+            except ImportError:
+                logger.warning(
+                    "Loading embedding model without BetterTransformer. "
+                    "Install 'optimum' to make embedding inference faster."
+                )
+        return model
+
+    def get_embedding(self, inputs, outputs):
+        embeddings = self.average_pool(
+            outputs["last_hidden_state"], inputs["attention_mask"]
+        )
+        embeddings = F.normalize(embeddings, p=2, dim=1)
+        return embeddings
+
+    @staticmethod
+    def average_pool(
+        last_hidden_states: torch.Tensor, attention_mask: torch.Tensor
+    ) -> torch.Tensor:
+        last_hidden = last_hidden_states.masked_fill(
+            ~attention_mask[..., None].bool(), 0.0
+        )
+        return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]

     def load_cfg(self):
         return AutoConfig.from_pretrained("sentence-transformers/" + self.path_or_name)
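Taken together, the new `load_model`/`get_embedding` replace the `sentence_transformers` wrapper with a plain fp16 `AutoModel` that BetterTransformer can rewrite to use fused attention kernels, while mean pooling and L2 normalization move into explicit methods. A self-contained sketch of the equivalent inference path; the model name is an illustrative assumption, not taken from this PR:

```python
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

name = "sentence-transformers/all-MiniLM-L6-v2"  # hypothetical example model
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModel.from_pretrained(name).to("cuda").to(torch.float16).eval()

try:
    from optimum.bettertransformer import BetterTransformer
    model = BetterTransformer.transform(model)  # fused attention fast path
except ImportError:
    pass  # eager attention still works, just slower

inputs = tokenizer(["hello world"], padding=True, return_tensors="pt").to("cuda")
with torch.no_grad():
    outputs = model(**inputs)

# Masked mean pooling then L2 normalization, the same math as
# SentenceTransformerModel.average_pool / get_embedding above.
mask = inputs["attention_mask"][..., None].bool()
hidden = outputs.last_hidden_state.masked_fill(~mask, 0.0)
emb = hidden.sum(dim=1) / inputs["attention_mask"].sum(dim=1)[..., None]
emb = F.normalize(emb, p=2, dim=1)
```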
10 changes: 6 additions & 4 deletions crossfit/backend/torch/op/embed.py
@@ -17,13 +17,13 @@ def __init__(
         pre=None,
         cols=False,
         keep_cols=None,
-        default_batch_size=1024,
+        batch_size=1024,
         max_mem: str = "16GB",
         sorted_data_loader: bool = True,
     ):
         super().__init__(pre=pre, cols=cols, keep_cols=keep_cols)
         self.model = model
-        self.batch_size = default_batch_size
+        self.batch_size = batch_size
         self.max_mem = max_mem
         self.max_mem_gb = int(self.max_mem.split("GB")[0]) / 2.5
         self.sorted_data_loader = sorted_data_loader
@@ -50,8 +50,10 @@ def call(self, data, partition_info=None):
         )

         all_embeddings_ls = []
-        for output in loader.map(self.model.get_model(self)):
-            all_embeddings_ls.append(output["sentence_embedding"])
+        model = self.model.get_model(self)
+        for features, outputs in loader.map(lambda batch: (batch, model(**batch))):
+            sentence_embedding = self.model.get_embedding(features, outputs)
+            all_embeddings_ls.append(sentence_embedding)

         out = cudf.DataFrame(index=index)
         embedding = cp.asarray(torch.vstack(all_embeddings_ls))
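The `call` change also updates the map contract: previously the loader mapped the model directly and read a `"sentence_embedding"` key from its output; now the lambda returns `(features, outputs)` pairs so the op can pool outside the forward pass (the attention mask lives in `features`). A toy sketch of that shape, with stand-in objects rather than crossfit's actual loader:

```python
# 'batches' stands in for the sorted data loader: an iterable of tokenized
# feature dicts. 'model' is any callable returning transformer-style outputs.
def map_batches(batches, model):
    for batch in batches:
        # Yield the features alongside the outputs; pooling needs the mask.
        yield batch, model(**batch)
```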
7 changes: 6 additions & 1 deletion crossfit/dataset/beir/raw.py
@@ -236,7 +236,12 @@ def sample_raw(name, out_dir=None, overwrite=False, sample_size=100, blocksize=2
     import cudf
     import dask_cudf

-    full_path = download_raw(name, overwrite=overwrite)
+    # if we are running tests with `pytest tests/`, use testdata.
+    testdata_dir = os.path.join(os.getcwd(), "tests", "testdata", "beir", name)
+    if os.path.exists(testdata_dir):
+        full_path = testdata_dir
+    else:
+        full_path = download_raw(name, overwrite=overwrite)

     out_dir = out_dir or CF_HOME
     sampled_dir = os.path.join(out_dir, "sampled")
3 changes: 1 addition & 2 deletions tests/backend/dask_backend/test_aggregate.py
@@ -2,9 +2,8 @@

 from crossfit.calculate.aggregate import Aggregator, metric_key
 from crossfit.data.dataframe.dispatch import CrossFrame
-from crossfit.metric.continuous.range import Range
 from crossfit.metric import Mean
-
+from crossfit.metric.continuous.range import Range
 from tests.utils import is_leaf_node_instance_of, sample_df
3 changes: 1 addition & 2 deletions tests/backend/pytorch_backend/test_torch_convert.py
@@ -1,6 +1,5 @@
-import pytest
-
 import numpy as np
+import pytest
 import torch

 from crossfit.data import convert_array
6 changes: 2 additions & 4 deletions tests/backend/test_sklearn.py
@@ -1,13 +1,11 @@
-import pytest
-
 import numpy as np
+import pytest
 from sklearn import metrics
-from sklearn.utils.multiclass import type_of_target
 from sklearn.utils._array_api import get_namespace
+from sklearn.utils.multiclass import type_of_target

 from crossfit.data import crossarray, np_backend_dispatch

-
 arr1 = [1, 2, 3]
 arr2 = [4, 5, 6]
3 changes: 1 addition & 2 deletions tests/data/array/test_conversion.py
@@ -1,6 +1,5 @@
-import pytest
-
 import numpy as np
+import pytest

 from crossfit.data import convert_array
4 changes: 1 addition & 3 deletions tests/data/array/test_decorator.py
@@ -1,7 +1,5 @@
-import pytest
-
 import numpy as np
-
+import pytest

 from crossfit.data import crossarray
 from crossfit.utils import test_utils
4 changes: 1 addition & 3 deletions tests/data/dataframe/test_dispatch.py
@@ -1,14 +1,12 @@
-import pytest
-
 import numpy as np
+import pytest

 from crossfit.backend.pandas.dataframe import PandasDataFrame
 from crossfit.data.dataframe.core import ArrayBundle
 from crossfit.data.dataframe.dispatch import CrossFrame


 def test_pandas_frame():
-
     arr1 = np.arange(10)
     arr2 = np.ones(10)
     arr3 = np.array(["cat", "dog"] * 5)
30 changes: 30 additions & 0 deletions tests/dataset/test_load.py
@@ -0,0 +1,30 @@
+import pytest
+
+beir = pytest.importorskip("beir")
+
+import os
+import random
+
+import crossfit as cf
+from crossfit.dataset.beir.raw import BEIR_DATASETS
+
+DATASETS = set(BEIR_DATASETS.keys())
+DATASETS.discard("cqadupstack")
+DATASETS.discard("germanquad")
+
+
+@pytest.mark.singlegpu
+@pytest.mark.parametrize("dataset", DATASETS)
+def test_load_beir(dataset):
+    data = cf.load_dataset(f"beir/{dataset}", overwrite=True, tiny_sample=True)
+
+    for split in ["train", "val", "test"]:
+        split_data = getattr(data, split)
+
+        if split_data is None:
+            continue
+
+        split = split_data.ddf().compute()
+
+        assert split["query-index"].nunique() == split["query-id"].nunique()
+        assert split["query-id"].nunique() <= 100
51 changes: 51 additions & 0 deletions tests/metrics/ranking/test_f1.py
@@ -0,0 +1,51 @@
+import pytest
+
+pytest.importorskip("cupy")
+
+from crossfit.data.sparse.ranking import SparseBinaryLabels, SparseRankings
+from crossfit.metric.ranking import F1
+
+y1 = [0, 5]
+y2 = [8, 9]
+y3 = []
+y4 = [1, 2, 3, 4, 5, 6]
+y5 = [3]
+y6 = [0, 1]
+y7 = [[]]
+
+r1 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+r2 = [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+r3 = []
+r4 = [1, 6, 8]
+
+yn1 = [0, 3, 4]
+yn2 = [2, 0, 5]
+
+rn1 = [0, 1, 2]
+rn2 = [2, 1, 0]
+
+
+class TestF1:
+    @pytest.mark.parametrize(
+        "k,y_gold,y_pred,expect",
+        [
+            (3, y2, r1, [0.0]),
+            (10, y2, r1, [1.0 / 3]),
+            (2, y2, r2, [1.0]),
+            (10, y2, r3, [0.0]),
+            (1, y4, r4, [0.285714286]),
+            (10, y3, r3, [float("nan")]),
+            (10, y3, r2, [float("nan")]),
+            (1, y1, [r1, r2], [2.0 / 3, 0.0]),
+            (1, [y1, y1], [r1, r2], [2.0 / 3, 0.0]),
+        ],
+    )
+    def test_score(self, k, y_gold, y_pred, expect):
+        y_gold = SparseBinaryLabels.from_positive_indices(y_gold)
+        if len(y_pred) == 0 or [] in y_pred:
+            with pytest.warns(UserWarning):
+                y_pred = SparseRankings.from_ranked_indices(y_pred)
+        else:
+            y_pred = SparseRankings.from_ranked_indices(y_pred)
+        pred = F1(k).score(y_gold, y_pred, nan_handling="propagate").tolist()
+        assert pred == pytest.approx(expect, nan_ok=True)
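As a sanity check on the expectations above: F1@k is the harmonic mean of precision@k and recall, undefined when there are no relevant items. For `(1, y4, r4)` the top-1 item (`1`) is relevant, so precision is 1 and recall is 1/6, giving 2/7 ≈ 0.285714. A hypothetical scalar reference that reproduces the table, not the library's actual implementation:

```python
def f1_at_k(gold, ranking, k):
    if not gold:
        return float("nan")  # matches nan_handling="propagate" for empty labels
    hits = len(set(ranking[:k]) & set(gold))
    if hits == 0:
        return 0.0  # also covers empty rankings
    precision = hits / min(k, len(ranking))
    recall = hits / len(gold)
    return 2 * precision * recall / (precision + recall)

assert abs(f1_at_k([1, 2, 3, 4, 5, 6], [1, 6, 8], 1) - 2 / 7) < 1e-9
assert abs(f1_at_k([8, 9], list(range(10)), 10) - 1 / 3) < 1e-9
```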
54 changes: 54 additions & 0 deletions tests/metrics/ranking/test_hitrate.py
@@ -0,0 +1,54 @@
+import pytest
+
+pytest.importorskip("cupy")
+
+from crossfit.data.sparse.ranking import SparseBinaryLabels, SparseRankings
+from crossfit.metric.ranking import HitRate
+
+y1 = [0, 5]
+y2 = [8, 9]
+y3 = []
+y4 = [1, 2, 3, 4, 5, 6]
+y5 = [3]
+y6 = [0, 1]
+y7 = [[]]
+
+r1 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+r2 = [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+r3 = []
+r4 = [1, 6, 8]
+
+yn1 = [0, 3, 4]
+yn2 = [2, 0, 5]
+
+rn1 = [0, 1, 2]
+rn2 = [2, 1, 0]
+
+
+class TestHitrate:
+    @pytest.mark.parametrize(
+        "k,y_gold,y_pred,expect",
+        [
+            (3, y5, r1, [0.0]),
+            (3, y5, r2, [0.0]),
+            (4, y5, r1, [1.0]),
+            (4, y5, r2, [0.0]),
+            (4, y1, r1, [float("nan")]),
+            (4, y1, r2, [float("nan")]),
+            (10, y3, r3, [float("nan")]),
+            (10, y5, r3, [0.0]),
+            (10, y3, r2, [float("nan")]),
+            (5, y5, [r1, r2], [1.0, 0.0]),
+            (5, [y5, y1], [r1, r2], [1.0, float("nan")]),
+        ],
+    )
+    def test_score(self, k, y_gold, y_pred, expect):
+        y_gold = SparseBinaryLabels.from_positive_indices(y_gold)
+        if len(y_pred) == 0 or [] in y_pred:
+            with pytest.warns(UserWarning):
+                y_pred = SparseRankings.from_ranked_indices(y_pred)
+        else:
+            y_pred = SparseRankings.from_ranked_indices(y_pred)
+        pred = HitRate(k).score(y_gold, y_pred, nan_handling="propagate").tolist()
+
+        assert pred == pytest.approx(expect, nan_ok=True)
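The expectations encode HitRate's domain: the metric assumes exactly one relevant item per query, so `y1` (two positives) and empty label sets propagate NaN, while an empty ranking with one valid label scores 0. A hypothetical scalar sketch consistent with the table, again not the library's code:

```python
import math

def hitrate_at_k(gold, ranking, k):
    if len(gold) != 1:
        return float("nan")  # undefined unless exactly one relevant item
    return 1.0 if gold[0] in ranking[:k] else 0.0

assert hitrate_at_k([3], list(range(10)), 4) == 1.0
assert hitrate_at_k([3], list(reversed(range(10))), 4) == 0.0
assert math.isnan(hitrate_at_k([0, 5], list(range(10)), 4))
```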