Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into simplify-cmake
Browse files Browse the repository at this point in the history
  • Loading branch information
jameslamb authored Jan 16, 2025
2 parents 4b49ec7 + a9ab8b4 commit cf9e9f8
Show file tree
Hide file tree
Showing 13 changed files with 515 additions and 196 deletions.
22 changes: 10 additions & 12 deletions ci/test_notebooks.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
#!/bin/bash
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.

set -Eeuo pipefail

. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION="$(rapids-version)"

rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)

rapids-logger "Generate notebook testing dependencies"
rapids-dependency-file-generator \
--output conda \
--file-key test_notebooks \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" \
--prepend-channel dglteam/label/th23_cu118 \
--prepend-channel "${CPP_CHANNEL}" \
--prepend-channel "${PYTHON_CHANNEL}" \
| tee env.yaml

rapids-mamba-retry env create --yes -f env.yaml -n test

Expand All @@ -22,16 +30,6 @@ set -u

rapids-print-env

rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
--channel dglteam/label/th23_cu118 \
"cugraph-dgl=${RAPIDS_VERSION}"

NBTEST="$(realpath "$(dirname "$0")/utils/nbtest.sh")"
NOTEBOOK_LIST="$(realpath "$(dirname "$0")/notebook_list.py")"
EXITCODE=0
Expand Down
7 changes: 7 additions & 0 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ files:
includes:
- cuda_version
- depends_on_pytorch
- depends_on_cugraph_dgl
- py_version
- test_notebook
test_python:
Expand Down Expand Up @@ -540,6 +541,12 @@ dependencies:
- cugraph-cu11==25.2.*,>=0.0.0a0
- {matrix: null, packages: [*cugraph_unsuffixed]}

depends_on_cugraph_dgl:
common:
- output_types: conda
packages:
- cugraph-dgl==25.2.*,>=0.0.0a0

depends_on_cudf:
common:
- output_types: conda
Expand Down
21 changes: 4 additions & 17 deletions python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -14,11 +14,10 @@
from __future__ import annotations

import warnings
import tempfile

from typing import Sequence, Optional, Union, List, Tuple, Iterator

from cugraph.gnn import UniformNeighborSampler, BiasedNeighborSampler, DistSampleWriter
from cugraph.gnn import UniformNeighborSampler, BiasedNeighborSampler
from cugraph.utilities.utils import import_optional

import cugraph_dgl
Expand Down Expand Up @@ -124,7 +123,7 @@ def __init__(
Can be either "dgl.Block" (default), or "cugraph_dgl.nn.SparseGraph".
**kwargs
Keyword arguments for the underlying cuGraph distributed sampler
and writer (directory, batches_per_partition, format,
and writer (batches_per_partition, format,
local_seeds_per_call).
"""

Expand Down Expand Up @@ -165,18 +164,6 @@ def sample(
) -> Iterator[DGLSamplerOutput]:
kwargs = dict(**self.__kwargs)

directory = kwargs.pop("directory", None)
if directory is None:
warnings.warn("Setting a directory to store samples is recommended.")
self._tempdir = tempfile.TemporaryDirectory()
directory = self._tempdir.name

writer = DistSampleWriter(
directory=directory,
batches_per_partition=kwargs.pop("batches_per_partition", 256),
format=kwargs.pop("format", "parquet"),
)

sampling_clx = (
UniformNeighborSampler
if self.__prob_attr is None
Expand All @@ -185,7 +172,7 @@ def sample(

ds = sampling_clx(
g._graph(self.edge_dir, prob_attr=self.__prob_attr),
writer,
writer=None,
compression="CSR",
fanout=self._reversed_fanout_vals,
prior_sources_behavior="carryover",
Expand Down
56 changes: 55 additions & 1 deletion python/cugraph-pyg/cugraph_pyg/data/graph_store.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -70,6 +70,7 @@ def __clear_graph(self):
self.__graph = None
self.__vertex_offsets = None
self.__weight_attr = None
self.__numeric_edge_types = None

def _put_edge_index(
self,
Expand Down Expand Up @@ -240,6 +241,27 @@ def _vertex_offsets(self) -> Dict[str, int]:

return dict(self.__vertex_offsets)

@property
def _vertex_offset_array(self) -> "torch.Tensor":
    """
    Per-vertex-type starting offsets (types ordered by name), with the total
    vertex count appended as a final sentinel element.
    """
    # Offsets for each vertex type, in sorted type-name order.
    sorted_types = sorted(self._vertex_offsets.keys())
    offsets = torch.tensor(
        [self._vertex_offsets[vtype] for vtype in sorted_types],
        dtype=torch.int64,
        device="cuda",
    )

    # Total number of vertices across all types, as a 1-element tensor.
    total = (
        torch.tensor(
            list(self._num_vertices().values()),
            device="cuda",
            dtype=torch.int64,
        )
        .sum()
        .reshape((1,))
    )

    return torch.concat([offsets, total])

@property
def is_homogeneous(self) -> bool:
    """True when the graph has exactly one vertex type."""
    num_vertex_types = len(self._vertex_offsets)
    return num_vertex_types == 1
Expand Down Expand Up @@ -270,6 +292,38 @@ def __get_weight_tensor(

return torch.concat(weights)

@property
def _numeric_edge_types(self) -> Tuple[List, "torch.Tensor", "torch.Tensor"]:
    """
    Returns the canonical edge types in order (the 0th canonical type corresponds
    to numeric edge type 0, etc.), along with the numeric source and destination
    vertex types for each edge type.
    """

    # Compute once and memoize; the ordering is stable thereafter.
    if self.__numeric_edge_types is None:
        canonical_types = sorted(
            self.__edge_indices.keys(leaves_only=True, include_nested=True)
        )

        # Map each vertex type name to its numeric id (sorted-name order).
        vtype_to_id = {
            vtype: i
            for i, vtype in enumerate(sorted(self._vertex_offsets.keys()))
        }

        # A canonical edge type is a (src_type, relation, dst_type) triple.
        src_ids = [vtype_to_id[etype[0]] for etype in canonical_types]
        dst_ids = [vtype_to_id[etype[2]] for etype in canonical_types]

        self.__numeric_edge_types = (
            canonical_types,
            torch.tensor(src_ids, device="cuda", dtype=torch.int32),
            torch.tensor(dst_ids, device="cuda", dtype=torch.int32),
        )

    return self.__numeric_edge_types

def __get_edgelist(self):
"""
Returns
Expand Down
51 changes: 17 additions & 34 deletions python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -17,7 +17,6 @@
import argparse
import os
import warnings
import tempfile
import time
import json

Expand Down Expand Up @@ -179,7 +178,6 @@ def run_train(
fan_out,
num_classes,
wall_clock_start,
tempdir=None,
num_layers=3,
in_memory=False,
seeds_per_call=-1,
Expand All @@ -194,41 +192,29 @@ def run_train(
from cugraph_pyg.loader import NeighborLoader

ix_train = split_idx["train"].cuda()
train_path = None if in_memory else os.path.join(tempdir, f"train_{global_rank}")
if train_path:
os.mkdir(train_path)
train_loader = NeighborLoader(
data,
input_nodes=ix_train,
directory=train_path,
shuffle=True,
drop_last=True,
local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None,
**kwargs,
)

ix_test = split_idx["test"].cuda()
test_path = None if in_memory else os.path.join(tempdir, f"test_{global_rank}")
if test_path:
os.mkdir(test_path)
test_loader = NeighborLoader(
data,
input_nodes=ix_test,
directory=test_path,
shuffle=True,
drop_last=True,
local_seeds_per_call=80000,
**kwargs,
)

ix_valid = split_idx["valid"].cuda()
valid_path = None if in_memory else os.path.join(tempdir, f"valid_{global_rank}")
if valid_path:
os.mkdir(valid_path)
valid_loader = NeighborLoader(
data,
input_nodes=ix_valid,
directory=valid_path,
shuffle=True,
drop_last=True,
local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None,
Expand Down Expand Up @@ -347,7 +333,6 @@ def parse_args():
parser.add_argument("--epochs", type=int, default=4)
parser.add_argument("--batch_size", type=int, default=1024)
parser.add_argument("--fan_out", type=int, default=30)
parser.add_argument("--tempdir_root", type=str, default=None)
parser.add_argument("--dataset_root", type=str, default="datasets")
parser.add_argument("--dataset", type=str, default="ogbn-products")
parser.add_argument("--skip_partition", action="store_true")
Expand Down Expand Up @@ -427,23 +412,21 @@ def parse_args():
).to(device)
model = DistributedDataParallel(model, device_ids=[local_rank])

with tempfile.TemporaryDirectory(dir=args.tempdir_root) as tempdir:
run_train(
global_rank,
data,
split_idx,
world_size,
device,
model,
args.epochs,
args.batch_size,
args.fan_out,
meta["num_classes"],
wall_clock_start,
tempdir,
args.num_layers,
args.in_memory,
args.seeds_per_call,
)
run_train(
global_rank,
data,
split_idx,
world_size,
device,
model,
args.epochs,
args.batch_size,
args.fan_out,
meta["num_classes"],
wall_clock_start,
args.num_layers,
args.in_memory,
args.seeds_per_call,
)
else:
warnings.warn("This script should be run with 'torchrun`. Exiting.")
Loading

0 comments on commit cf9e9f8

Please sign in to comment.