Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into simplify-cmake
Browse files Browse the repository at this point in the history
  • Loading branch information
jameslamb authored Jan 16, 2025
2 parents 4b49ec7 + a9ab8b4 commit cf9e9f8
Show file tree
Hide file tree
Showing 13 changed files with 515 additions and 196 deletions.
22 changes: 10 additions & 12 deletions ci/test_notebooks.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
#!/bin/bash
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.

set -Eeuo pipefail

. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION="$(rapids-version)"

rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)

rapids-logger "Generate notebook testing dependencies"
rapids-dependency-file-generator \
--output conda \
--file-key test_notebooks \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" \
--prepend-channel dglteam/label/th23_cu118 \
--prepend-channel "${CPP_CHANNEL}" \
--prepend-channel "${PYTHON_CHANNEL}" \
| tee env.yaml

rapids-mamba-retry env create --yes -f env.yaml -n test

Expand All @@ -22,16 +30,6 @@ set -u

rapids-print-env

rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
--channel dglteam/label/th23_cu118 \
"cugraph-dgl=${RAPIDS_VERSION}"

NBTEST="$(realpath "$(dirname "$0")/utils/nbtest.sh")"
NOTEBOOK_LIST="$(realpath "$(dirname "$0")/notebook_list.py")"
EXITCODE=0
Expand Down
7 changes: 7 additions & 0 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ files:
includes:
- cuda_version
- depends_on_pytorch
- depends_on_cugraph_dgl
- py_version
- test_notebook
test_python:
Expand Down Expand Up @@ -540,6 +541,12 @@ dependencies:
- cugraph-cu11==25.2.*,>=0.0.0a0
- {matrix: null, packages: [*cugraph_unsuffixed]}

depends_on_cugraph_dgl:
common:
- output_types: conda
packages:
- cugraph-dgl==25.2.*,>=0.0.0a0

depends_on_cudf:
common:
- output_types: conda
Expand Down
21 changes: 4 additions & 17 deletions python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -14,11 +14,10 @@
from __future__ import annotations

import warnings
import tempfile

from typing import Sequence, Optional, Union, List, Tuple, Iterator

from cugraph.gnn import UniformNeighborSampler, BiasedNeighborSampler, DistSampleWriter
from cugraph.gnn import UniformNeighborSampler, BiasedNeighborSampler
from cugraph.utilities.utils import import_optional

import cugraph_dgl
Expand Down Expand Up @@ -124,7 +123,7 @@ def __init__(
Can be either "dgl.Block" (default), or "cugraph_dgl.nn.SparseGraph".
**kwargs
Keyword arguments for the underlying cuGraph distributed sampler
and writer (directory, batches_per_partition, format,
and writer (batches_per_partition, format,
local_seeds_per_call).
"""

Expand Down Expand Up @@ -165,18 +164,6 @@ def sample(
) -> Iterator[DGLSamplerOutput]:
kwargs = dict(**self.__kwargs)

directory = kwargs.pop("directory", None)
if directory is None:
warnings.warn("Setting a directory to store samples is recommended.")
self._tempdir = tempfile.TemporaryDirectory()
directory = self._tempdir.name

writer = DistSampleWriter(
directory=directory,
batches_per_partition=kwargs.pop("batches_per_partition", 256),
format=kwargs.pop("format", "parquet"),
)

sampling_clx = (
UniformNeighborSampler
if self.__prob_attr is None
Expand All @@ -185,7 +172,7 @@ def sample(

ds = sampling_clx(
g._graph(self.edge_dir, prob_attr=self.__prob_attr),
writer,
writer=None,
compression="CSR",
fanout=self._reversed_fanout_vals,
prior_sources_behavior="carryover",
Expand Down
56 changes: 55 additions & 1 deletion python/cugraph-pyg/cugraph_pyg/data/graph_store.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -70,6 +70,7 @@ def __clear_graph(self):
self.__graph = None
self.__vertex_offsets = None
self.__weight_attr = None
self.__numeric_edge_types = None

def _put_edge_index(
self,
Expand Down Expand Up @@ -240,6 +241,27 @@ def _vertex_offsets(self) -> Dict[str, int]:

return dict(self.__vertex_offsets)

@property
def _vertex_offset_array(self) -> "torch.Tensor":
    """
    Per-vertex-type starting offsets (types ordered by name), with the total
    vertex count appended as a final sentinel element.
    """
    # Offsets for each vertex type, in sorted type-name order.
    sorted_types = sorted(self._vertex_offsets.keys())
    offsets = torch.tensor(
        [self._vertex_offsets[vtype] for vtype in sorted_types],
        dtype=torch.int64,
        device="cuda",
    )

    # Total number of vertices across all types, as a 1-element tensor.
    total = (
        torch.tensor(
            list(self._num_vertices().values()),
            device="cuda",
            dtype=torch.int64,
        )
        .sum()
        .reshape((1,))
    )

    return torch.concat([offsets, total])

@property
def is_homogeneous(self) -> bool:
    """True when the graph has exactly one vertex type."""
    num_vertex_types = len(self._vertex_offsets)
    return num_vertex_types == 1
Expand Down Expand Up @@ -270,6 +292,38 @@ def __get_weight_tensor(

return torch.concat(weights)

@property
def _numeric_edge_types(self) -> Tuple[List, "torch.Tensor", "torch.Tensor"]:
    """
    Returns the canonical edge types in order (the 0th canonical type corresponds
    to numeric edge type 0, etc.), along with the numeric source and destination
    vertex types for each edge type.
    """

    # Compute once and memoize; the ordering is stable thereafter.
    if self.__numeric_edge_types is None:
        canonical_types = sorted(
            self.__edge_indices.keys(leaves_only=True, include_nested=True)
        )

        # Map each vertex type name to its numeric id (sorted-name order).
        vtype_to_id = {
            vtype: i
            for i, vtype in enumerate(sorted(self._vertex_offsets.keys()))
        }

        # A canonical edge type is a (src_type, relation, dst_type) triple.
        src_ids = [vtype_to_id[etype[0]] for etype in canonical_types]
        dst_ids = [vtype_to_id[etype[2]] for etype in canonical_types]

        self.__numeric_edge_types = (
            canonical_types,
            torch.tensor(src_ids, device="cuda", dtype=torch.int32),
            torch.tensor(dst_ids, device="cuda", dtype=torch.int32),
        )

    return self.__numeric_edge_types

def __get_edgelist(self):
"""
Returns
Expand Down
51 changes: 17 additions & 34 deletions python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -17,7 +17,6 @@
import argparse
import os
import warnings
import tempfile
import time
import json

Expand Down Expand Up @@ -179,7 +178,6 @@ def run_train(
fan_out,
num_classes,
wall_clock_start,
tempdir=None,
num_layers=3,
in_memory=False,
seeds_per_call=-1,
Expand All @@ -194,41 +192,29 @@ def run_train(
from cugraph_pyg.loader import NeighborLoader

ix_train = split_idx["train"].cuda()
train_path = None if in_memory else os.path.join(tempdir, f"train_{global_rank}")
if train_path:
os.mkdir(train_path)
train_loader = NeighborLoader(
data,
input_nodes=ix_train,
directory=train_path,
shuffle=True,
drop_last=True,
local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None,
**kwargs,
)

ix_test = split_idx["test"].cuda()
test_path = None if in_memory else os.path.join(tempdir, f"test_{global_rank}")
if test_path:
os.mkdir(test_path)
test_loader = NeighborLoader(
data,
input_nodes=ix_test,
directory=test_path,
shuffle=True,
drop_last=True,
local_seeds_per_call=80000,
**kwargs,
)

ix_valid = split_idx["valid"].cuda()
valid_path = None if in_memory else os.path.join(tempdir, f"valid_{global_rank}")
if valid_path:
os.mkdir(valid_path)
valid_loader = NeighborLoader(
data,
input_nodes=ix_valid,
directory=valid_path,
shuffle=True,
drop_last=True,
local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None,
Expand Down Expand Up @@ -347,7 +333,6 @@ def parse_args():
parser.add_argument("--epochs", type=int, default=4)
parser.add_argument("--batch_size", type=int, default=1024)
parser.add_argument("--fan_out", type=int, default=30)
parser.add_argument("--tempdir_root", type=str, default=None)
parser.add_argument("--dataset_root", type=str, default="datasets")
parser.add_argument("--dataset", type=str, default="ogbn-products")
parser.add_argument("--skip_partition", action="store_true")
Expand Down Expand Up @@ -427,23 +412,21 @@ def parse_args():
).to(device)
model = DistributedDataParallel(model, device_ids=[local_rank])

with tempfile.TemporaryDirectory(dir=args.tempdir_root) as tempdir:
run_train(
global_rank,
data,
split_idx,
world_size,
device,
model,
args.epochs,
args.batch_size,
args.fan_out,
meta["num_classes"],
wall_clock_start,
tempdir,
args.num_layers,
args.in_memory,
args.seeds_per_call,
)
run_train(
global_rank,
data,
split_idx,
world_size,
device,
model,
args.epochs,
args.batch_size,
args.fan_out,
meta["num_classes"],
wall_clock_start,
args.num_layers,
args.in_memory,
args.seeds_per_call,
)
else:
warnings.warn("This script should be run with 'torchrun`. Exiting.")
Loading

0 comments on commit cf9e9f8

Please sign in to comment.