From 87455cfedcc6721f24c783ba555af14a9a180624 Mon Sep 17 00:00:00 2001
From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com>
Date: Fri, 10 Jan 2025 17:06:29 -0500
Subject: [PATCH] Remove Build Directory (#107)

Removes the build directory from `cugraph-pyg` which should not have been committed.

Authors:
  - Alex Barghi (https://github.com/alexbarghi-nv)

Approvers:
  - James Lamb (https://github.com/jameslamb)
  - Tingyu Wang (https://github.com/tingyu66)

URL: https://github.com/rapidsai/cugraph-gnn/pull/107
---
 .../cugraph-pyg/build/lib/cugraph_pyg/VERSION |    1 -
 .../build/lib/cugraph_pyg/__init__.py         |   14 -
 .../build/lib/cugraph_pyg/_version.py         |   26 -
 .../build/lib/cugraph_pyg/data/__init__.py    |   18 -
 .../lib/cugraph_pyg/data/cugraph_store.py     | 1215 -----------------
 .../lib/cugraph_pyg/examples/graph_sage_mg.py |  432 ------
 .../lib/cugraph_pyg/examples/graph_sage_sg.py |  215 ---
 .../build/lib/cugraph_pyg/loader/__init__.py  |   24 -
 .../cugraph_pyg/loader/cugraph_node_loader.py |  534 --------
 .../build/lib/cugraph_pyg/loader/filter.py    |   57 -
 .../build/lib/cugraph_pyg/nn/__init__.py      |   14 -
 .../build/lib/cugraph_pyg/nn/conv/__init__.py |   26 -
 .../build/lib/cugraph_pyg/nn/conv/base.py     |  176 ---
 .../build/lib/cugraph_pyg/nn/conv/gat_conv.py |  234 ----
 .../lib/cugraph_pyg/nn/conv/gatv2_conv.py     |  231 ----
 .../lib/cugraph_pyg/nn/conv/rgcn_conv.py      |  141 --
 .../lib/cugraph_pyg/nn/conv/sage_conv.py      |  149 --
 .../cugraph_pyg/nn/conv/transformer_conv.py   |  216 ---
 .../build/lib/cugraph_pyg/sampler/__init__.py |   12 -
 .../cugraph_pyg/sampler/cugraph_sampler.py    |  438 ------
 .../build/lib/cugraph_pyg/tests/conftest.py   |  286 ----
 .../tests/mg/test_mg_cugraph_loader.py        |   76 --
 .../tests/mg/test_mg_cugraph_sampler.py       |  238 ----
 .../tests/mg/test_mg_cugraph_store.py         |  388 ------
 .../lib/cugraph_pyg/tests/nn/test_gat_conv.py |  118 --
 .../cugraph_pyg/tests/nn/test_gatv2_conv.py   |   87 --
 .../cugraph_pyg/tests/nn/test_rgcn_conv.py    |   72 -
 .../cugraph_pyg/tests/nn/test_sage_conv.py    |   90 --
 .../tests/nn/test_transformer_conv.py         |   83 --
 .../cugraph_pyg/tests/test_cugraph_loader.py  |  492 -------
 .../cugraph_pyg/tests/test_cugraph_sampler.py |  199 ---
 .../cugraph_pyg/tests/test_cugraph_store.py   |  397 ------
 32 files changed, 6699 deletions(-)
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/VERSION
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/__init__.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/_version.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/data/__init__.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/data/cugraph_store.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/examples/graph_sage_mg.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/examples/graph_sage_sg.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/loader/__init__.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/loader/cugraph_node_loader.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/loader/filter.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/nn/__init__.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/__init__.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/base.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/gat_conv.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/gatv2_conv.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/rgcn_conv.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/sage_conv.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/transformer_conv.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/sampler/__init__.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/sampler/cugraph_sampler.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/tests/conftest.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/tests/mg/test_mg_cugraph_loader.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/tests/mg/test_mg_cugraph_sampler.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/tests/mg/test_mg_cugraph_store.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_gat_conv.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_gatv2_conv.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_rgcn_conv.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_sage_conv.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_transformer_conv.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/tests/test_cugraph_loader.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/tests/test_cugraph_sampler.py
 delete mode 100644 python/cugraph-pyg/build/lib/cugraph_pyg/tests/test_cugraph_store.py

diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/VERSION b/python/cugraph-pyg/build/lib/cugraph_pyg/VERSION
deleted file mode 100644
index a193fff4..00000000
--- a/python/cugraph-pyg/build/lib/cugraph_pyg/VERSION
+++ /dev/null
@@ -1 +0,0 @@
-23.12.00
diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/__init__.py b/python/cugraph-pyg/build/lib/cugraph_pyg/__init__.py
deleted file mode 100644
index 31b45208..00000000
--- a/python/cugraph-pyg/build/lib/cugraph_pyg/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from cugraph_pyg._version import __git_commit__, __version__
diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/_version.py b/python/cugraph-pyg/build/lib/cugraph_pyg/_version.py
deleted file mode 100644
index f5afdb31..00000000
--- a/python/cugraph-pyg/build/lib/cugraph_pyg/_version.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import importlib.resources
-
-# Read VERSION file from the module that is symlinked to VERSION file
-# in the root of the repo at build time or copied to the moudle at
-# installation. VERSION is a separate file that allows CI build-time scripts
-# to update version info (including commit hashes) without modifying
-# source files.
-__version__ = (
-    importlib.resources.files("cugraph_pyg").joinpath("VERSION").read_text().strip()
-)
-__git_commit__ = ""
diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/data/__init__.py b/python/cugraph-pyg/build/lib/cugraph_pyg/data/__init__.py
deleted file mode 100644
index 3d830aa2..00000000
--- a/python/cugraph-pyg/build/lib/cugraph_pyg/data/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from cugraph.utilities.api_tools import experimental_warning_wrapper
-
-from cugraph_pyg.data.cugraph_store import EXPERIMENTAL__CuGraphStore
-
-CuGraphStore = experimental_warning_wrapper(EXPERIMENTAL__CuGraphStore)
diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/data/cugraph_store.py b/python/cugraph-pyg/build/lib/cugraph_pyg/data/cugraph_store.py
deleted file mode 100644
index 5b06eb37..00000000
--- a/python/cugraph-pyg/build/lib/cugraph_pyg/data/cugraph_store.py
+++ /dev/null
@@ -1,1215 +0,0 @@
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -from typing import Optional, Tuple, Any, Union, List, Dict - -from enum import Enum, auto - -from dataclasses import dataclass -from collections import defaultdict -from itertools import chain -from functools import cached_property - -import numpy as np -import cupy -import pandas -import cudf -import cugraph -import warnings - -from cugraph.utilities.utils import import_optional, MissingModule - -dd = import_optional("dask.dataframe") -distributed = import_optional("dask.distributed") -dask_cudf = import_optional("dask_cudf") - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - -Tensor = None if isinstance(torch, MissingModule) else torch.Tensor -NdArray = None if isinstance(cupy, MissingModule) else cupy.ndarray -DaskCudfSeries = None if isinstance(dask_cudf, MissingModule) else dask_cudf.Series - -TensorType = Union[Tensor, NdArray, cudf.Series, DaskCudfSeries] -NodeType = ( - None - if isinstance(torch_geometric, MissingModule) - else torch_geometric.typing.NodeType -) -EdgeType = ( - None - if isinstance(torch_geometric, MissingModule) - else torch_geometric.typing.EdgeType -) - - -class EdgeLayout(Enum): - COO = "coo" - CSC = "csc" - CSR = "csr" - - -@dataclass -class CuGraphEdgeAttr: - """ - Defines the attributes of an :obj:`GraphStore` edge. - """ - - # The type of the edge - edge_type: Optional[Any] - - # The layout of the edge representation - layout: EdgeLayout - - # Whether the edge index is sorted, by destination node. Useful for - # avoiding sorting costs when performing neighbor sampling, and only - # meaningful for COO (CSC and CSR are sorted by definition) - is_sorted: bool = False - - # The number of nodes in this edge type. If set to None, will attempt to - # infer with the simple heuristic int(self.edge_index.max()) + 1 - size: Optional[Tuple[int, int]] = None - - # NOTE we define __post_init__ to force-cast layout - def __post_init__(self): - self.layout = EdgeLayout(self.layout) - - @classmethod - def cast(cls, *args, **kwargs): - """ - Cast to a CuGraphTensorAttr from a tuple, list, or dict. - - Returns - ------- - CuGraphTensorAttr - contains the data of the tuple, list, or dict passed in - """ - if len(args) == 1 and len(kwargs) == 0: - elem = args[0] - if elem is None: - return None - if isinstance(elem, CuGraphEdgeAttr): - return elem - if isinstance(elem, (tuple, list)): - return cls(*elem) - if isinstance(elem, dict): - return cls(**elem) - return cls(*args, **kwargs) - - -class _field_status(Enum): - UNSET = auto() - - -@dataclass -class CuGraphTensorAttr: - """ - Defines the attributes of a class:`FeatureStore` tensor; in particular, - all the parameters necessary to uniquely identify a tensor from the feature - store. - - Note that the order of the attributes is important; this is the order in - which attributes must be provided for indexing calls. Feature store - implementor classes can define a different ordering by overriding - :meth:`TensorAttr.__init__`. - """ - - # The group name that the tensor corresponds to. Defaults to UNSET. - group_name: Optional[str] = _field_status.UNSET - - # The name of the tensor within its group. Defaults to UNSET. - attr_name: Optional[str] = _field_status.UNSET - - # The node indices the rows of the tensor correspond to. Defaults to UNSET. - index: Optional[Any] = _field_status.UNSET - - # The properties in the FeatureStore the rows of the tensor correspond to. - # Defaults to UNSET. - properties: Optional[Any] = _field_status.UNSET - - # The datatype of the tensor. 
Defaults to UNSET. - dtype: Optional[Any] = _field_status.UNSET - - # Convenience methods - - def is_set(self, key): - """ - Whether an attribute is set in :obj:`TensorAttr`. - """ - if key not in self.__dataclass_fields__: - raise KeyError(key) - attr = getattr(self, key) - return type(attr) is not _field_status or attr != _field_status.UNSET - - def is_fully_specified(self): - """ - Whether the :obj:`TensorAttr` has no unset fields. - """ - return all([self.is_set(key) for key in self.__dataclass_fields__]) - - def fully_specify(self): - """ - Sets all :obj:`UNSET` fields to :obj:`None`. - """ - for key in self.__dataclass_fields__: - if not self.is_set(key): - setattr(self, key, None) - return self - - def update(self, attr): - """ - Updates an :class:`TensorAttr` with set attributes from another - :class:`TensorAttr`. - """ - for key in self.__dataclass_fields__: - if attr.is_set(key): - setattr(self, key, getattr(attr, key)) - - @classmethod - def cast(cls, *args, **kwargs): - """ - Casts to a CuGraphTensorAttr from a tuple, list, or dict - - Returns - ------- - CuGraphTensorAttr - contains the data of the tuple, list, or dict passed in - """ - if len(args) == 1 and len(kwargs) == 0: - elem = args[0] - if elem is None: - return None - if isinstance(elem, CuGraphTensorAttr): - return elem - if isinstance(elem, (tuple, list)): - return cls(*elem) - if isinstance(elem, dict): - return cls(**elem) - return cls(*args, **kwargs) - - -class EXPERIMENTAL__CuGraphStore: - """ - Duck-typed version of PyG's GraphStore and FeatureStore. - """ - - # TODO allow (and possibly require) separate stores for node, edge attrs - # For now edge attrs are entirely unsupported. - # TODO add an "expensive check" argument that ensures the graph store - # and feature store are valid and compatible with PyG. - def __init__( - self, - F: cugraph.gnn.FeatureStore, - G: Union[ - Dict[Tuple[str, str, str], Tuple[TensorType]], - Dict[Tuple[str, str, str], int], - ], - num_nodes_dict: Dict[str, int], - *, - multi_gpu: bool = False, - order: str = "CSR", - ): - """ - Constructs a new CuGraphStore from the provided - arguments. - - Parameters - ---------- - F: cugraph.gnn.FeatureStore (Required) - The feature store containing this graph's features. - Typed lexicographic-ordered numbering convention - should match that of the graph. - - G: dict[str, tuple[TensorType]] or dict[str, int] (Required) - Dictionary of edge indices. - Option 1 (graph in memory): - - Pass the edge indices: i.e. - { - ('author', 'writes', 'paper'): [[0,1,2],[2,0,1]], - ('author', 'affiliated', 'institution'): [[0,1],[0,1]] - } - - - Option 2 (graph not in memory): - - Pass the number of edges: i.e. - { - ('author', 'writes', 'paper'): 2, - ('author', 'affiliated', 'institution'): 2 - } - If the graph is not in memory, manipulating the edge indices - or calling sampling is not possible. This is for cases where - sampling has already been done and samples were written to disk. - - Note: the internal cugraph representation will use - offsetted vertex and edge ids. - - num_nodes_dict: dict (Required) - A dictionary mapping each node type to the count of nodes - of that type in the graph. - - multi_gpu: bool (Optional, default = False) - Whether the store should be backed by a multi-GPU graph. - Requires dask to have been set up. - - order: str (Optional ["CSR", "CSC"], default = CSR) - The order to use for sampling. CSR corresponds to the - standard OGB dataset order that is usually used in PyG. 
- CSC order constructs the same graph as CSR, but with - edges in the opposite direction. - """ - - if None in G: - raise ValueError("Unspecified edge types not allowed in PyG") - - if order != "CSR" and order != "CSC": - raise ValueError("invalid valid for order") - - self.__vertex_dtype = torch.int64 - - self._tensor_attr_cls = CuGraphTensorAttr - self._tensor_attr_dict = defaultdict(list) - - construct_graph = True - if isinstance(next(iter(G.values())), int): - # User has passed in the number of edges - # (not the actual edge index), so the number of edges - # does not need to be counted. - num_edges_dict = dict(G) # make sure the cugraph store owns this dict - construct_graph = False - else: - # User has passed in the actual edge index, so the - # number of edges needs to be counted. - num_edges_dict = { - pyg_can_edge_type: len(ei[0]) for pyg_can_edge_type, ei in G.items() - } - - self.__infer_offsets(num_nodes_dict, num_edges_dict) - self.__infer_existing_tensors(F) - self.__infer_edge_types(num_nodes_dict, num_edges_dict) - - self._edge_attr_cls = CuGraphEdgeAttr - - self.__features = F - self.__graph = None - self.__is_graph_owner = False - self.__order = order - - if construct_graph: - if multi_gpu: - self.__graph = distributed.get_client().get_dataset( - "cugraph_graph", default=None - ) - - if self.__graph is None: - self.__graph = self.__construct_graph( - G, multi_gpu=multi_gpu, order=order - ) - self.__is_graph_owner = True - - self.__subgraphs = {} - - def __del__(self): - if self.__is_graph_owner: - if isinstance(self.__graph._plc_graph, dict): - try: - distributed.get_client().unpublish_dataset("cugraph_graph") - except TypeError: - warnings.warn( - "Could not unpublish graph dataset, most likely because" - " dask has already shut down." - ) - del self.__graph - - def __make_offsets(self, input_dict): - offsets = {} - offsets["stop"] = [input_dict[v] for v in sorted(input_dict.keys())] - offsets["stop"] = torch.tensor(offsets["stop"]) - if torch.has_cuda: - offsets["stop"] = offsets["stop"].cuda() - - cumsum = offsets["stop"].cumsum(0) - offsets["start"] = cumsum - offsets["stop"] - offsets["stop"] = cumsum - 1 - - offsets["type"] = np.array(sorted(input_dict.keys())) - - return offsets - - def __infer_offsets( - self, - num_nodes_dict: Dict[str, int], - num_edges_dict: Dict[Tuple[str, str, str], int], - ) -> None: - """ - Sets the vertex offsets for this store. - """ - self.__vertex_type_offsets = self.__make_offsets(num_nodes_dict) - - # Need to convert tuples to string in order to use searchsorted - # Can convert back using x.split('__') - # Lexicographic ordering is unchanged. - self.__edge_type_offsets = self.__make_offsets( - { - "__".join(pyg_can_edge_type): n - for pyg_can_edge_type, n in num_edges_dict.items() - } - ) - - def __construct_graph( - self, - edge_info: Dict[Tuple[str, str, str], List[TensorType]], - multi_gpu: bool = False, - order: str = "CSC", - ) -> cugraph.MultiGraph: - """ - This function takes edge information and uses it to construct - a cugraph Graph. It determines the numerical edge type by - sorting the keys of the input dictionary - (the canonical edge types). - - Parameters - ---------- - edge_info: Dict[Tuple[str, str, str], List[TensorType]] (Required) - Input edge info dictionary, where keys are the canonical - edge type and values are the edge index (src/dst). - - multi_gpu: bool (Optional, default=False) - Whether to construct a single-GPU or multi-GPU cugraph Graph. - Defaults to a single-GPU graph. 
- - order: str (CSC or CSR) - Essentially whether to reverse edges so that the cuGraph - sampling algorithm operates on the CSC matrix instead of - the CSR matrix. Should nearly always be CSC unless there - is a specific expectation of reverse sampling, or correctness - testing is being performed. - - Returns - ------- - A newly-constructed directed cugraph.MultiGraph object. - """ - - # Ensure the original dict is not modified. - edge_info_cg = {} - - if order != "CSR" and order != "CSC": - raise ValueError("Order must be either CSC (default) or CSR!") - - # Iterate over the keys in sorted order so that the created - # numerical types correspond to the lexicographic order - # of the keys, which is critical to converting the numeric - # keys back to canonical edge types later. - # FIXME don't always convert to host arrays (#3383) - for pyg_can_edge_type in sorted(edge_info.keys()): - src_type, _, dst_type = pyg_can_edge_type - srcs, dsts = edge_info[pyg_can_edge_type] - - src_offset = np.searchsorted(self.__vertex_type_offsets["type"], src_type) - srcs_t = srcs + int(self.__vertex_type_offsets["start"][src_offset]) - if isinstance(srcs_t, torch.Tensor): - srcs_t = srcs_t.cpu() - else: - if isinstance(srcs_t, dask_cudf.Series): - srcs_t = srcs_t.compute() - if isinstance(srcs_t, cudf.Series): - srcs_t = srcs_t.values_host - - dst_offset = np.searchsorted(self.__vertex_type_offsets["type"], dst_type) - dsts_t = dsts + int(self.__vertex_type_offsets["start"][dst_offset]) - if isinstance(dsts_t, torch.Tensor): - dsts_t = dsts_t.cpu() - else: - if isinstance(dsts_t, dask_cudf.Series): - dsts_t = dsts_t.compute() - if isinstance(dsts_t, cudf.Series): - dsts_t = dsts_t.values_host - - edge_info_cg[pyg_can_edge_type] = (srcs_t, dsts_t) - - na_src = np.concatenate( - [ - edge_info_cg[pyg_can_edge_type][0] - for pyg_can_edge_type in sorted(edge_info_cg.keys()) - ] - ) - - na_dst = np.concatenate( - [ - edge_info_cg[pyg_can_edge_type][1] - for pyg_can_edge_type in sorted(edge_info_cg.keys()) - ] - ) - - et_offsets = self.__edge_type_offsets - na_etp = np.concatenate( - [ - np.full( - int(et_offsets["stop"][i] - et_offsets["start"][i] + 1), - i, - dtype="int32", - ) - for i in range(len(self.__edge_type_offsets["start"])) - ] - ) - - df = pandas.DataFrame( - { - "src": pandas.Series(na_dst) - if order == "CSC" - else pandas.Series(na_src), - "dst": pandas.Series(na_src) - if order == "CSC" - else pandas.Series(na_dst), - "etp": pandas.Series(na_etp), - } - ) - vertex_dtype = df.src.dtype - - if multi_gpu: - nworkers = len(distributed.get_client().scheduler_info()["workers"]) - df = dd.from_pandas(df, npartitions=nworkers if len(df) > 32 else 1) - - # Ensure the dataframe is constructed on each partition - # instead of adding additional synchronization head from potential - # host to device copies. 
- def get_empty_df(): - return cudf.DataFrame( - { - "src": cudf.Series([], dtype=vertex_dtype), - "dst": cudf.Series([], dtype=vertex_dtype), - "etp": cudf.Series([], dtype="int32"), - } - ) - - # Have to check for empty partitions and handle them appropriately - df = df.persist() - df = df.map_partitions( - lambda f: cudf.DataFrame.from_pandas(f) - if len(f) > 0 - else get_empty_df(), - meta=get_empty_df(), - ).reset_index(drop=True) - else: - df = cudf.from_pandas(df).reset_index(drop=True) - - graph = cugraph.MultiGraph(directed=True) - if multi_gpu: - graph.from_dask_cudf_edgelist( - df, - source="src", - destination="dst", - edge_type="etp", - ) - distributed.get_client().publish_dataset(cugraph_graph=graph) - else: - graph.from_cudf_edgelist( - df, - source="src", - destination="dst", - edge_type="etp", - ) - - return graph - - @property - def _edge_types_to_attrs(self) -> dict: - return dict(self.__edge_types_to_attrs) - - @property - def order(self) -> str: - return self.__order - - @property - def node_types(self) -> List[NodeType]: - return list(self.__vertex_type_offsets["type"]) - - @property - def edge_types(self) -> List[EdgeType]: - return list(self.__edge_types_to_attrs.keys()) - - def canonical_edge_type_to_numeric(self, etype: EdgeType) -> int: - return np.searchsorted(self.__edge_type_offsets["type"], "__".join(etype)) - - def numeric_edge_type_to_canonical(self, etype: int) -> EdgeType: - return tuple(self.__edge_type_offsets["type"][etype].split("__")) - - @cached_property - def _is_delayed(self): - if self.__graph is None: - return False - return self.__graph.is_multi_gpu() - - def _numeric_vertex_type_from_name(self, vertex_type_name: str) -> int: - return np.searchsorted(self.__vertex_type_offsets["type"], vertex_type_name) - - def get_vertex_index(self, vtypes) -> TensorType: - if isinstance(vtypes, str): - vtypes = [vtypes] - - ix = torch.tensor([], dtype=torch.int64) - - if isinstance(self.__vertex_type_offsets, dict): - vtypes = np.searchsorted(self.__vertex_type_offsets["type"], vtypes) - for vtype in vtypes: - start = int(self.__vertex_type_offsets["start"][vtype]) - stop = int(self.__vertex_type_offsets["stop"][vtype]) - ix = torch.concatenate( - [ - ix, - torch.arange( - start, stop + 1, 1, dtype=self.__vertex_dtype, device="cuda" - ), - ] - ) - - return ix - - def put_edge_index(self, edge_index, edge_attr): - """ - Adds additional edges to the graph. - Not yet implemented. - """ - raise NotImplementedError("Adding indices not supported.") - - def get_all_edge_attrs(self): - """ - Gets a list of all edge types and indices in this store. - - Returns - ------- - list[str] - All edge types and indices in this store. - """ - return self.__edge_types_to_attrs.values() - - def _get_edge_index(self, attr: CuGraphEdgeAttr) -> Tuple[TensorType, TensorType]: - """ - Returns the edge index in the requested format - (as defined by attr). Currently, only unsorted - COO is supported, which is returned as a (src,dst) - tuple as expected by the PyG API. - - Parameters - ---------- - attr: CuGraphEdgeAttr - The CuGraphEdgeAttr specifying the - desired edge type, layout (i.e. CSR, COO, CSC), and - whether the returned index should be sorted (if COO). - Currently, only unsorted COO is supported. - - Returns - ------- - (src, dst) : Tuple[tensor type] - Tuple of the requested edge index in COO form. - Currently, only COO form is supported. 
- """ - - if self.__graph is None: - raise ValueError("Graph is not in memory, cannot access edge index!") - - if attr.layout != EdgeLayout.COO: - # TODO support returning CSR/CSC (Issue #3802) - raise TypeError("Only COO direct access is supported!") - - # Currently, graph creation enforces that input vertex ids are always of - # integer type. Therefore, it is currently safe to assume that for MG - # graphs, the src/dst col names are renumbered_src/dst - # and for SG graphs, the src/dst col names are src/dst. - # This may change in the future if/when renumbering or the graph - # creation process is refactored. - # See Issue #3201 for more details. - # Also note src/dst are flipped so that cuGraph sampling is done in - # CSC format rather than CSR format. - if self._is_delayed: - dst_col_name = self.__graph.renumber_map.renumbered_src_col_name - src_col_name = self.__graph.renumber_map.renumbered_dst_col_name - else: - dst_col_name = self.__graph.srcCol - src_col_name = self.__graph.dstCol - - # If there is only one edge type (homogeneous graph) then - # bypass the edge filters for a significant speed improvement. - if len(self.__edge_types_to_attrs) == 1: - if attr.edge_type not in self.__edge_types_to_attrs: - raise ValueError( - f"Requested edge type {attr.edge_type}" "is not present in graph." - ) - - df = self.__graph.edgelist.edgelist_df[[src_col_name, dst_col_name]] - src_offset = 0 - dst_offset = 0 - else: - src_type, _, dst_type = attr.edge_type - src_offset = int( - self.__vertex_type_offsets["start"][ - self._numeric_vertex_type_from_name(src_type) - ] - ) - dst_offset = int( - self.__vertex_type_offsets["start"][ - self._numeric_vertex_type_from_name(dst_type) - ] - ) - coli = np.searchsorted( - self.__edge_type_offsets["type"], "__".join(attr.edge_type) - ) - - df = self.__graph.edgelist.edgelist_df[ - [src_col_name, dst_col_name, self.__graph.edgeTypeCol] - ] - df = df[df[self.__graph.edgeTypeCol] == coli] - df = df[[src_col_name, dst_col_name]] - - if self._is_delayed: - df = df.compute() - - src = torch.as_tensor(df[src_col_name], device="cuda") - src_offset - dst = torch.as_tensor(df[dst_col_name], device="cuda") - dst_offset - - src = src.to(self.__vertex_dtype) - dst = dst.to(self.__vertex_dtype) - - if src.shape[0] != dst.shape[0]: - raise IndexError("src and dst shape do not match!") - - return (src, dst) - - def get_edge_index(self, *args, **kwargs) -> Tuple[TensorType, TensorType]: - """ - Synchronously gets an edge_index tensor from the materialized - graph. - - Args: - **attr(EdgeAttr): the edge attributes. - - Returns: - EdgeTensorType: an edge_index tensor corresonding to the provided - attributes, or None if there is no such tensor. - - Raises: - KeyError: if the edge index corresponding to attr was not found. - """ - - edge_attr = self._edge_attr_cls.cast(*args, **kwargs) - edge_attr.layout = EdgeLayout(edge_attr.layout) - # Override is_sorted for CSC and CSR: - # TODO treat is_sorted specially in this function, where is_sorted=True - # returns an edge index sorted by column. 
- edge_attr.is_sorted = edge_attr.is_sorted or ( - edge_attr.layout in [EdgeLayout.CSC, EdgeLayout.CSR] - ) - edge_index = self._get_edge_index(edge_attr) - if edge_index is None: - raise KeyError(f"An edge corresponding to '{edge_attr}' was not " f"found") - return edge_index - - def _subgraph(self, edge_types: List[tuple] = None) -> cugraph.MultiGraph: - """ - Returns a subgraph with edges limited to those of a given type - - Parameters - ---------- - edge_types : list of pyg canonical edge types - Directly references the graph's internal edge types. Does - not accept PyG edge type tuples. - - Returns - ------- - The appropriate extracted subgraph. Will extract the subgraph - if it has not already been extracted. - - """ - if self.__graph is None: - raise ValueError("Graph is not in memory, cannot get subgraph") - - if edge_types is not None and set(edge_types) != set( - self.__edge_types_to_attrs.keys() - ): - raise ValueError( - "Subgraphing is currently unsupported, please" - " specify all edge types in the graph or leave" - " this argument empty." - ) - - return self.__graph - - def _get_vertex_groups_from_sample( - self, nodes_of_interest: TensorType, is_sorted: bool = False - ) -> Dict[str, torch.Tensor]: - """ - Given a tensor of nodes of interest, this - method a single dictionary, noi_index. - - noi_index is the original vertex ids grouped by vertex type. - - Example Input: [5, 2, 1, 10, 11, 8] - Output: {'red_vertex': [5, 1, 8], 'blue_vertex': [2], 'green_vertex': [10, 11]} - - """ - - noi_index = {} - - vtypes = cudf.Series(self.__vertex_type_offsets["type"]) - if len(vtypes) == 1: - noi_index[vtypes.iloc[0]] = nodes_of_interest - else: - noi_type_indices = torch.searchsorted( - torch.as_tensor(self.__vertex_type_offsets["stop"], device="cuda"), - nodes_of_interest, - ) - - noi_types = vtypes.iloc[cupy.asarray(noi_type_indices)].reset_index( - drop=True - ) - noi_starts = self.__vertex_type_offsets["start"][noi_type_indices] - - noi_types = cudf.Series(noi_types, name="t").groupby("t").groups - - for type_name, ix in noi_types.items(): - # store the renumbering for this vertex type - # renumbered vertex id is the index of the old id - ix = torch.as_tensor(ix, device="cuda") - # subtract off the offsets - noi_index[type_name] = nodes_of_interest[ix] - noi_starts[ix] - - return noi_index - - def _get_sample_from_vertex_groups( - self, vertex_groups: Dict[str, TensorType] - ) -> TensorType: - """ - Inverse of _get_vertex_groups_from_sample() (although with de-offsetted ids). - Given a dictionary of node types and de-offsetted node ids, return - the global (non-renumbered) vertex ids. - - Example Input: {'horse': [1, 3, 5], 'duck': [1, 2]} - Output: [1, 3, 5, 14, 15] - """ - t = torch.tensor([], dtype=torch.int64, device="cuda") - - for group_name, ix in vertex_groups.items(): - type_id = self._numeric_vertex_type_from_name(group_name) - if not ix.is_cuda: - ix = ix.cuda() - offset = self.__vertex_type_offsets["start"][type_id] - u = ix + offset - t = torch.concatenate([t, u]) - - return t - - def _get_renumbered_edge_groups_from_sample( - self, sampling_results: cudf.DataFrame, noi_index: dict - ) -> Tuple[ - Dict[Tuple[str, str, str], torch.Tensor], - Tuple[Dict[Tuple[str, str, str], torch.Tensor]], - ]: - """ - Given a cudf (NOT dask_cudf) DataFrame of sampling results and a dictionary - of non-renumbered vertex ids grouped by vertex type, this method - outputs two dictionaries: - 1. row_dict - 2. 
col_dict - (1) row_dict corresponds to the renumbered source vertex ids grouped - by PyG edge type - (src, type, dst) tuple. - (2) col_dict corresponds to the renumbered destination vertex ids grouped - by PyG edge type (src, type, dst) tuple. - * The two outputs combined make a PyG "edge index". - * The ith element of each array corresponds to the same edge. - * The _get_vertex_groups_from_sample() method is usually called - before this one to get the noi_index. - - Example Input: Series({ - 'majors': [0, 5, 11, 3], - 'minors': [8, 2, 3, 5]}, - 'edge_type': [1, 3, 5, 14] - }), - { - 'blue_vertex': [0, 5], - 'red_vertex': [3, 11], - 'green_vertex': [2, 8] - } - Output: { - ('blue', 'etype1', 'green'): [0, 1], - ('red', 'etype2', 'red'): [1], - ('red', 'etype3', 'blue'): [0] - }, - { - ('blue', 'etype1', 'green'): [1, 0], - ('red', 'etype2', 'red'): [0], - ('red', 'etype3', 'blue'): [1] - } - - """ - row_dict = {} - col_dict = {} - # If there is only 1 edge type (includes heterogeneous graphs) - if len(self.edge_types) == 1: - t_pyg_type = list(self.__edge_types_to_attrs.values())[0].edge_type - src_type, _, dst_type = t_pyg_type - - # If there is only 1 node type (homogeneous) - # This should only occur if the cuGraph loader was - # not used. This logic is deprecated. - if len(self.node_types) == 1: - warnings.warn( - "Renumbering after sampling for homogeneous graphs is deprecated.", - FutureWarning, - ) - - # Create a dataframe mapping old ids to new ids. - vtype = src_type - id_table = noi_index[vtype] - id_map = cudf.Series( - cupy.arange(id_table.shape[0], dtype="int32"), - name="new_id", - index=cupy.asarray(id_table), - ).sort_index() - - # Renumber the majors using binary search - # Step 1: get the index of the new id - ix_r = torch.searchsorted( - torch.as_tensor(id_map.index.values, device="cuda"), - torch.as_tensor(sampling_results.majors.values, device="cuda"), - ) - # Step 2: Go from id indices to actual ids - row_dict[t_pyg_type] = torch.as_tensor(id_map.values, device="cuda")[ - ix_r - ] - - # Renumber the minors using binary search - # Step 1: get the index of the new id - ix_c = torch.searchsorted( - torch.as_tensor(id_map.index.values, device="cuda"), - torch.as_tensor(sampling_results.minors.values, device="cuda"), - ) - # Step 2: Go from id indices to actual ids - col_dict[t_pyg_type] = torch.as_tensor(id_map.values, device="cuda")[ - ix_c - ] - else: - # Handle the heterogeneous case where there is only 1 edge type - dst_id_table = noi_index[dst_type] - dst_id_map = cudf.DataFrame( - { - "dst": cupy.asarray(dst_id_table), - "new_id": cupy.arange(dst_id_table.shape[0]), - } - ).set_index("dst") - dst = dst_id_map["new_id"].loc[sampling_results.minors] - col_dict[t_pyg_type] = torch.as_tensor(dst.values, device="cuda") - - src_id_table = noi_index[src_type] - src_id_map = cudf.DataFrame( - { - "src": cupy.asarray(src_id_table), - "new_id": cupy.arange(src_id_table.shape[0]), - } - ).set_index("src") - src = src_id_map["new_id"].loc[sampling_results.majors] - row_dict[t_pyg_type] = torch.as_tensor(src.values, device="cuda") - - else: - # This will retrieve the single string representation. - # It needs to be converted to a tuple in the for loop below. 
- eoi_types = ( - cudf.Series(self.__edge_type_offsets["type"]) - .iloc[sampling_results.edge_type.astype("int32")] - .reset_index(drop=True) - ) - - eoi_types = cudf.Series(eoi_types, name="t").groupby("t").groups - - for pyg_can_edge_type_str, ix in eoi_types.items(): - pyg_can_edge_type = tuple(pyg_can_edge_type_str.split("__")) - - if self.__order == "CSR": - src_type, _, dst_type = pyg_can_edge_type - else: # CSC - dst_type, _, src_type = pyg_can_edge_type - - # Get the de-offsetted minors - dst_num_type = self._numeric_vertex_type_from_name(dst_type) - minors = torch.as_tensor( - sampling_results.minors.iloc[ix].values, device="cuda" - ) - minors -= self.__vertex_type_offsets["start"][dst_num_type] - - # Create the col entry for this type - dst_id_table = noi_index[dst_type] - dst_id_map = ( - cudf.Series(cupy.asarray(dst_id_table), name="dst") - .reset_index() - .rename(columns={"index": "new_id"}) - .set_index("dst") - ) - dst = dst_id_map["new_id"].loc[cupy.asarray(minors)] - col_dict[pyg_can_edge_type] = torch.as_tensor(dst.values, device="cuda") - - # Get the de-offsetted majors - src_num_type = self._numeric_vertex_type_from_name(src_type) - majors = torch.as_tensor( - sampling_results.majors.iloc[ix].values, device="cuda" - ) - majors -= self.__vertex_type_offsets["start"][src_num_type] - - # Create the row entry for this type - src_id_table = noi_index[src_type] - src_id_map = ( - cudf.Series(cupy.asarray(src_id_table), name="src") - .reset_index() - .rename(columns={"index": "new_id"}) - .set_index("src") - ) - src = src_id_map["new_id"].loc[cupy.asarray(majors)] - row_dict[pyg_can_edge_type] = torch.as_tensor(src.values, device="cuda") - - return row_dict, col_dict - - def put_tensor(self, tensor, attr) -> None: - raise NotImplementedError("Adding properties not supported.") - - def create_named_tensor( - self, attr_name: str, properties: List[str], vertex_type: str, dtype: str - ) -> None: - """ - Create a named tensor that contains a subset of - properties in the graph. - - Parameters - ---------- - attr_name : str - The name of the tensor within its group. - properties : list[str] - The properties the rows - of the tensor correspond to. - vertex_type : str - The vertex type associated with this new tensor property. - dtype : numpy/cupy dtype (i.e. 'int32') or torch dtype (i.e. torch.float) - The datatype of the tensor. Usually float32/float64. - """ - self._tensor_attr_dict[vertex_type].append( - CuGraphTensorAttr( - vertex_type, attr_name, properties=properties, dtype=dtype - ) - ) - - def __infer_edge_types( - self, - num_nodes_dict: Dict[str, int], - num_edges_dict: Dict[Tuple[str, str, str], int], - ) -> None: - self.__edge_types_to_attrs = {} - - for pyg_can_edge_type in sorted(num_edges_dict.keys()): - sz_src = num_nodes_dict[pyg_can_edge_type[0]] - sz_dst = num_nodes_dict[pyg_can_edge_type[-1]] - self.__edge_types_to_attrs[pyg_can_edge_type] = CuGraphEdgeAttr( - edge_type=pyg_can_edge_type, - layout=EdgeLayout.COO, - is_sorted=False, - size=(sz_src, sz_dst), - ) - - def __infer_existing_tensors(self, F) -> None: - """ - Infers the tensor attributes/features. - """ - for attr_name, types_with_attr in F.get_feature_list().items(): - for vt in types_with_attr: - attr_dtype = F.get_data(np.array([0]), vt, attr_name).dtype - self.create_named_tensor( - attr_name=attr_name, - properties=None, - vertex_type=vt, - dtype=attr_dtype, - ) - - def get_all_tensor_attrs(self) -> List[CuGraphTensorAttr]: - """ - Obtains all tensor attributes stored in this feature store. 
- """ - # unpack and return the list of lists - it = chain.from_iterable(self._tensor_attr_dict.values()) - return [CuGraphTensorAttr.cast(c) for c in it] - - def _get_tensor(self, attr: CuGraphTensorAttr) -> TensorType: - feature_backend = self.__features.backend - cols = attr.properties - - idx = attr.index - if idx is not None: - if feature_backend == "torch": - if not isinstance(idx, torch.Tensor): - raise TypeError( - f"Type {type(idx)} invalid" - f" for feature store backend {feature_backend}" - ) - idx = idx.cpu() - elif feature_backend == "numpy": - # allow feature indexing through cupy arrays - if isinstance(idx, cupy.ndarray): - idx = idx.get() - elif isinstance(idx, torch.Tensor): - idx = np.asarray(idx.cpu()) - - if cols is None: - t = self.__features.get_data(idx, attr.group_name, attr.attr_name) - if idx is None: - t = t[-1] - - if isinstance(t, np.ndarray): - t = torch.as_tensor(t, device="cpu") - - return t - - else: - t = self.__features.get_data(idx, attr.group_name, cols[0]) - - if len(t.shape) == 1: - t = torch.tensor([t]) - - for col in cols[1:]: - u = self.__features.get_data(idx, attr.group_name, col) - - if len(u.shape) == 1: - u = torch.tensor([u]) - - t = torch.concatenate([t, u]) - - return t - - def _multi_get_tensor(self, attrs: List[CuGraphTensorAttr]) -> List[TensorType]: - return [self._get_tensor(attr) for attr in attrs] - - def multi_get_tensor(self, attrs: List[CuGraphTensorAttr]) -> List[TensorType]: - """ - Synchronously obtains a :class:`FeatureTensorType` object from the - feature store for each tensor associated with the attributes in - `attrs`. - - Parameters - ---------- - attrs (List[TensorAttr]): a list of :class:`TensorAttr` attributes - that identify the tensors to get. - - Returns - ------- - List[FeatureTensorType]: a Tensor of the same type as the index for - each attribute. - - Raises - ------ - KeyError: if a tensor corresponding to an attr was not found. - ValueError: if any input `TensorAttr` is not fully specified. - """ - attrs = [ - self._infer_unspecified_attr(self._tensor_attr_cls.cast(attr)) - for attr in attrs - ] - bad_attrs = [attr for attr in attrs if not attr.is_fully_specified()] - if len(bad_attrs) > 0: - raise ValueError( - f"The input TensorAttr(s) '{bad_attrs}' are not fully " - f"specified. Please fully specify them by specifying all " - f"'UNSET' fields" - ) - - tensors = self._multi_get_tensor(attrs) - - bad_attrs = [attrs[i] for i, v in enumerate(tensors) if v is None] - if len(bad_attrs) > 0: - raise KeyError( - f"Tensors corresponding to attributes " f"'{bad_attrs}' were not found" - ) - - return [tensor for attr, tensor in zip(attrs, tensors)] - - def get_tensor(self, *args, **kwargs) -> TensorType: - """ - Synchronously obtains a :class:`FeatureTensorType` object from the - feature store. Feature store implementors guarantee that the call - :obj:`get_tensor(put_tensor(tensor, attr), attr) = tensor` holds. - - Parameters - ---------- - **attr (TensorAttr): Any relevant tensor attributes that correspond - to the feature tensor. See the :class:`TensorAttr` - documentation for required and optional attributes. It is the - job of implementations of a :class:`FeatureStore` to store this - metadata in a meaningful way that allows for tensor retrieval - from a :class:`TensorAttr` object. - - Returns - ------- - FeatureTensorType: a Tensor of the same type as the index. - - Raises - ------ - KeyError: if the tensor corresponding to attr was not found. - ValueError: if the input `TensorAttr` is not fully specified. 
- """ - - attr = self._tensor_attr_cls.cast(*args, **kwargs) - attr = self._infer_unspecified_attr(attr) - - if not attr.is_fully_specified(): - raise ValueError( - f"The input TensorAttr '{attr}' is not fully " - f"specified. Please fully specify the input by " - f"specifying all 'UNSET' fields." - ) - - tensor = self._get_tensor(attr) - if tensor is None: - raise KeyError(f"A tensor corresponding to '{attr}' was not found") - return tensor - - def _get_tensor_size(self, attr: CuGraphTensorAttr) -> Union[List, int]: - return self._get_tensor(attr).size() - - def get_tensor_size(self, *args, **kwargs) -> Union[List, int]: - """ - Obtains the size of a tensor given its attributes, or :obj:`None` - if the tensor does not exist. - """ - attr = self._tensor_attr_cls.cast(*args, **kwargs) - if not attr.is_set("index"): - attr.index = None - return self._get_tensor_size(attr) - - def _remove_tensor(self, attr): - raise NotImplementedError("Removing features not supported") - - def _infer_unspecified_attr(self, attr: CuGraphTensorAttr) -> CuGraphTensorAttr: - if attr.properties == _field_status.UNSET: - # attempt to infer property names - if attr.group_name in self._tensor_attr_dict: - for n in self._tensor_attr_dict[attr.group_name]: - if attr.attr_name == n.attr_name: - attr.properties = n.properties - else: - raise KeyError(f"Invalid group name {attr.group_name}") - - if attr.dtype == _field_status.UNSET: - # attempt to infer dtype - if attr.group_name in self._tensor_attr_dict: - for n in self._tensor_attr_dict[attr.group_name]: - if attr.attr_name == n.attr_name: - attr.dtype = n.dtype - - return attr - - def __len__(self): - return len(self.get_all_tensor_attrs()) diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/examples/graph_sage_mg.py b/python/cugraph-pyg/build/lib/cugraph_pyg/examples/graph_sage_mg.py deleted file mode 100644 index f10ac939..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/examples/graph_sage_mg.py +++ /dev/null @@ -1,432 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from ogb.nodeproppred import NodePropPredDataset - -import time -import argparse -import gc - -import torch -import numpy as np - -from torch_geometric.nn import CuGraphSAGEConv - -import torch.nn as nn -import torch.nn.functional as F - -import torch.distributed as td -import torch.multiprocessing as tmp -from torch.nn.parallel import DistributedDataParallel as ddp - -from typing import List - - -class CuGraphSAGE(nn.Module): - def __init__(self, in_channels, hidden_channels, out_channels, num_layers): - super().__init__() - - self.convs = torch.nn.ModuleList() - self.convs.append(CuGraphSAGEConv(in_channels, hidden_channels)) - for _ in range(num_layers - 1): - conv = CuGraphSAGEConv(hidden_channels, hidden_channels) - self.convs.append(conv) - - self.lin = nn.Linear(hidden_channels, out_channels) - - def forward(self, x, edge, size): - edge_csc = CuGraphSAGEConv.to_csc(edge, (size[0], size[0])) - for conv in self.convs: - x = conv(x, edge_csc)[: size[1]] - x = F.relu(x) - x = F.dropout(x, p=0.5) - - return self.lin(x) - - -def enable_cudf_spilling(): - import cudf - - cudf.set_option("spill", True) - - -def init_pytorch_worker(rank, devices, manager_ip, manager_port) -> None: - import cupy - import rmm - - device_id = devices[rank] - - rmm.reinitialize( - devices=[device_id], - pool_allocator=False, - ) - - # torch.cuda.change_current_allocator(rmm.rmm_torch_allocator) - # cupy.cuda.set_allocator(rmm.rmm_cupy_allocator) - - cupy.cuda.Device(device_id).use() - torch.cuda.set_device(device_id) - - # Pytorch training worker initialization - dist_init_method = f"tcp://{manager_ip}:{manager_port}" - - torch.distributed.init_process_group( - backend="nccl", - init_method=dist_init_method, - world_size=len(devices), - rank=rank, - ) - - # enable_cudf_spilling() - - -def start_cugraph_dask_client(rank, dask_scheduler_file): - print( - "Connecting to dask... " - "(warning: this may take a while depending on your configuration)" - ) - start_time_connect_dask = time.perf_counter_ns() - from distributed import Client - from cugraph.dask.comms import comms as Comms - - client = Client(scheduler_file=dask_scheduler_file) - Comms.initialize(p2p=True) - - end_time_connect_dask = time.perf_counter_ns() - print( - f"Successfully connected to dask on rank {rank}, took " - f"{(end_time_connect_dask - start_time_connect_dask) / 1e9:3.4f} s" - ) - return client - - -def stop_cugraph_dask_client(): - from cugraph.dask.comms import comms as Comms - - Comms.destroy() - - from dask.distributed import get_client - - get_client().close() - - -def train( - rank, - torch_devices: List[int], - manager_ip: str, - manager_port: int, - dask_scheduler_file: str, - num_epochs: int, - features_on_gpu=True, -) -> None: - """ - Parameters - ---------- - device: int - The CUDA device where the model, graph data, and node labels will be stored. - features_on_gpu: bool - Whether to store a replica of features on each worker's GPU. If False, - all features will be stored on the CPU. 
- """ - - start_time_preprocess = time.perf_counter_ns() - - world_size = len(torch_devices) - device_id = torch_devices[rank] - features_device = device_id if features_on_gpu else "cpu" - init_pytorch_worker(rank, torch_devices, manager_ip, manager_port) - td.barrier() - - client = start_cugraph_dask_client(rank, dask_scheduler_file) - - from distributed import Event as Dask_Event - - event = Dask_Event("cugraph_store_creation_event") - download_event = Dask_Event("dataset_download_event") - - td.barrier() - - import cugraph - from cugraph_pyg.data import CuGraphStore - from cugraph_pyg.loader import CuGraphNeighborLoader - - if rank == 0: - print("Rank 0 downloading dataset") - dataset = NodePropPredDataset(name="ogbn-mag") - data = dataset[0] - download_event.set() - print("Dataset downloaded") - else: - if download_event.wait(timeout=1000): - print(f"Rank {rank} loading dataset") - dataset = NodePropPredDataset(name="ogbn-mag") - data = dataset[0] - print(f"Rank {rank} loaded dataset successfully") - - ei = data[0]["edge_index_dict"][("paper", "cites", "paper")] - G = { - ("paper", "cites", "paper"): np.stack( - [np.concatenate([ei[0], ei[1]]), np.concatenate([ei[1], ei[0]])] - ) - } - N = {"paper": data[0]["num_nodes_dict"]["paper"]} - - fs = cugraph.gnn.FeatureStore(backend="torch") - - fs.add_data( - torch.as_tensor(data[0]["node_feat_dict"]["paper"], device=features_device), - "paper", - "x", - ) - - fs.add_data(torch.as_tensor(data[1]["paper"].T[0], device=device_id), "paper", "y") - - num_papers = data[0]["num_nodes_dict"]["paper"] - - if rank == 0: - train_perc = 0.1 - all_train_nodes = torch.randperm(num_papers) - all_train_nodes = all_train_nodes[: int(train_perc * num_papers)] - train_nodes = all_train_nodes[: int(len(all_train_nodes) / world_size)] - - train_mask = torch.full((num_papers,), -1, device=device_id) - train_mask[train_nodes] = 1 - fs.add_data(train_mask, "paper", "train") - - print(f"Rank {rank} finished loading graph and feature data") - - if rank == 0: - print("Rank 0 creating its cugraph store and initializing distributed graph") - # Rank 0 will initialize the distributed cugraph graph. - cugraph_store_create_start = time.perf_counter_ns() - print("G:", G[("paper", "cites", "paper")].shape) - cugraph_store = CuGraphStore(fs, G, N, multi_gpu=True) - cugraph_store_create_end = time.perf_counter_ns() - print( - "cuGraph Store created on rank 0 in " - f"{(cugraph_store_create_end - cugraph_store_create_start) / 1e9:3.4f} s" - ) - client.publish_dataset(train_nodes=all_train_nodes) - event.set() - print("Rank 0 done with cugraph store creation") - else: - if event.wait(timeout=1000): - print(f"Rank {rank} creating cugraph store") - train_nodes = client.get_dataset("train_nodes") - train_nodes = train_nodes[ - int(rank * len(train_nodes) / world_size) : int( - (rank + 1) * len(train_nodes) / world_size - ) - ] - - train_mask = torch.full((num_papers,), -1, device=device_id) - train_mask[train_nodes] = 1 - fs.add_data(train_mask, "paper", "train") - - # Will automatically use the stored distributed cugraph graph on rank 0. 
- cugraph_store_create_start = time.perf_counter_ns() - cugraph_store = CuGraphStore(fs, G, N, multi_gpu=True) - cugraph_store_create_end = time.perf_counter_ns() - print( - f"Rank {rank} created cugraph store in " - f"{(cugraph_store_create_end - cugraph_store_create_start) / 1e9:3.4f}" - " s" - ) - print(f"Rank {rank} done with cugraph store creation") - - end_time_preprocess = time.perf_counter_ns() - print(f"rank {rank}: train {train_nodes.shape}", flush=True) - print( - f"rank {rank}: all preprocessing took" - f" {(end_time_preprocess - start_time_preprocess) / 1e9:3.4f}", - flush=True, - ) - td.barrier() - model = ( - CuGraphSAGE(in_channels=128, hidden_channels=64, out_channels=349, num_layers=3) - .to(torch.float32) - .to(device_id) - ) - model = ddp(model, device_ids=[device_id], output_device=device_id) - td.barrier() - - optimizer = torch.optim.Adam(model.parameters(), lr=0.01) - - for epoch in range(num_epochs): - start_time_train = time.perf_counter_ns() - model.train() - - start_time_loader = time.perf_counter_ns() - cugraph_bulk_loader = CuGraphNeighborLoader( - cugraph_store, - train_nodes, - batch_size=250, - num_neighbors=[10, 10, 10], - seeds_per_call=1000, - batches_per_partition=2, - replace=False, - ) - end_time_loader = time.perf_counter_ns() - total_time_loader = (end_time_loader - start_time_loader) / 1e9 - - total_loss = 0 - num_batches = 0 - - print(f"rank {rank} starting epoch {epoch}") - with td.algorithms.join.Join([model]): - total_time_sample = 0 - total_time_forward = 0 - total_time_backward = 0 - - start_time_sample = time.perf_counter_ns() - for iter_i, hetero_data in enumerate(cugraph_bulk_loader): - end_time_sample = time.perf_counter_ns() - total_time_sample += (end_time_sample - start_time_sample) / 1e9 - num_batches += 1 - - if iter_i % 20 == 0: - print(f"iteration {iter_i}") - - # train - train_mask = hetero_data.train_dict["paper"] - y_true = hetero_data.y_dict["paper"] - - start_time_forward = time.perf_counter_ns() - y_pred = model( - hetero_data.x_dict["paper"].to(device_id).to(torch.float32), - hetero_data.edge_index_dict[("paper", "cites", "paper")].to( - device_id - ), - (len(y_true), len(y_true)), - ) - end_time_forward = time.perf_counter_ns() - total_time_forward += (end_time_forward - start_time_forward) / 1e9 - - y_true = F.one_hot( - y_true[train_mask].to(torch.int64), num_classes=349 - ).to(torch.float32) - - y_pred = y_pred[train_mask] - - loss = F.cross_entropy(y_pred, y_true) - - start_time_backward = time.perf_counter_ns() - optimizer.zero_grad() - loss.backward() - optimizer.step() - end_time_backward = time.perf_counter_ns() - total_time_backward += (end_time_backward - start_time_backward) / 1e9 - - total_loss += loss.item() - - del y_true - del y_pred - del loss - del hetero_data - gc.collect() - - start_time_sample = time.perf_counter_ns() - - end_time_train = time.perf_counter_ns() - print( - f"epoch {epoch} " - f"total time: {(end_time_train - start_time_train) / 1e9:3.4f} s" - f"\nloader create time per batch: {total_time_loader / num_batches} s" - f"\nsampling/load time per batch: {total_time_sample / num_batches} s" - f"\nforward time per batch: {total_time_forward / num_batches} s" - f"\nbackward time per batch: {total_time_backward / num_batches} s" - f"\nnum batches: {num_batches}" - ) - print(f"loss after epoch {epoch}: {total_loss / num_batches}") - - td.barrier() - if rank == 0: - print("DONE", flush=True) - client.unpublish_dataset("train_nodes") - event.clear() - - td.destroy_process_group() - - -def parse_args(): 
- parser = argparse.ArgumentParser() - parser.add_argument( - "--torch_devices", - type=str, - default="0,1", - help="GPU to allocate to pytorch for model, graph data, and node label storage", - required=False, - ) - - parser.add_argument( - "--num_epochs", - type=int, - default=1, - help="Number of training epochs", - required=False, - ) - - parser.add_argument( - "--features_on_gpu", - type=bool, - default=True, - help="Whether to store the features on each worker's GPU", - required=False, - ) - - parser.add_argument( - "--torch_manager_ip", - type=str, - default="127.0.0.1", - help="The torch distributed manager ip address", - required=False, - ) - - parser.add_argument( - "--torch_manager_port", - type=str, - default="12346", - help="The torch distributed manager port", - required=False, - ) - - parser.add_argument( - "--dask_scheduler_file", - type=str, - help="The path to the dask scheduler file", - required=True, - ) - - return parser.parse_args() - - -def main(): - args = parse_args() - - torch_devices = [int(d) for d in args.torch_devices.split(",")] - - train_args = ( - torch_devices, - args.torch_manager_ip, - args.torch_manager_port, - args.dask_scheduler_file, - args.num_epochs, - args.features_on_gpu, - ) - - tmp.spawn(train, args=train_args, nprocs=len(torch_devices)) - - -if __name__ == "__main__": - main() diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/examples/graph_sage_sg.py b/python/cugraph-pyg/build/lib/cugraph_pyg/examples/graph_sage_sg.py deleted file mode 100644 index c27fd781..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/examples/graph_sage_sg.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import time -import argparse -import gc - -import torch - -from torch_geometric.nn import CuGraphSAGEConv - -import torch.nn as nn -import torch.nn.functional as F - -from typing import Union - - -class CuGraphSAGE(nn.Module): - def __init__(self, in_channels, hidden_channels, out_channels, num_layers): - super().__init__() - - self.convs = torch.nn.ModuleList() - self.convs.append(CuGraphSAGEConv(in_channels, hidden_channels)) - for _ in range(num_layers - 1): - conv = CuGraphSAGEConv(hidden_channels, hidden_channels) - self.convs.append(conv) - - self.lin = nn.Linear(hidden_channels, out_channels) - - def forward(self, x, edge, size): - edge_csc = CuGraphSAGEConv.to_csc(edge, (size[0], size[0])) - for conv in self.convs: - x = conv(x, edge_csc)[: size[1]] - x = F.relu(x) - x = F.dropout(x, p=0.5) - - return self.lin(x) - - -def init_pytorch_worker(device_id: int) -> None: - import cupy - import rmm - - rmm.reinitialize( - devices=[device_id], - pool_allocator=False, - ) - - cupy.cuda.Device(device_id).use() - torch.cuda.set_device(device_id) - - -def train(device: int, features_device: Union[str, int] = "cpu", num_epochs=2) -> None: - """ - Parameters - ---------- - device: int - The CUDA device where the model, graph data, and node labels will be stored. 
- features_device: Union[str, int] - The device (CUDA device or CPU) where features will be stored. - """ - - init_pytorch_worker(device) - - import cugraph - from cugraph_pyg.data import CuGraphStore - from cugraph_pyg.loader import CuGraphNeighborLoader - - from ogb.nodeproppred import NodePropPredDataset - - dataset = NodePropPredDataset(name="ogbn-mag") - data = dataset[0] - - G = data[0]["edge_index_dict"] - N = data[0]["num_nodes_dict"] - - fs = cugraph.gnn.FeatureStore(backend="torch") - - fs.add_data( - torch.as_tensor(data[0]["node_feat_dict"]["paper"], device=features_device), - "paper", - "x", - ) - - fs.add_data(torch.as_tensor(data[1]["paper"].T[0], device=device), "paper", "y") - - num_papers = data[0]["num_nodes_dict"]["paper"] - train_perc = 0.1 - train_nodes = torch.randperm(num_papers) - train_nodes = train_nodes[: int(train_perc * num_papers)] - train_mask = torch.full((num_papers,), -1, device=device) - train_mask[train_nodes] = 1 - fs.add_data(train_mask, "paper", "train") - - cugraph_store = CuGraphStore(fs, G, N) - - model = ( - CuGraphSAGE(in_channels=128, hidden_channels=64, out_channels=349, num_layers=3) - .to(torch.float32) - .to(device) - ) - - optimizer = torch.optim.Adam(model.parameters(), lr=0.01) - - for epoch in range(num_epochs): - start_time_train = time.perf_counter_ns() - model.train() - - cugraph_bulk_loader = CuGraphNeighborLoader( - cugraph_store, train_nodes, batch_size=500, num_neighbors=[10, 25] - ) - - total_loss = 0 - num_batches = 0 - - # This context manager will handle different # batches per rank - # barrier() cannot do this since the number of ops per rank is - # different. It essentially acts like barrier would if the - # number of ops per rank was the same. - for epoch in range(num_epochs): - for iter_i, hetero_data in enumerate(cugraph_bulk_loader): - num_batches += 1 - if iter_i % 20 == 0: - print(f"iteration {iter_i}") - - # train - train_mask = hetero_data.train_dict["paper"] - y_true = hetero_data.y_dict["paper"] - - y_pred = model( - hetero_data.x_dict["paper"].to(device).to(torch.float32), - hetero_data.edge_index_dict[("paper", "cites", "paper")].to(device), - (len(y_true), len(y_true)), - ) - - y_true = F.one_hot( - y_true[train_mask].to(torch.int64), num_classes=349 - ).to(torch.float32) - - y_pred = y_pred[train_mask] - - loss = F.cross_entropy(y_pred, y_true) - - optimizer.zero_grad() - loss.backward() - optimizer.step() - total_loss += loss.item() - - del y_true - del y_pred - del loss - del hetero_data - gc.collect() - - end_time_train = time.perf_counter_ns() - print( - f"epoch {epoch} time: " - f"{(end_time_train - start_time_train) / 1e9:3.4f} s" - ) - print(f"loss after epoch {epoch}: {total_loss / num_batches}") - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--device", - type=int, - default=0, - help="GPU to allocate to pytorch for model, graph data, and node label storage", - required=False, - ) - - parser.add_argument( - "--features_device", - type=str, - default="0", - help="Device to allocate to pytorch for feature storage", - required=False, - ) - - parser.add_argument( - "--num_epochs", - type=int, - default=1, - help="Number of training epochs", - required=False, - ) - - return parser.parse_args() - - -def main(): - args = parse_args() - - try: - features_device = int(args.features_device) - except ValueError: - features_device = args.features_device - - train(args.device, features_device, args.num_epochs) - - -if __name__ == "__main__": - main() diff --git 
a/python/cugraph-pyg/build/lib/cugraph_pyg/loader/__init__.py b/python/cugraph-pyg/build/lib/cugraph_pyg/loader/__init__.py deleted file mode 100644 index 20745111..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/loader/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from cugraph.utilities.api_tools import experimental_warning_wrapper - -from cugraph_pyg.loader.cugraph_node_loader import EXPERIMENTAL__CuGraphNeighborLoader - -CuGraphNeighborLoader = experimental_warning_wrapper( - EXPERIMENTAL__CuGraphNeighborLoader -) - -from cugraph_pyg.loader.cugraph_node_loader import EXPERIMENTAL__BulkSampleLoader - -BulkSampleLoader = experimental_warning_wrapper(EXPERIMENTAL__BulkSampleLoader) diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/loader/cugraph_node_loader.py b/python/cugraph-pyg/build/lib/cugraph_pyg/loader/cugraph_node_loader.py deleted file mode 100644 index 64416863..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/loader/cugraph_node_loader.py +++ /dev/null @@ -1,534 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import tempfile - -import os -import re -import warnings - -import cupy -import cudf - -from cugraph.experimental.gnn import BulkSampler -from cugraph.utilities.utils import import_optional, MissingModule - -from cugraph_pyg.data import CuGraphStore -from cugraph_pyg.sampler.cugraph_sampler import ( - _sampler_output_from_sampling_results_heterogeneous, - _sampler_output_from_sampling_results_homogeneous_csr, - _sampler_output_from_sampling_results_homogeneous_coo, - filter_cugraph_store_csc, -) - -from typing import Union, Tuple, Sequence, List, Dict - -torch_geometric = import_optional("torch_geometric") -torch = import_optional("torch") -InputNodes = ( - Sequence - if isinstance(torch_geometric, MissingModule) - else torch_geometric.typing.InputNodes -) - - -class EXPERIMENTAL__BulkSampleLoader: - - __ex_parquet_file = re.compile(r"batch=([0-9]+)\-([0-9]+)\.parquet") - - def __init__( - self, - feature_store: CuGraphStore, - graph_store: CuGraphStore, - input_nodes: InputNodes = None, - batch_size: int = 0, - shuffle: bool = False, - edge_types: Sequence[Tuple[str]] = None, - directory: Union[str, tempfile.TemporaryDirectory] = None, - input_files: List[str] = None, - starting_batch_id: int = 0, - batches_per_partition: int = 100, - # Sampler args - num_neighbors: Union[List[int], Dict[Tuple[str, str, str], List[int]]] = None, - replace: bool = True, - compression: str = "COO", - # Other kwargs for the BulkSampler - **kwargs, - ): - """ - Executes a bulk sampling job immediately upon creation. - Allows iteration over the returned results. - - Parameters - ---------- - feature_store: CuGraphStore - The feature store containing features for the graph. - - graph_store: CuGraphStore - The graph store containing the graph structure. - - input_nodes: InputNodes - The input nodes associated with this sampler. - If None, this loader will load batches - from disk rather than performing sampling in memory. - - batch_size: int - The number of input nodes per sampling batch. - Generally required unless loading already-sampled - data from disk. - - shuffle: bool (optional, default=False) - Whether to shuffle the input indices. - If True, will shuffle the input indices. - If False, will create batches in the original order. - - edge_types: Sequence[Tuple[str]] (optional, default=None) - The desired edge types for the subgraph. - Defaults to all edges in the graph. - - directory: str (optional, default=new tempdir) - The path of the directory to write samples to. - Defaults to a new generated temporary directory. - - input_files: List[str] (optional, default=None) - The input files to read from the directory containing - samples. This argument is only used when loading - alread-sampled batches from disk. - - starting_batch_id: int (optional, default=0) - The starting id for each batch. Defaults to 0. - - batches_per_partition: int (optional, default=100) - The number of batches in each output partition. - Defaults to 100. Gets passed to the bulk - sampler if there is one; otherwise, this argument - is used to determine which files to read. - - num_neighbors: Union[List[int], - Dict[Tuple[str, str, str], List[int]]] (required) - The number of neighbors to sample for each node in each iteration. - If an entry is set to -1, all neighbors will be included. - In heterogeneous graphs, may also take in a dictionary denoting - the number of neighbors to sample for each individual edge type. - - Note: in cuGraph, only one value of num_neighbors is currently supported. 
- Passing in a dictionary will result in an exception. - """ - - self.__feature_store = feature_store - self.__graph_store = graph_store - self.__next_batch = -1 - self.__end_exclusive = -1 - self.__batches_per_partition = batches_per_partition - self.__starting_batch_id = starting_batch_id - - self._total_read_time = 0.0 - self._total_convert_time = 0.0 - self._total_feature_time = 0.0 - - if input_nodes is None: - # Will be loading from disk - self.__num_batches = input_nodes - self.__directory = directory - if input_files is None: - if isinstance(self.__directory, str): - self.__input_files = iter(os.listdir(self.__directory)) - else: - self.__input_files = iter(os.listdir(self.__directory.name)) - else: - self.__input_files = iter(input_files) - return - - input_type, input_nodes = torch_geometric.loader.utils.get_input_nodes( - (feature_store, graph_store), input_nodes - ) - if input_type is not None: - input_nodes = graph_store._get_sample_from_vertex_groups( - {input_type: input_nodes} - ) - - if batch_size is None or batch_size < 1: - raise ValueError("Batch size must be >= 1") - - self.__directory = ( - tempfile.TemporaryDirectory() if directory is None else directory - ) - - if isinstance(num_neighbors, dict): - raise ValueError("num_neighbors dict is currently unsupported!") - - if "renumber" in kwargs: - warnings.warn( - "Setting renumbering manually could result in invalid output," - " please ensure you intended to do this." - ) - renumber = kwargs.pop("renumber") - else: - renumber = ( - True - if ( - (len(self.__graph_store.node_types) == 1) - and (len(self.__graph_store.edge_types) == 1) - ) - else False - ) - - bulk_sampler = BulkSampler( - batch_size, - self.__directory - if isinstance(self.__directory, str) - else self.__directory.name, - self.__graph_store._subgraph(edge_types), - fanout_vals=num_neighbors, - with_replacement=replace, - batches_per_partition=self.__batches_per_partition, - renumber=renumber, - use_legacy_names=False, - deduplicate_sources=True, - prior_sources_behavior="exclude", - include_hop_column=(compression == "COO"), - **kwargs, - ) - - # Make sure indices are in cupy - input_nodes = cupy.asarray(input_nodes) - - # Shuffle - if shuffle: - cupy.random.shuffle(input_nodes) - - # Truncate if we can't evenly divide the input array - stop = (len(input_nodes) // batch_size) * batch_size - input_nodes = input_nodes[:stop] - - # Split into batches - input_nodes = cupy.split(input_nodes, len(input_nodes) // batch_size) - - self.__num_batches = 0 - for batch_num, batch_i in enumerate(input_nodes): - self.__num_batches += 1 - bulk_sampler.add_batches( - cudf.DataFrame( - { - "start": batch_i, - "batch": cupy.full( - batch_size, batch_num + starting_batch_id, dtype="int32" - ), - } - ), - start_col_name="start", - batch_col_name="batch", - ) - - bulk_sampler.flush() - self.__input_files = iter( - os.listdir( - self.__directory - if isinstance(self.__directory, str) - else self.__directory.name - ) - ) - - def __next__(self): - from time import perf_counter - - start_time_read_data = perf_counter() - - # Load the next set of sampling results if necessary - if self.__next_batch >= self.__end_exclusive: - if self.__directory is None: - raise StopIteration - - # Read the next parquet file into memory - dir_path = ( - self.__directory - if isinstance(self.__directory, str) - else self.__directory.name - ) - - # Will raise StopIteration if there are no files left - try: - fname = next(self.__input_files) - except StopIteration as ex: - # Won't delete a non-temp 
dir (since it would just be deleting a string) - del self.__directory - self.__directory = None - raise StopIteration(ex) - - m = self.__ex_parquet_file.match(fname) - if m is None: - raise ValueError(f"Invalid parquet filename {fname}") - - self.__start_inclusive, end_inclusive = [int(g) for g in m.groups()] - self.__next_batch = self.__start_inclusive - self.__end_exclusive = end_inclusive + 1 - - parquet_path = os.path.join( - dir_path, - fname, - ) - - raw_sample_data = cudf.read_parquet(parquet_path) - - if "map" in raw_sample_data.columns: - if "renumber_map_offsets" not in raw_sample_data.columns: - num_batches = end_inclusive - self.__start_inclusive + 1 - - map_end = raw_sample_data["map"].iloc[num_batches] - - map = torch.as_tensor( - raw_sample_data["map"].iloc[0:map_end], device="cuda" - ) - raw_sample_data.drop("map", axis=1, inplace=True) - - self.__renumber_map_offsets = map[0 : num_batches + 1] - map[0] - self.__renumber_map = map[num_batches + 1 :] - else: - self.__renumber_map = raw_sample_data["map"] - self.__renumber_map_offsets = raw_sample_data[ - "renumber_map_offsets" - ] - raw_sample_data.drop( - columns=["map", "renumber_map_offsets"], inplace=True - ) - - self.__renumber_map.dropna(inplace=True) - self.__renumber_map = torch.as_tensor( - self.__renumber_map, device="cuda" - ) - - self.__renumber_map_offsets.dropna(inplace=True) - self.__renumber_map_offsets = torch.as_tensor( - self.__renumber_map_offsets, device="cuda" - ) - - else: - self.__renumber_map = None - - self.__data = raw_sample_data - self.__coo = "majors" in self.__data.columns - if self.__coo: - self.__data.dropna(inplace=True) - - if ( - len(self.__graph_store.edge_types) == 1 - and len(self.__graph_store.node_types) == 1 - ): - if self.__coo: - group_cols = ["batch_id", "hop_id"] - self.__data_index = self.__data.groupby( - group_cols, as_index=True - ).agg({"majors": "max", "minors": "max"}) - self.__data_index.rename( - columns={"majors": "src_max", "minors": "dst_max"}, - inplace=True, - ) - self.__data_index = self.__data_index.to_dict(orient="index") - else: - self.__data_index = None - - self.__label_hop_offsets = self.__data["label_hop_offsets"] - self.__data.drop(columns=["label_hop_offsets"], inplace=True) - self.__label_hop_offsets.dropna(inplace=True) - self.__label_hop_offsets = torch.as_tensor( - self.__label_hop_offsets, device="cuda" - ) - self.__label_hop_offsets -= self.__label_hop_offsets[0].clone() - - self.__major_offsets = self.__data["major_offsets"] - self.__data.drop(columns="major_offsets", inplace=True) - self.__major_offsets.dropna(inplace=True) - self.__major_offsets = torch.as_tensor( - self.__major_offsets, device="cuda" - ) - self.__major_offsets -= self.__major_offsets[0].clone() - - self.__minors = self.__data["minors"] - self.__data.drop(columns="minors", inplace=True) - self.__minors.dropna(inplace=True) - self.__minors = torch.as_tensor(self.__minors, device="cuda") - - num_batches = self.__end_exclusive - self.__start_inclusive - offsets_len = len(self.__label_hop_offsets) - 1 - if offsets_len % num_batches != 0: - raise ValueError("invalid label-hop offsets") - self.__fanout_length = int(offsets_len / num_batches) - - end_time_read_data = perf_counter() - self._total_read_time += end_time_read_data - start_time_read_data - - # Pull the next set of sampling results out of the dataframe in memory - if self.__coo: - f = self.__data["batch_id"] == self.__next_batch - if self.__renumber_map is not None: - i = self.__next_batch - self.__start_inclusive - - # this 
should avoid d2h copy - current_renumber_map = self.__renumber_map[ - self.__renumber_map_offsets[i] : self.__renumber_map_offsets[i + 1] - ] - - else: - current_renumber_map = None - - start_time_convert = perf_counter() - # Get and return the sampled subgraph - if ( - len(self.__graph_store.edge_types) == 1 - and len(self.__graph_store.node_types) == 1 - ): - if self.__coo: - sampler_output = _sampler_output_from_sampling_results_homogeneous_coo( - self.__data[f], - current_renumber_map, - self.__graph_store, - self.__data_index, - self.__next_batch, - ) - else: - i = (self.__next_batch - self.__start_inclusive) * self.__fanout_length - current_label_hop_offsets = self.__label_hop_offsets[ - i : i + self.__fanout_length + 1 - ] - - current_major_offsets = self.__major_offsets[ - current_label_hop_offsets[0] : (current_label_hop_offsets[-1] + 1) - ] - - current_minors = self.__minors[ - current_major_offsets[0] : current_major_offsets[-1] - ] - - sampler_output = _sampler_output_from_sampling_results_homogeneous_csr( - current_major_offsets, - current_minors, - current_renumber_map, - self.__graph_store, - current_label_hop_offsets, - self.__data_index, - self.__next_batch, - ) - else: - sampler_output = _sampler_output_from_sampling_results_heterogeneous( - self.__data[f], current_renumber_map, self.__graph_store - ) - - # Get ready for next iteration - self.__next_batch += 1 - - end_time_convert = perf_counter() - self._total_convert_time += end_time_convert - start_time_convert - - start_time_feature = perf_counter() - # Create a PyG HeteroData object, loading the required features - if self.__coo: - out = torch_geometric.loader.utils.filter_custom_store( - self.__feature_store, - self.__graph_store, - sampler_output.node, - sampler_output.row, - sampler_output.col, - sampler_output.edge, - ) - else: - if self.__graph_store.order == "CSR": - raise ValueError("CSR format incompatible with CSC output") - - out = filter_cugraph_store_csc( - self.__feature_store, - self.__graph_store, - sampler_output.node, - sampler_output.row, - sampler_output.col, - sampler_output.edge, - ) - - # Account for CSR format in cuGraph vs. CSC format in PyG - if self.__coo and self.__graph_store.order == "CSC": - for edge_type in out.edge_index_dict: - out[edge_type].edge_index = out[edge_type].edge_index.flip(dims=[0]) - - out.set_value_dict("num_sampled_nodes", sampler_output.num_sampled_nodes) - out.set_value_dict("num_sampled_edges", sampler_output.num_sampled_edges) - - end_time_feature = perf_counter() - self._total_feature_time = end_time_feature - start_time_feature - - return out - - @property - def _starting_batch_id(self): - return self.__starting_batch_id - - def __iter__(self): - return self - - -class EXPERIMENTAL__CuGraphNeighborLoader: - def __init__( - self, - data: Union[CuGraphStore, Tuple[CuGraphStore, CuGraphStore]], - input_nodes: Union[InputNodes, int] = None, - batch_size: int = None, - **kwargs, - ): - """ - Parameters - ---------- - data: CuGraphStore or (CuGraphStore, CuGraphStore) - The CuGraphStore or stores where the graph/feature data is held. - - batch_size: int (required) - The number of input nodes in each batch. - - input_nodes: Union[InputNodes, int] (required) - The input nodes associated with this sampler. - - **kwargs: kwargs - Keyword arguments to pass through for sampling. - i.e. "shuffle", "fanout" - See BulkSampleLoader. 
- """ - - if input_nodes is None: - raise ValueError("input_nodes is required") - if batch_size is None: - raise ValueError("batch_size is required") - - # Allow passing in a feature store and graph store as a tuple, as - # in the standard PyG API. If only one is passed, it is assumed - # it is behaving as both a graph store and a feature store. - if isinstance(data, (list, tuple)): - self.__feature_store, self.__graph_store = data - else: - self.__feature_store = data - self.__graph_store = data - - self.__batch_size = batch_size - self.__input_nodes = input_nodes - self.inner_loader_args = kwargs - - @property - def batch_size(self) -> int: - return self.__batch_size - - def __iter__(self): - self.current_loader = EXPERIMENTAL__BulkSampleLoader( - self.__feature_store, - self.__graph_store, - self.__input_nodes, - self.__batch_size, - **self.inner_loader_args, - ) - - return self.current_loader diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/loader/filter.py b/python/cugraph-pyg/build/lib/cugraph_pyg/loader/filter.py deleted file mode 100644 index 58c44640..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/loader/filter.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import cupy - -from cugraph_pyg.data import CuGraphStore - -from typing import ( - Dict, - Sequence, -) - - -def _filter_cugraph_store( - feature_store: CuGraphStore, - graph_store: CuGraphStore, - node_dict: Dict[str, Sequence], - row_dict: Dict[str, Sequence], - col_dict: Dict[str, Sequence], - edge_dict: Dict[str, Sequence], -) -> dict: - """ - Primarily for testing without torch and torch_geometric. - Returns a dictionary containing the sampled subgraph. - """ - data = {} - - for attr in graph_store.get_all_edge_attrs(): - key = attr.edge_type - if key in row_dict and key in col_dict: - edge_index = cupy.stack([row_dict[key], col_dict[key]]) - data[attr.edge_type] = {} - data[attr.edge_type]["edge_index"] = edge_index - - # Filter node storage: - required_attrs = [] - for attr in feature_store.get_all_tensor_attrs(): - if attr.group_name in node_dict: - attr.index = node_dict[attr.group_name] - required_attrs.append(attr) - data[attr.group_name] = {} - data["num_nodes"] = attr.index.size - tensors = feature_store.multi_get_tensor(required_attrs) - for i, attr in enumerate(required_attrs): - data[attr.group_name][attr.attr_name] = tensors[i] - - return data diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/__init__.py b/python/cugraph-pyg/build/lib/cugraph_pyg/nn/__init__.py deleted file mode 100644 index 65136a84..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .conv import * diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/__init__.py b/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/__init__.py deleted file mode 100644 index 312f5dbf..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .gat_conv import GATConv -from .gatv2_conv import GATv2Conv -from .rgcn_conv import RGCNConv -from .sage_conv import SAGEConv -from .transformer_conv import TransformerConv - -__all__ = [ - "GATConv", - "GATv2Conv", - "RGCNConv", - "SAGEConv", - "TransformerConv", -] diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/base.py b/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/base.py deleted file mode 100644 index 3e876c9e..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/base.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch import CSC, HeteroCSC - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -class BaseConv(torch.nn.Module): # pragma: no cover - r"""An abstract base class for implementing cugraph-ops message passing layers.""" - - def reset_parameters(self): - r"""Resets all learnable parameters of the module.""" - pass - - @staticmethod - def to_csc( - edge_index: torch.Tensor, - size: Optional[Tuple[int, int]] = None, - edge_attr: Optional[torch.Tensor] = None, - ) -> Union[ - Tuple[torch.Tensor, torch.Tensor, int], - Tuple[Tuple[torch.Tensor, torch.Tensor, int], torch.Tensor], - ]: - r"""Returns a CSC representation of an :obj:`edge_index` tensor to be - used as input to cugraph-ops conv layers. - - Args: - edge_index (torch.Tensor): The edge indices. - size ((int, int), optional). The shape of :obj:`edge_index` in each - dimension. 
(default: :obj:`None`) - edge_attr (torch.Tensor, optional): The edge features. - (default: :obj:`None`) - """ - if size is None: - warnings.warn( - f"Inferring the graph size from 'edge_index' causes " - f"a decline in performance and does not work for " - f"bipartite graphs. To suppress this warning, pass " - f"the 'size' explicitly in '{__name__}.to_csc()'." - ) - num_src_nodes = num_dst_nodes = int(edge_index.max()) + 1 - else: - num_src_nodes, num_dst_nodes = size - - row, col = edge_index - col, perm = torch_geometric.utils.index_sort(col, max_value=num_dst_nodes) - row = row[perm] - - colptr = torch_geometric.utils.sparse.index2ptr(col, num_dst_nodes) - - if edge_attr is not None: - return (row, colptr, num_src_nodes), edge_attr[perm] - - return row, colptr, num_src_nodes - - def get_cugraph( - self, - csc: Tuple[torch.Tensor, torch.Tensor, int], - bipartite: bool = False, - max_num_neighbors: Optional[int] = None, - ) -> CSC: - r"""Constructs a :obj:`cugraph-ops` graph object from CSC representation. - Supports both bipartite and non-bipartite graphs. - - Args: - csc ((torch.Tensor, torch.Tensor, int)): A tuple containing the CSC - representation of a graph, given as a tuple of - :obj:`(row, colptr, num_src_nodes)`. Use the - :meth:`to_csc` method to convert an :obj:`edge_index` - representation to the desired format. - bipartite (bool): If set to :obj:`True`, will create the bipartite - structure in cugraph-ops. (default: :obj:`False`) - max_num_neighbors (int, optional): The maximum number of neighbors - of a destination node. When enabled, it allows models to use - the message-flow-graph primitives in cugraph-ops. - (default: :obj:`None`) - """ - row, colptr, num_src_nodes = csc - - if not row.is_cuda: - raise RuntimeError( - f"'{self.__class__.__name__}' requires GPU-based processing " - f"but got CPU tensor." - ) - - if max_num_neighbors is None: - max_num_neighbors = -1 - - return CSC( - offsets=colptr, - indices=row, - num_src_nodes=num_src_nodes, - dst_max_in_degree=max_num_neighbors, - is_bipartite=bipartite, - ) - - def get_typed_cugraph( - self, - csc: Tuple[torch.Tensor, torch.Tensor, int], - edge_type: torch.Tensor, - num_edge_types: Optional[int] = None, - bipartite: bool = False, - max_num_neighbors: Optional[int] = None, - ) -> HeteroCSC: - r"""Constructs a typed :obj:`cugraph` graph object from a CSC - representation where each edge corresponds to a given edge type. - Supports both bipartite and non-bipartite graphs. - - Args: - csc ((torch.Tensor, torch.Tensor, int)): A tuple containing the CSC - representation of a graph, given as a tuple of - :obj:`(row, colptr, num_src_nodes)`. Use the - :meth:`to_csc` method to convert an :obj:`edge_index` - representation to the desired format. - edge_type (torch.Tensor): The edge type. - num_edge_types (int, optional): The maximum number of edge types. - When not given, will be computed on-the-fly, leading to - slightly worse performance. (default: :obj:`None`) - bipartite (bool): If set to :obj:`True`, will create the bipartite - structure in cugraph-ops. (default: :obj:`False`) - max_num_neighbors (int, optional): The maximum number of neighbors - of a destination node. When enabled, it allows models to use - the message-flow-graph primitives in cugraph-ops. 
- (default: :obj:`None`) - """ - if num_edge_types is None: - num_edge_types = int(edge_type.max()) + 1 - - if max_num_neighbors is None: - max_num_neighbors = -1 - - row, colptr, num_src_nodes = csc - edge_type = edge_type.int() - - return HeteroCSC( - offsets=colptr, - indices=row, - edge_types=edge_type, - num_src_nodes=num_src_nodes, - num_edge_types=num_edge_types, - dst_max_in_degree=max_num_neighbors, - is_bipartite=bipartite, - ) - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - csc: Tuple[torch.Tensor, torch.Tensor, int], - ) -> torch.Tensor: - r"""Runs the forward pass of the module. - - Args: - x (torch.Tensor): The node features. - csc ((torch.Tensor, torch.Tensor, int)): A tuple containing the CSC - representation of a graph, given as a tuple of - :obj:`(row, colptr, num_src_nodes)`. Use the - :meth:`to_csc` method to convert an :obj:`edge_index` - representation to the desired format. - """ - raise NotImplementedError diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/gat_conv.py b/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/gat_conv.py deleted file mode 100644 index 6e2f322b..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/gat_conv.py +++ /dev/null @@ -1,234 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import mha_gat_n2n - -from .base import BaseConv - -torch = import_optional("torch") -nn = import_optional("torch.nn") -torch_geometric = import_optional("torch_geometric") - - -class GATConv(BaseConv): - r"""The graph attentional operator from the `"Graph Attention Networks" - `_ paper. - - .. math:: - \mathbf{x}^{\prime}_i = \alpha_{i,i}\mathbf{\Theta}\mathbf{x}_{i} + - \sum_{j \in \mathcal{N}(i)} \alpha_{i,j}\mathbf{\Theta}\mathbf{x}_{j}, - - where the attention coefficients :math:`\alpha_{i,j}` are computed as - - .. math:: - \alpha_{i,j} = - \frac{ - \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} - [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_j] - \right)\right)} - {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} - \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} - [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_k] - \right)\right)}. - - If the graph has multi-dimensional edge features :math:`\mathbf{e}_{i,j}`, - the attention coefficients :math:`\alpha_{i,j}` are computed as - - .. math:: - \alpha_{i,j} = - \frac{ - \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} - [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_j - \, \Vert \, \mathbf{\Theta}_{e} \mathbf{e}_{i,j}]\right)\right)} - {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} - \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} - [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_k - \, \Vert \, \mathbf{\Theta}_{e} \mathbf{e}_{i,k}]\right)\right)}. 
- - Args: - in_channels (int or tuple): Size of each input sample, or :obj:`-1` to - derive the size from the first input(s) to the forward method. - A tuple corresponds to the sizes of source and target - dimensionalities. - out_channels (int): Size of each output sample. - heads (int, optional): Number of multi-head-attentions. - (default: :obj:`1`) - concat (bool, optional): If set to :obj:`False`, the multi-head - attentions are averaged instead of concatenated. - (default: :obj:`True`) - negative_slope (float, optional): LeakyReLU angle of the negative - slope. (default: :obj:`0.2`) - edge_dim (int, optional): Edge feature dimensionality (in case - there are any). (default: :obj:`None`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) - """ - - def __init__( - self, - in_channels: Union[int, Tuple[int, int]], - out_channels: int, - heads: int = 1, - concat: bool = True, - negative_slope: float = 0.2, - edge_dim: Optional[int] = None, - bias: bool = True, - ): - super().__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.heads = heads - self.concat = concat - self.negative_slope = negative_slope - self.edge_dim = edge_dim - - Linear = torch_geometric.nn.Linear - - if isinstance(in_channels, int): - self.lin = Linear( - in_channels, - heads * out_channels, - bias=False, - weight_initializer="glorot", - ) - else: - self.lin_src = Linear( - in_channels[0], - heads * out_channels, - bias=False, - weight_initializer="glorot", - ) - self.lin_dst = Linear( - in_channels[1], - heads * out_channels, - bias=False, - weight_initializer="glorot", - ) - - if edge_dim is not None: - self.lin_edge = Linear( - edge_dim, - heads * out_channels, - bias=False, - weight_initializer="glorot", - ) - self.att = nn.Parameter(torch.Tensor(3 * heads * out_channels)) - else: - self.register_parameter("lin_edge", None) - self.att = nn.Parameter(torch.Tensor(2 * heads * out_channels)) - - if bias and concat: - self.bias = nn.Parameter(torch.Tensor(heads * out_channels)) - elif bias and not concat: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.register_parameter("bias", None) - - self.reset_parameters() - - def reset_parameters(self): - if isinstance(self.in_channels, int): - self.lin.reset_parameters() - else: - self.lin_src.reset_parameters() - self.lin_dst.reset_parameters() - - torch_geometric.nn.inits.glorot( - self.att.view(-1, self.heads, self.out_channels) - ) - - if self.lin_edge is not None: - self.lin_edge.reset_parameters() - - torch_geometric.nn.inits.zeros(self.bias) - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - csc: Tuple[torch.Tensor, torch.Tensor, int], - edge_attr: Optional[torch.Tensor] = None, - max_num_neighbors: Optional[int] = None, - ) -> torch.Tensor: - r"""Runs the forward pass of the module. - - Args: - x (torch.Tensor or tuple): The node features. Can be a tuple of - tensors denoting source and destination node features. - csc ((torch.Tensor, torch.Tensor, int)): A tuple containing the CSC - representation of a graph, given as a tuple of - :obj:`(row, colptr, num_src_nodes)`. Use the - :meth:`to_csc` method to convert an :obj:`edge_index` - representation to the desired format. - edge_attr: (torch.Tensor, optional) The edge features. - max_num_neighbors (int, optional): The maximum number of neighbors - of a destination node. When enabled, it allows models to use - the message-flow-graph primitives in cugraph-ops. 
- (default: :obj:`None`) - """ - bipartite = not isinstance(x, torch.Tensor) - graph = self.get_cugraph( - csc, bipartite=bipartite, max_num_neighbors=max_num_neighbors - ) - - if edge_attr is not None: - if self.lin_edge is None: - raise RuntimeError( - f"{self.__class__.__name__}.edge_dim must be set to accept " - f"edge features." - ) - if edge_attr.dim() == 1: - edge_attr = edge_attr.view(-1, 1) - edge_attr = self.lin_edge(edge_attr) - - if bipartite: - if not hasattr(self, "lin_src"): - raise RuntimeError( - f"{self.__class__.__name__}.in_channels must be a pair of " - f"integers to allow bipartite node features, but got " - f"{self.in_channels}." - ) - x_src = self.lin_src(x[0]) - x_dst = self.lin_dst(x[1]) - else: - if not hasattr(self, "lin"): - raise RuntimeError( - f"{self.__class__.__name__}.in_channels is expected to be an " - f"integer, but got {self.in_channels}." - ) - x = self.lin(x) - - out = mha_gat_n2n( - (x_src, x_dst) if bipartite else x, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - ) - - if self.bias is not None: - out = out + self.bias - - return out - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}({self.in_channels}, " - f"{self.out_channels}, heads={self.heads})" - ) diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/gatv2_conv.py b/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/gatv2_conv.py deleted file mode 100644 index ba2ea7fe..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/gatv2_conv.py +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import mha_gat_v2_n2n - -from .base import BaseConv - -torch = import_optional("torch") -nn = import_optional("torch.nn") -torch_geometric = import_optional("torch_geometric") - - -class GATv2Conv(BaseConv): - r"""The GATv2 operator from the `"How Attentive are Graph Attention - Networks?" `_ paper, which fixes the - static attention problem of the standard - :class:`~torch_geometric.conv.GATConv` layer. - Since the linear layers in the standard GAT are applied right after each - other, the ranking of attended nodes is unconditioned on the query node. - In contrast, in :class:`GATv2`, every node can attend to any other node. - - .. math:: - \mathbf{x}^{\prime}_i = \alpha_{i,i}\mathbf{\Theta}\mathbf{x}_{i} + - \sum_{j \in \mathcal{N}(i)} \alpha_{i,j}\mathbf{\Theta}\mathbf{x}_{j}, - - where the attention coefficients :math:`\alpha_{i,j}` are computed as - - .. 
math:: - \alpha_{i,j} = - \frac{ - \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} - [\mathbf{x}_i \, \Vert \, \mathbf{x}_j] - \right)\right)} - {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} - \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} - [\mathbf{x}_i \, \Vert \, \mathbf{x}_k] - \right)\right)}. - - If the graph has multi-dimensional edge features :math:`\mathbf{e}_{i,j}`, - the attention coefficients :math:`\alpha_{i,j}` are computed as - - .. math:: - \alpha_{i,j} = - \frac{ - \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} - [\mathbf{x}_i \, \Vert \, \mathbf{x}_j \, \Vert \, \mathbf{e}_{i,j}] - \right)\right)} - {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} - \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} - [\mathbf{x}_i \, \Vert \, \mathbf{x}_k \, \Vert \, \mathbf{e}_{i,k}] - \right)\right)}. - - Args: - in_channels (int or tuple): Size of each input sample, or :obj:`-1` to - derive the size from the first input(s) to the forward method. - A tuple corresponds to the sizes of source and target - dimensionalities. - out_channels (int): Size of each output sample. - heads (int, optional): Number of multi-head-attentions. - (default: :obj:`1`) - concat (bool, optional): If set to :obj:`False`, the multi-head - attentions are averaged instead of concatenated. - (default: :obj:`True`) - negative_slope (float, optional): LeakyReLU angle of the negative - slope. (default: :obj:`0.2`) - edge_dim (int, optional): Edge feature dimensionality (in case - there are any). (default: :obj:`None`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) - share_weights (bool, optional): If set to :obj:`True`, the same matrix - will be applied to the source and the target node of every edge. 
- (default: :obj:`False`) - """ - - def __init__( - self, - in_channels: Union[int, Tuple[int, int]], - out_channels: int, - heads: int = 1, - concat: bool = True, - negative_slope: float = 0.2, - edge_dim: Optional[int] = None, - bias: bool = True, - share_weights: bool = False, - ): - super().__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.heads = heads - self.concat = concat - self.negative_slope = negative_slope - self.edge_dim = edge_dim - self.share_weights = share_weights - - Linear = torch_geometric.nn.Linear - - if isinstance(in_channels, int): - self.lin_src = Linear( - in_channels, - heads * out_channels, - bias=bias, - weight_initializer="glorot", - ) - - if share_weights: - self.lin_dst = self.lin_src - else: - self.lin_dst = Linear( - in_channels, - heads * out_channels, - bias=bias, - weight_initializer="glorot", - ) - else: - self.lin_src = Linear( - in_channels[0], - heads * out_channels, - bias=bias, - weight_initializer="glorot", - ) - self.lin_dst = Linear( - in_channels[1], - heads * out_channels, - bias=bias, - weight_initializer="glorot", - ) - - self.att = nn.Parameter(torch.Tensor(heads * out_channels)) - - if edge_dim is not None: - self.lin_edge = Linear( - edge_dim, heads * out_channels, bias=False, weight_initializer="glorot" - ) - else: - self.register_parameter("lin_edge", None) - - if bias and concat: - self.bias = nn.Parameter(torch.Tensor(heads * out_channels)) - elif bias and not concat: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.register_parameter("bias", None) - - self.reset_parameters() - - def reset_parameters(self): - self.lin_src.reset_parameters() - self.lin_dst.reset_parameters() - if self.lin_edge is not None: - self.lin_edge.reset_parameters() - - torch_geometric.nn.inits.glorot( - self.att.view(-1, self.heads, self.out_channels) - ) - - torch_geometric.nn.inits.zeros(self.bias) - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - csc: Tuple[torch.Tensor, torch.Tensor, int], - edge_attr: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - r"""Runs the forward pass of the module. - - Args: - x (torch.Tensor or tuple): The node features. Can be a tuple of - tensors denoting source and destination node features. - csc ((torch.Tensor, torch.Tensor, int)): A tuple containing the CSC - representation of a graph, given as a tuple of - :obj:`(row, colptr, num_src_nodes)`. Use the - :meth:`to_csc` method to convert an :obj:`edge_index` - representation to the desired format. - edge_attr: (torch.Tensor, optional) The edge features. - """ - bipartite = not isinstance(x, torch.Tensor) or not self.share_weights - graph = self.get_cugraph(csc, bipartite=bipartite) - - if edge_attr is not None: - if self.lin_edge is None: - raise RuntimeError( - f"{self.__class__.__name__}.edge_dim must be set to accept " - f"edge features." 
- ) - if edge_attr.dim() == 1: - edge_attr = edge_attr.view(-1, 1) - edge_attr = self.lin_edge(edge_attr) - - if bipartite: - if isinstance(x, torch.Tensor): - x = (x, x) - x_src = self.lin_src(x[0]) - x_dst = self.lin_dst(x[1]) - else: - x = self.lin_src(x) - - out = mha_gat_v2_n2n( - (x_src, x_dst) if bipartite else x, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - ) - - if self.bias is not None: - out = out + self.bias - - return out - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}({self.in_channels}, " - f"{self.out_channels}, heads={self.heads})" - ) diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/rgcn_conv.py b/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/rgcn_conv.py deleted file mode 100644 index a4b1919b..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/rgcn_conv.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Tuple - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import agg_hg_basis_n2n_post - -from .base import BaseConv - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -class RGCNConv(BaseConv): # pragma: no cover - r"""The relational graph convolutional operator from the `"Modeling - Relational Data with Graph Convolutional Networks" - `_ paper. - - .. math:: - \mathbf{x}^{\prime}_i = \mathbf{\Theta}_{\textrm{root}} \cdot - \mathbf{x}_i + \sum_{r \in \mathcal{R}} \sum_{j \in \mathcal{N}_r(i)} - \frac{1}{|\mathcal{N}_r(i)|} \mathbf{\Theta}_r \cdot \mathbf{x}_j, - - where :math:`\mathcal{R}` denotes the set of relations, *i.e.* edge types. - Edge type needs to be a one-dimensional :obj:`torch.long` tensor which - stores a relation identifier - :math:`\in \{ 0, \ldots, |\mathcal{R}| - 1\}` for each edge. - - Args: - in_channels (int): Size of each input sample. - out_channels (int): Size of each output sample. - num_relations (int): Number of relations. - num_bases (int, optional): If set, this layer will use the - basis-decomposition regularization scheme where :obj:`num_bases` - denotes the number of bases to use. (default: :obj:`None`) - aggr (str, optional): The aggregation scheme to use - (:obj:`"add"`, :obj:`"mean"`, :obj:`"sum"`). - (default: :obj:`"mean"`) - root_weight (bool, optional): If set to :obj:`False`, the layer will - not add transformed root node features to the output. - (default: :obj:`True`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. 
(default: :obj:`True`) - """ - - def __init__( - self, - in_channels: int, - out_channels: int, - num_relations: int, - num_bases: Optional[int] = None, - aggr: str = "mean", - root_weight: bool = True, - bias: bool = True, - ): - super().__init__() - - if aggr not in ["mean", "sum", "add"]: - raise ValueError( - f"Aggregation function must be chosen from 'mean', 'sum' or " - f"'add', but got '{aggr}'." - ) - - self.in_channels = in_channels - self.out_channels = out_channels - self.num_relations = num_relations - self.num_bases = num_bases - self.aggr = aggr - self.root_weight = root_weight - - dim_root_weight = 1 if root_weight else 0 - - if num_bases is not None: - self.weight = torch.nn.Parameter( - torch.empty(num_bases + dim_root_weight, in_channels, out_channels) - ) - self.comp = torch.nn.Parameter(torch.empty(num_relations, num_bases)) - else: - self.weight = torch.nn.Parameter( - torch.empty(num_relations + dim_root_weight, in_channels, out_channels) - ) - self.register_parameter("comp", None) - - if bias: - self.bias = torch.nn.Parameter(torch.empty(out_channels)) - else: - self.register_parameter("bias", None) - - self.reset_parameters() - - def reset_parameters(self): - end = -1 if self.root_weight else None - torch_geometric.nn.inits.glorot(self.weight[:end]) - torch_geometric.nn.inits.glorot(self.comp) - if self.root_weight: - torch_geometric.nn.inits.glorot(self.weight[-1]) - torch_geometric.nn.inits.zeros(self.bias) - - def forward( - self, - x: torch.Tensor, - csc: Tuple[torch.Tensor, torch.Tensor, int], - edge_type: torch.Tensor, - max_num_neighbors: Optional[int] = None, - ) -> torch.Tensor: - - graph = self.get_typed_cugraph( - csc, edge_type, self.num_relations, max_num_neighbors=max_num_neighbors - ) - - out = agg_hg_basis_n2n_post( - x, - self.comp, - graph, - concat_own=self.root_weight, - norm_by_out_degree=bool(self.aggr == "mean"), - ) - - out = out @ self.weight.view(-1, self.out_channels) - - if self.bias is not None: - out = out + self.bias - - return out - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}({self.in_channels}, " - f"{self.out_channels}, num_relations={self.num_relations})" - ) diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/sage_conv.py b/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/sage_conv.py deleted file mode 100644 index f2d8c2d2..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/sage_conv.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import agg_concat_n2n - -from .base import BaseConv - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -class SAGEConv(BaseConv): - r"""The GraphSAGE operator from the `"Inductive Representation Learning on - Large Graphs" `_ paper. - - .. 
math:: - \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W}_2 \cdot - \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j - - If :obj:`project = True`, then :math:`\mathbf{x}_j` will first get - projected via - - .. math:: - \mathbf{x}_j \leftarrow \sigma ( \mathbf{W}_3 \mathbf{x}_j + - \mathbf{b}) - - as described in Eq. (3) of the paper. - - Args: - in_channels (int or tuple): Size of each input sample. A tuple - corresponds to the sizes of source and target dimensionalities. - out_channels (int): Size of each output sample. - aggr (str or Aggregation, optional): The aggregation scheme to use. - Choose from :obj:`"mean"`, :obj:`"sum"`, :obj:`"min"` or - :obj:`"max"`. (default: :obj:`"mean"`) - normalize (bool, optional): If set to :obj:`True`, output features - will be :math:`\ell_2`-normalized, *i.e.*, - :math:`\frac{\mathbf{h}_i^{k+1}} - {\| \mathbf{h}_i^{k+1} \|_2}`. - (default: :obj:`False`) - root_weight (bool, optional): If set to :obj:`False`, the layer will - not add transformed root node features to the output. - (default: :obj:`True`) - project (bool, optional): If set to :obj:`True`, the layer will apply a - linear transformation followed by an activation function before - aggregation (as described in Eq. (3) of the paper). - (default: :obj:`False`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) - """ - - def __init__( - self, - in_channels: Union[int, Tuple[int, int]], - out_channels: int, - aggr: str = "mean", - normalize: bool = False, - root_weight: bool = True, - project: bool = False, - bias: bool = True, - ): - super().__init__() - - if aggr not in ["mean", "sum", "min", "max"]: - raise ValueError( - f"Aggregation function must be chosen from 'mean'," - f" 'sum', 'min' or 'max', but got '{aggr}'." 
- ) - - self.in_channels = in_channels - self.out_channels = out_channels - self.aggr = aggr - self.normalize = normalize - self.root_weight = root_weight - self.project = project - - if isinstance(in_channels, int): - self.in_channels_src = self.in_channels_dst = in_channels - else: - self.in_channels_src, self.in_channels_dst = in_channels - - if self.project: - self.pre_lin = torch_geometric.nn.Linear( - self.in_channels_src, self.in_channels_src, bias=True - ) - - if self.root_weight: - self.lin = torch_geometric.nn.Linear( - self.in_channels_src + self.in_channels_dst, out_channels, bias=bias - ) - else: - self.lin = torch_geometric.nn.Linear( - self.in_channels_src, out_channels, bias=bias - ) - - self.reset_parameters() - - def reset_parameters(self): - if self.project: - self.pre_lin.reset_parameters() - self.lin.reset_parameters() - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - csc: Tuple[torch.Tensor, torch.Tensor, int], - max_num_neighbors: Optional[int] = None, - ) -> torch.Tensor: - bipartite = isinstance(x, Tuple) - graph = self.get_cugraph( - csc, bipartite=bipartite, max_num_neighbors=max_num_neighbors - ) - - if self.project: - if bipartite: - x = (self.pre_lin(x[0]).relu(), x[1]) - else: - x = self.pre_lin(x).relu() - - out = agg_concat_n2n(x, graph, self.aggr) - - if self.root_weight: - out = self.lin(out) - else: - out = self.lin(out[:, : self.in_channels_src]) - - if self.normalize: - out = torch.nn.functional.normalize(out, p=2.0, dim=-1) - - return out - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}({self.in_channels}, " - f"{self.out_channels}, aggr={self.aggr})" - ) diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/transformer_conv.py b/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/transformer_conv.py deleted file mode 100644 index 46fa19ff..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/nn/conv/transformer_conv.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import mha_simple_n2n - -from .base import BaseConv - -torch = import_optional("torch") -nn = import_optional("torch.nn") -torch_geometric = import_optional("torch_geometric") - - -class TransformerConv(BaseConv): - r"""The graph transformer operator from the `"Masked Label Prediction: - Unified Message Passing Model for Semi-Supervised Classification" - `_ paper. - - .. math:: - \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + - \sum_{j \in \mathcal{N}(i)} \alpha_{i,j} \mathbf{W}_2 \mathbf{x}_{j}, - - where the attention coefficients :math:`\alpha_{i,j}` are computed via - multi-head dot product attention: - - .. 
math:: - \alpha_{i,j} = \textrm{softmax} \left( - \frac{(\mathbf{W}_3\mathbf{x}_i)^{\top} (\mathbf{W}_4\mathbf{x}_j)} - {\sqrt{d}} \right) - - Args: - in_channels (int or tuple): Size of each input sample, or :obj:`-1` to - derive the size from the first input(s) to the forward method. - A tuple corresponds to the sizes of source and target - dimensionalities. - out_channels (int): Size of each output sample. - heads (int, optional): Number of multi-head-attentions. - (default: :obj:`1`) - concat (bool, optional): If set to :obj:`False`, the multi-head - attentions are averaged instead of concatenated. - (default: :obj:`True`) - beta (bool, optional): If set, will combine aggregation and - skip information via - - .. math:: - \mathbf{x}^{\prime}_i = \beta_i \mathbf{W}_1 \mathbf{x}_i + - (1 - \beta_i) \underbrace{\left(\sum_{j \in \mathcal{N}(i)} - \alpha_{i,j} \mathbf{W}_2 \vec{x}_j \right)}_{=\mathbf{m}_i} - - with :math:`\beta_i = \textrm{sigmoid}(\mathbf{w}_5^{\top} - [ \mathbf{W}_1 \mathbf{x}_i, \mathbf{m}_i, \mathbf{W}_1 - \mathbf{x}_i - \mathbf{m}_i ])` (default: :obj:`False`) - edge_dim (int, optional): Edge feature dimensionality (in case - there are any). Edge features are added to the keys after - linear transformation, that is, prior to computing the - attention dot product. They are also added to final values - after the same linear transformation. The model is: - - .. math:: - \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + - \sum_{j \in \mathcal{N}(i)} \alpha_{i,j} \left( - \mathbf{W}_2 \mathbf{x}_{j} + \mathbf{W}_6 \mathbf{e}_{ij} - \right), - - where the attention coefficients :math:`\alpha_{i,j}` are now - computed via: - - .. math:: - \alpha_{i,j} = \textrm{softmax} \left( - \frac{(\mathbf{W}_3\mathbf{x}_i)^{\top} - (\mathbf{W}_4\mathbf{x}_j + \mathbf{W}_6 \mathbf{e}_{ij})} - {\sqrt{d}} \right) - - (default :obj:`None`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) - root_weight (bool, optional): If set to :obj:`False`, the layer will - not add the transformed root node features to the output and the - option :attr:`beta` is set to :obj:`False`. 
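# A small sketch of the attention coefficients described in the docstring
# above, for one destination node and a single head (plain PyTorch, toy sizes
# assumed; not the fused cugraph-ops kernel):
#   alpha_{i,j} = softmax_j( (W3 x_i)^T (W4 x_j) / sqrt(d) )
import math
import torch
import torch.nn as nn

torch.manual_seed(0)
d = 8                                  # per-head channel size
x_dst = torch.rand(1, 16)              # destination node i
x_src = torch.rand(4, 16)              # its neighbors j in N(i)

w3 = nn.Linear(16, d, bias=False)      # query transform W3
w4 = nn.Linear(16, d, bias=False)      # key transform W4
w2 = nn.Linear(16, d, bias=False)      # value transform W2

q = w3(x_dst)                          # (1, d)
k = w4(x_src)                          # (4, d)
v = w2(x_src)                          # (4, d)

alpha = torch.softmax((q @ k.t()) / math.sqrt(d), dim=-1)   # (1, 4) attention weights
msg = alpha @ v                        # sum_j alpha_{i,j} * W2 x_j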
(default: :obj:`True`) - """ - - def __init__( - self, - in_channels: Union[int, Tuple[int, int]], - out_channels: int, - heads: int = 1, - concat: bool = True, - beta: bool = False, - edge_dim: Optional[int] = None, - bias: bool = True, - root_weight: bool = True, - ): - super().__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.heads = heads - self.beta = beta and root_weight - self.root_weight = root_weight - self.concat = concat - self.edge_dim = edge_dim - - if isinstance(in_channels, int): - in_channels = (in_channels, in_channels) - - Linear = torch_geometric.nn.Linear - - self.lin_key = Linear(in_channels[0], heads * out_channels) - self.lin_query = Linear(in_channels[1], heads * out_channels) - self.lin_value = Linear(in_channels[0], heads * out_channels) - if edge_dim is not None: - self.lin_edge = Linear(edge_dim, heads * out_channels, bias=False) - else: - self.lin_edge = self.register_parameter("lin_edge", None) - - if concat: - self.lin_skip = Linear(in_channels[1], heads * out_channels, bias=bias) - if self.beta: - self.lin_beta = Linear(3 * heads * out_channels, 1, bias=False) - else: - self.lin_beta = self.register_parameter("lin_beta", None) - else: - self.lin_skip = Linear(in_channels[1], out_channels, bias=bias) - if self.beta: - self.lin_beta = Linear(3 * out_channels, 1, bias=False) - else: - self.lin_beta = self.register_parameter("lin_beta", None) - - self.reset_parameters() - - def reset_parameters(self): - self.lin_key.reset_parameters() - self.lin_query.reset_parameters() - self.lin_value.reset_parameters() - if self.lin_edge is not None: - self.lin_edge.reset_parameters() - self.lin_skip.reset_parameters() - if self.lin_beta is not None: - self.lin_beta.reset_parameters() - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - csc: Tuple[torch.Tensor, torch.Tensor, int], - edge_attr: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - r"""Runs the forward pass of the module. - - Args: - x (torch.Tensor or tuple): The node features. Can be a tuple of - tensors denoting source and destination node features. - csc ((torch.Tensor, torch.Tensor, int)): A tuple containing the CSC - representation of a graph, given as a tuple of - :obj:`(row, colptr, num_src_nodes)`. Use the - :meth:`to_csc` method to convert an :obj:`edge_index` - representation to the desired format. - edge_attr: (torch.Tensor, optional) The edge features. - """ - bipartite = True - graph = self.get_cugraph(csc, bipartite=bipartite) - - if isinstance(x, torch.Tensor): - x = (x, x) - - query = self.lin_query(x[1]) - key = self.lin_key(x[0]) - value = self.lin_value(x[0]) - - if edge_attr is not None: - if self.lin_edge is None: - raise RuntimeError( - f"{self.__class__.__name__}.edge_dim must be set to accept " - f"edge features." 
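# The forward() above takes csc = (row, colptr, num_src_nodes). A toy sketch of
# what that layout looks like (an illustration of the format only, not the
# library's to_csc implementation): edges are grouped by destination, and
# row[colptr[i]:colptr[i+1]] holds the sources of destination i.
import torch

# edge_index[0] = source, edge_index[1] = destination
edge_index = torch.tensor([[0, 1, 2, 3, 0],
                           [1, 0, 1, 2, 2]])
num_src_nodes, num_dst_nodes = 4, 3

perm = torch.argsort(edge_index[1])            # group edges by destination
row = edge_index[0, perm]                      # source of each edge, CSC order
colptr = torch.zeros(num_dst_nodes + 1, dtype=torch.long)
colptr[1:] = torch.cumsum(torch.bincount(edge_index[1], minlength=num_dst_nodes), dim=0)

csc = (row, colptr, num_src_nodes)
# e.g. the sources of destination 2 are row[colptr[2]:colptr[3]]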
- ) - edge_attr = self.lin_edge(edge_attr) - - out = mha_simple_n2n( - key, - query, - value, - graph, - self.heads, - self.concat, - edge_emb=edge_attr, - norm_by_dim=True, - score_bias=None, - ) - - if self.root_weight: - x_r = self.lin_skip(x[1]) - if self.lin_beta is not None: - beta = self.lin_beta(torch.cat([out, x_r, out - x_r], dim=-1)) - beta = beta.sigmoid() - out = beta * x_r + (1 - beta) * out - else: - out = out + x_r - - return out - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}({self.in_channels}, " - f"{self.out_channels}, heads={self.heads})" - ) diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/sampler/__init__.py b/python/cugraph-pyg/build/lib/cugraph_pyg/sampler/__init__.py deleted file mode 100644 index 202651e6..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/sampler/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/sampler/cugraph_sampler.py b/python/cugraph-pyg/build/lib/cugraph_pyg/sampler/cugraph_sampler.py deleted file mode 100644 index ac767cc5..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/sampler/cugraph_sampler.py +++ /dev/null @@ -1,438 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from typing import Sequence, Dict, Tuple - -from cugraph_pyg.data import CuGraphStore - -from cugraph.utilities.utils import import_optional -import cudf - -dask_cudf = import_optional("dask_cudf") -torch_geometric = import_optional("torch_geometric") - -torch = import_optional("torch") -HeteroSamplerOutput = torch_geometric.sampler.base.HeteroSamplerOutput - - -def _get_unique_nodes( - sampling_results: cudf.DataFrame, - graph_store: CuGraphStore, - node_type: str, - node_position: str, -) -> int: - """ - Counts the number of unique nodes of a given node type. - - Parameters - ---------- - sampling_results: cudf.DataFrame - The dataframe containing sampling results or filtered sampling results - (i.e. sampling results for hop 2) - graph_store: CuGraphStore - The graph store containing the structure of the sampled graph. - node_type: str - The node type to count the number of unique nodes of. - node_position: str ('src' or 'dst') - Whether to examine source or destination nodes. - - Returns - ------- - cudf.Series - The unique nodes of the given node type. 
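# The helper documented above keeps only the rows of the sampling results whose
# numeric edge type has the requested node type at the chosen end ('src' reads
# the "majors" column, 'dst' reads "minors"); callers then take nunique() of
# that column. A plain-pandas sketch of the same filtering (column names follow
# the docstring; the real code operates on cudf):
import pandas as pd

sampling_results = pd.DataFrame({
    "majors":    [0, 0, 1, 2],
    "minors":    [2, 3, 3, 0],
    "edge_type": [0, 0, 0, 1],        # numeric edge types
})

etypes = [0]                          # numeric edge types whose source is the node type of interest

mask = sampling_results.edge_type.isin(etypes)
unique_src = sampling_results.loc[mask, "majors"]
num_unique = unique_src.nunique()     # number of distinct source nodes of that type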
- """ - if node_position == "src": - edge_index = "majors" - edge_sel = 0 - elif node_position == "dst": - edge_index = "minors" - edge_sel = -1 - else: - raise ValueError(f"Illegal value {node_position} for node_position") - - etypes = [ - graph_store.canonical_edge_type_to_numeric(et) - for et in graph_store.edge_types - if et[edge_sel] == node_type - ] - if len(etypes) > 0: - f = sampling_results.edge_type == etypes[0] - for et in etypes[1:]: - f |= sampling_results.edge_type == et - - sampling_results_node = sampling_results[f] - else: - return cudf.Series([], dtype="int64") - - return sampling_results_node[edge_index] - - -def _sampler_output_from_sampling_results_homogeneous_coo( - sampling_results: cudf.DataFrame, - renumber_map: torch.Tensor, - graph_store: CuGraphStore, - data_index: Dict[Tuple[int, int], Dict[str, int]], - batch_id: int, - metadata: Sequence = None, -) -> HeteroSamplerOutput: - """ - Parameters - ---------- - sampling_results: cudf.DataFrame - The dataframe containing sampling results. - renumber_map: torch.Tensor - The tensor containing the renumber map, or None if there - is no renumber map. - graph_store: CuGraphStore - The graph store containing the structure of the sampled graph. - data_index: Dict[Tuple[int, int], Dict[str, int]] - Dictionary where keys are the batch id and hop id, - and values are dictionaries containing the max src - and max dst node ids for the batch and hop. - batch_id: int - The current batch id, whose samples are being retrieved - from the sampling results and data index. - metadata: Tensor - The metadata for the sampled batch. - - Returns - ------- - HeteroSamplerOutput - """ - - if len(graph_store.edge_types) > 1 or len(graph_store.node_types) > 1: - raise ValueError("Graph is heterogeneous") - - hops = torch.arange( - sampling_results.hop_id.iloc[len(sampling_results) - 1] + 1, device="cuda" - ) - hops = torch.searchsorted( - torch.as_tensor(sampling_results.hop_id, device="cuda"), hops - ) - - node_type = graph_store.node_types[0] - edge_type = graph_store.edge_types[0] - - num_nodes_per_hop_dict = {node_type: torch.zeros(len(hops) + 1, dtype=torch.int64)} - num_edges_per_hop_dict = {edge_type: torch.zeros(len(hops), dtype=torch.int64)} - - if renumber_map is None: - raise ValueError("Renumbered input is expected for homogeneous graphs") - - noi_index = {node_type: torch.as_tensor(renumber_map, device="cuda")} - - row_dict = { - edge_type: torch.as_tensor(sampling_results.majors, device="cuda"), - } - - col_dict = { - edge_type: torch.as_tensor(sampling_results.minors, device="cuda"), - } - - num_nodes_per_hop_dict[node_type][0] = data_index[batch_id, 0]["src_max"] + 1 - for hop in range(len(hops)): - hop_ix_start = hops[hop] - hop_ix_end = hops[hop + 1] if hop < len(hops) - 1 else len(sampling_results) - - if num_nodes_per_hop_dict[node_type][hop] > 0: - max_id_hop = data_index[batch_id, hop]["dst_max"] - max_id_prev_hop = ( - data_index[batch_id, hop - 1]["dst_max"] - if hop > 0 - else data_index[batch_id, 0]["src_max"] - ) - - if max_id_hop > max_id_prev_hop: - num_nodes_per_hop_dict[node_type][hop + 1] = ( - max_id_hop - max_id_prev_hop - ) - else: - num_nodes_per_hop_dict[node_type][hop + 1] = 0 - # will default to 0 if the previous hop was 0, since this is a PyG requirement - - num_edges_per_hop_dict[edge_type][hop] = hop_ix_end - hop_ix_start - - if HeteroSamplerOutput is None: - raise ImportError("Error importing from pyg") - - return HeteroSamplerOutput( - node=noi_index, - row=row_dict, - col=col_dict, - edge=None, - 
num_sampled_nodes=num_nodes_per_hop_dict, - num_sampled_edges=num_edges_per_hop_dict, - metadata=metadata, - ) - - -def _sampler_output_from_sampling_results_homogeneous_csr( - major_offsets: torch.Tensor, - minors: torch.Tensor, - renumber_map: torch.Tensor, - graph_store: CuGraphStore, - label_hop_offsets: torch.Tensor, - batch_id: int, - metadata: Sequence = None, -) -> HeteroSamplerOutput: - """ - Parameters - ---------- - major_offsets: torch.Tensor - The major offsets for the CSC/CSR matrix ("row pointer") - minors: torch.Tensor - The minors for the CSC/CSR matrix ("col index") - renumber_map: torch.Tensor - The tensor containing the renumber map. - Required. - graph_store: CuGraphStore - The graph store containing the structure of the sampled graph. - label_hop_offsets: torch.Tensor - The tensor containing the label-hop offsets. - batch_id: int - The current batch id, whose samples are being retrieved - from the sampling results and data index. - metadata: Tensor - The metadata for the sampled batch. - - Returns - ------- - HeteroSamplerOutput - """ - - if len(graph_store.edge_types) > 1 or len(graph_store.node_types) > 1: - raise ValueError("Graph is heterogeneous") - - if renumber_map is None: - raise ValueError("Renumbered input is expected for homogeneous graphs") - - node_type = graph_store.node_types[0] - edge_type = graph_store.edge_types[0] - - major_offsets = major_offsets.clone() - major_offsets[0] - label_hop_offsets = label_hop_offsets.clone() - label_hop_offsets[0] - - num_edges_per_hop_dict = {edge_type: major_offsets[label_hop_offsets].diff().cpu()} - - label_hop_offsets = label_hop_offsets.cpu() - num_nodes_per_hop_dict = { - node_type: torch.concat( - [ - label_hop_offsets.diff(), - (renumber_map.shape[0] - label_hop_offsets[-1]).reshape((1,)), - ] - ).cpu() - } - - noi_index = {node_type: torch.as_tensor(renumber_map, device="cuda")} - - col_dict = { - edge_type: major_offsets, - } - - row_dict = { - edge_type: minors, - } - - if HeteroSamplerOutput is None: - raise ImportError("Error importing from pyg") - - return HeteroSamplerOutput( - node=noi_index, - row=row_dict, - col=col_dict, - edge=None, - num_sampled_nodes=num_nodes_per_hop_dict, - num_sampled_edges=num_edges_per_hop_dict, - metadata=metadata, - ) - - -def _sampler_output_from_sampling_results_heterogeneous( - sampling_results: cudf.DataFrame, - renumber_map: cudf.Series, - graph_store: CuGraphStore, - metadata: Sequence = None, -) -> HeteroSamplerOutput: - """ - Parameters - ---------- - sampling_results: cudf.DataFrame - The dataframe containing sampling results. - renumber_map: cudf.Series - The series containing the renumber map, or None if there - is no renumber map. - graph_store: CuGraphStore - The graph store containing the structure of the sampled graph. - metadata: Tensor - The metadata for the sampled batch. 
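# A toy numeric sketch of the offset arithmetic used in the CSR path above
# (values assumed): label_hop_offsets partitions the destination nodes by hop,
# so indexing major_offsets with it gives cumulative edge counts and diff()
# yields edges per hop; node counts come from diff() of label_hop_offsets plus
# the remaining entries of the renumber map.
import torch

major_offsets = torch.tensor([0, 2, 6, 9])       # "row pointer": 3 dst nodes, 9 edges
label_hop_offsets = torch.tensor([0, 1, 3])      # dst-node boundaries of hops 0..1
renumber_map = torch.arange(7)                   # 7 nodes referenced in total

num_edges_per_hop = major_offsets[label_hop_offsets].diff()        # tensor([2, 4])
num_nodes_per_hop = torch.concat([
    label_hop_offsets.diff(),                                       # tensor([1, 2])
    (renumber_map.shape[0] - label_hop_offsets[-1]).reshape((1,)),  # tensor([4]) remaining nodes
])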
- - Returns - ------- - HeteroSamplerOutput - """ - - hops = torch.arange(sampling_results.hop_id.max() + 1, device="cuda") - hops = torch.searchsorted( - torch.as_tensor(sampling_results.hop_id, device="cuda"), hops - ) - - num_nodes_per_hop_dict = {} - num_edges_per_hop_dict = {} - - # Fill out hop 0 in num_nodes_per_hop_dict, which is based on src instead of dst - sampling_results_hop_0 = sampling_results.iloc[ - 0 : (hops[1] if len(hops) > 1 else len(sampling_results)) - ] - - for node_type in graph_store.node_types: - num_unique_nodes = _get_unique_nodes( - sampling_results_hop_0, graph_store, node_type, "src" - ).nunique() - - if num_unique_nodes > 0: - num_nodes_per_hop_dict[node_type] = torch.zeros( - len(hops) + 1, dtype=torch.int64 - ) - num_nodes_per_hop_dict[node_type][0] = num_unique_nodes - - if renumber_map is not None: - raise ValueError( - "Precomputing the renumber map is currently " - "unsupported for heterogeneous graphs." - ) - - # Calculate nodes of interest based on unique nodes in order of appearance - # Use hop 0 sources since those are the only ones not included in destinations - # Use torch.concat based on benchmark performance (vs. cudf.concat) - - if sampling_results_hop_0 is None: - sampling_results_hop_0 = sampling_results.iloc[ - 0 : (hops[1] if len(hops) > 1 else len(sampling_results)) - ] - - nodes_of_interest = ( - cudf.Series( - torch.concat( - [ - torch.as_tensor(sampling_results_hop_0.majors, device="cuda"), - torch.as_tensor(sampling_results.minors, device="cuda"), - ] - ), - name="nodes_of_interest", - ) - .drop_duplicates() - .sort_index() - ) - - # Get the grouped node index (for creating the renumbered grouped edge index) - noi_index = graph_store._get_vertex_groups_from_sample( - torch.as_tensor(nodes_of_interest, device="cuda") - ) - del nodes_of_interest - - # Get the new edge index (by type as expected for HeteroData) - # FIXME handle edge ids/types after the C++ updates - row_dict, col_dict = graph_store._get_renumbered_edge_groups_from_sample( - sampling_results, noi_index - ) - - for hop in range(len(hops)): - hop_ix_start = hops[hop] - hop_ix_end = hops[hop + 1] if hop < len(hops) - 1 else len(sampling_results) - sampling_results_to_hop = sampling_results.iloc[0:hop_ix_end] - - for node_type in graph_store.node_types: - unique_nodes_hop = _get_unique_nodes( - sampling_results_to_hop, graph_store, node_type, "dst" - ) - - unique_nodes_0 = _get_unique_nodes( - sampling_results_hop_0, graph_store, node_type, "src" - ) - - num_unique_nodes = cudf.concat([unique_nodes_0, unique_nodes_hop]).nunique() - - if num_unique_nodes > 0: - if node_type not in num_nodes_per_hop_dict: - num_nodes_per_hop_dict[node_type] = torch.zeros( - len(hops) + 1, dtype=torch.int64 - ) - num_nodes_per_hop_dict[node_type][hop + 1] = num_unique_nodes - int( - num_nodes_per_hop_dict[node_type][: hop + 1].sum(0) - ) - - numeric_etypes, counts = torch.unique( - torch.as_tensor( - sampling_results.iloc[hop_ix_start:hop_ix_end].edge_type, - device="cuda", - ), - return_counts=True, - ) - numeric_etypes = list(numeric_etypes) - counts = list(counts) - for num_etype, count in zip(numeric_etypes, counts): - can_etype = graph_store.numeric_edge_type_to_canonical(num_etype) - if can_etype not in num_edges_per_hop_dict: - num_edges_per_hop_dict[can_etype] = torch.zeros( - len(hops), dtype=torch.int64 - ) - num_edges_per_hop_dict[can_etype][hop] = count - - if HeteroSamplerOutput is None: - raise ImportError("Error importing from pyg") - - return HeteroSamplerOutput( - 
node=noi_index, - row=row_dict, - col=col_dict, - edge=None, - num_sampled_nodes=num_nodes_per_hop_dict, - num_sampled_edges=num_edges_per_hop_dict, - metadata=metadata, - ) - - -def filter_cugraph_store_csc( - feature_store: torch_geometric.data.FeatureStore, - graph_store: torch_geometric.data.GraphStore, - node_dict: Dict[str, torch.Tensor], - row_dict: Dict[str, torch.Tensor], - col_dict: Dict[str, torch.Tensor], - edge_dict: Dict[str, Tuple[torch.Tensor]], -) -> torch_geometric.data.HeteroData: - data = torch_geometric.data.HeteroData() - - for attr in graph_store.get_all_edge_attrs(): - key = attr.edge_type - if key in row_dict and key in col_dict: - data.put_edge_index( - (row_dict[key], col_dict[key]), - edge_type=key, - layout="csc", - is_sorted=True, - ) - - required_attrs = [] - for attr in feature_store.get_all_tensor_attrs(): - if attr.group_name in node_dict: - attr.index = node_dict[attr.group_name] - required_attrs.append(attr) - data[attr.group_name].num_nodes = attr.index.size(0) - - tensors = feature_store.multi_get_tensor(required_attrs) - for i, attr in enumerate(required_attrs): - data[attr.group_name][attr.attr_name] = tensors[i] - - return data diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/conftest.py b/python/cugraph-pyg/build/lib/cugraph_pyg/tests/conftest.py deleted file mode 100644 index ec59c411..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/conftest.py +++ /dev/null @@ -1,286 +0,0 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import pytest - -from dask_cuda.initialize import initialize as dask_initialize -from dask_cuda import LocalCUDACluster -from dask.distributed import Client -from cugraph.dask.comms import comms as Comms -from cugraph.dask.common.mg_utils import get_visible_devices -from cugraph.testing.mg_utils import stop_dask_client - -import torch -import numpy as np -from cugraph.gnn import FeatureStore -from cugraph.experimental.datasets import karate - -import tempfile - -# module-wide fixtures - -# If the rapids-pytest-benchmark plugin is installed, the "gpubenchmark" -# fixture will be available automatically. Check that this fixture is available -# by trying to import rapids_pytest_benchmark, and if that fails, set -# "gpubenchmark" to the standard "benchmark" fixture provided by -# pytest-benchmark. -try: - import rapids_pytest_benchmark # noqa: F401 -except ImportError: - import pytest_benchmark - - gpubenchmark = pytest_benchmark.plugin.benchmark - - -@pytest.fixture(scope="module") -def dask_client(): - dask_scheduler_file = os.environ.get("SCHEDULER_FILE") - cuda_visible_devices = get_visible_devices() - - if dask_scheduler_file is not None: - dask_initialize() - dask_client = Client(scheduler_file=dask_scheduler_file) - else: - # The tempdir created by tempdir_object should be cleaned up once - # tempdir_object goes out-of-scope and is deleted. 
- tempdir_object = tempfile.TemporaryDirectory() - cluster = LocalCUDACluster( - local_directory=tempdir_object.name, - protocol="tcp", - CUDA_VISIBLE_DEVICES=cuda_visible_devices, - ) - - dask_client = Client(cluster) - dask_client.wait_for_workers(len(cuda_visible_devices)) - - if not Comms.is_initialized(): - Comms.initialize(p2p=True) - - yield dask_client - - stop_dask_client(dask_client) - print("\ndask_client fixture: client.close() called") - - -@pytest.fixture -def karate_gnn(): - el = karate.get_edgelist().reset_index(drop=True) - el.src = el.src.astype("int64") - el.dst = el.dst.astype("int64") - all_vertices = np.array_split(np.arange(34), 2) - - F = FeatureStore(backend="torch") - F.add_data( - torch.arange(len(all_vertices[0]), dtype=torch.float32) * 31, - "type0", - "prop0", - ) - F.add_data( - torch.arange(len(all_vertices[1]), dtype=torch.float32) * 41, - "type1", - "prop0", - ) - - N = { - "type0": len(all_vertices[0]), - "type1": len(all_vertices[1]), - } - - offsets = {"type0": 0, "type1": N["type0"]} - - G = { - ("type0", "et01", "type1"): el[ - el.src.isin(all_vertices[0]) & el.dst.isin(all_vertices[1]) - ].reset_index(drop=True), - ("type1", "et10", "type0"): el[ - el.src.isin(all_vertices[1]) & el.dst.isin(all_vertices[0]) - ].reset_index(drop=True), - ("type0", "et00", "type0"): el[ - el.src.isin(all_vertices[0]) & el.dst.isin(all_vertices[0]) - ], - ("type1", "et11", "type1"): el[ - el.src.isin(all_vertices[1]) & el.dst.isin(all_vertices[1]) - ].reset_index(drop=True), - } - - G = { - (src_type, edge_type, dst_type): ( - torch.tensor(elx["src"].values_host - offsets[src_type]), - torch.tensor(elx["dst"].values_host - offsets[dst_type]), - ) - for (src_type, edge_type, dst_type), elx in G.items() - } - - return F, G, N - - -@pytest.fixture -def basic_graph_1(): - G = { - ("vt1", "pig", "vt1"): [ - torch.tensor([0, 0, 1, 2, 2, 3]), - torch.tensor([1, 2, 4, 3, 4, 1]), - ] - } - - N = {"vt1": 5} - - F = FeatureStore() - F.add_data( - torch.tensor([100, 200, 300, 400, 500]), type_name="vt1", feat_name="prop1" - ) - - F.add_data(torch.tensor([5, 4, 3, 2, 1]), type_name="vt1", feat_name="prop2") - - return F, G, N - - -@pytest.fixture -def multi_edge_graph_1(): - G = { - ("vt1", "pig", "vt1"): [torch.tensor([0, 2, 3, 1]), torch.tensor([1, 3, 1, 4])], - ("vt1", "dog", "vt1"): [torch.tensor([0, 3, 4]), torch.tensor([2, 2, 3])], - ("vt1", "cat", "vt1"): [ - torch.tensor([1, 2, 2]), - torch.tensor([4, 3, 4]), - ], - } - - N = {"vt1": 5} - - F = FeatureStore() - F.add_data( - torch.tensor([100, 200, 300, 400, 500]), type_name="vt1", feat_name="prop1" - ) - - F.add_data(torch.tensor([5, 4, 3, 2, 1]), type_name="vt1", feat_name="prop2") - - return F, G, N - - -@pytest.fixture -def multi_edge_multi_vertex_graph_1(): - - G = { - ("brown", "horse", "brown"): [ - torch.tensor([0, 0]), - torch.tensor([1, 2]), - ], - ("brown", "tortoise", "black"): [ - torch.tensor([1, 1, 2]), - torch.tensor([1, 0, 1]), - ], - ("brown", "mongoose", "black"): [ - torch.tensor([2, 1]), - torch.tensor([0, 1]), - ], - ("black", "cow", "brown"): [ - torch.tensor([0, 0]), - torch.tensor([1, 2]), - ], - ("black", "snake", "black"): [ - torch.tensor([1]), - torch.tensor([0]), - ], - } - - N = {"brown": 3, "black": 2} - - F = FeatureStore() - F.add_data(torch.tensor([100, 200, 300]), type_name="brown", feat_name="prop1") - - F.add_data(torch.tensor([400, 500]), type_name="black", feat_name="prop1") - - F.add_data(torch.tensor([5, 4, 3]), type_name="brown", feat_name="prop2") - - F.add_data(torch.tensor([2, 
1]), type_name="black", feat_name="prop2") - - return F, G, N - - -@pytest.fixture -def multi_edge_multi_vertex_no_graph_1(): - G = { - ("brown", "horse", "brown"): 2, - ("brown", "tortoise", "black"): 3, - ("brown", "mongoose", "black"): 3, - ("black", "cow", "brown"): 3, - ("black", "snake", "black"): 1, - } - - N = {"brown": 3, "black": 2} - - F = FeatureStore() - F.add_data(np.array([100, 200, 300]), type_name="brown", feat_name="prop1") - - F.add_data(np.array([400, 500]), type_name="black", feat_name="prop1") - - F.add_data(np.array([5, 4, 3]), type_name="brown", feat_name="prop2") - - F.add_data(np.array([2, 1]), type_name="black", feat_name="prop2") - - return F, G, N - - -@pytest.fixture -def abc_graph(): - N = { - "A": 2, # 0, 1 - "B": 3, # 2, 3, 4 - "C": 4, # 5, 6, 7, 8 - } - - G = { - # (0->2, 0->3, 1->3) - ("A", "ab", "B"): [ - torch.tensor([0, 0, 1], dtype=torch.int64), - torch.tensor([0, 1, 1], dtype=torch.int64), - ], - # (2->0, 2->1, 3->1, 4->0) - ("B", "ba", "A"): [ - torch.tensor([0, 0, 1, 2], dtype=torch.int64), - torch.tensor([0, 1, 1, 0], dtype=torch.int64), - ], - # (2->6, 2->8, 3->5, 3->7, 4->5, 4->8) - ("B", "bc", "C"): [ - torch.tensor([0, 0, 1, 1, 2, 2], dtype=torch.int64), - torch.tensor([1, 3, 0, 2, 0, 3], dtype=torch.int64), - ], - } - - F = FeatureStore() - F.add_data( - torch.tensor([3.2, 2.1], dtype=torch.float32), type_name="A", feat_name="prop1" - ) - - return F, G, N - - -@pytest.fixture -def basic_pyg_graph_1(): - edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]]) - size = (4, 4) - return edge_index, size - - -@pytest.fixture -def basic_pyg_graph_2(): - edge_index = torch.tensor( - [ - [0, 1, 0, 2, 3, 0, 4, 0, 5, 0, 6, 7, 0, 8, 9], - [1, 9, 2, 9, 9, 4, 9, 5, 9, 6, 9, 9, 8, 9, 0], - ] - ) - size = (10, 10) - return edge_index, size diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/mg/test_mg_cugraph_loader.py b/python/cugraph-pyg/build/lib/cugraph_pyg/tests/mg/test_mg_cugraph_loader.py deleted file mode 100644 index 8d59aa6d..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/mg/test_mg_cugraph_loader.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
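# In the abc_graph fixture above, the comments use global vertex ids (A: 0-1,
# B: 2-4, C: 5-8) while the stored tensors use per-type local ids, i.e. the
# global id minus the first id of its type. A small sketch of that conversion
# (offsets inferred from the fixture's comments):
import torch

offsets = {"A": 0, "B": 2, "C": 5}         # first global id of each node type

# global edges 0->2, 0->3, 1->3 of type ("A", "ab", "B")
global_src = torch.tensor([0, 0, 1])
global_dst = torch.tensor([2, 3, 3])

local_src = global_src - offsets["A"]      # tensor([0, 0, 1])
local_dst = global_dst - offsets["B"]      # tensor([0, 1, 1])  == the fixture's stored tensors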
- -import pytest - -from cugraph_pyg.loader import CuGraphNeighborLoader -from cugraph_pyg.data import CuGraphStore - -from cugraph.utilities.utils import import_optional, MissingModule - -torch = import_optional("torch") - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_cugraph_loader_basic(dask_client, karate_gnn): - F, G, N = karate_gnn - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True, order="CSR") - loader = CuGraphNeighborLoader( - (cugraph_store, cugraph_store), - torch.arange(N["type0"] + N["type1"], dtype=torch.int64), - 10, - num_neighbors=[4, 4], - random_state=62, - replace=False, - ) - - assert isinstance(cugraph_store._subgraph()._plc_graph, dict) - - samples = [s for s in loader] - - assert len(samples) == 3 - for sample in samples: - if "type0" in sample: - for prop in sample["type0"]["prop0"].tolist(): - assert prop % 31 == 0 - - if "type1" in sample: - for prop in sample["type1"]["prop0"].tolist(): - assert prop % 41 == 0 - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_cugraph_loader_hetero(dask_client, karate_gnn): - F, G, N = karate_gnn - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True, order="CSR") - loader = CuGraphNeighborLoader( - (cugraph_store, cugraph_store), - input_nodes=("type1", torch.tensor([0, 1, 2, 5], device="cuda")), - batch_size=2, - num_neighbors=[4, 4], - random_state=62, - replace=False, - ) - - samples = [s for s in loader] - - assert len(samples) == 2 - for sample in samples: - print(sample) - if "type0" in sample: - for prop in sample["type0"]["prop0"].tolist(): - assert prop % 31 == 0 - - if "type1" in sample: - for prop in sample["type1"]["prop0"].tolist(): - assert prop % 41 == 0 diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/mg/test_mg_cugraph_sampler.py b/python/cugraph-pyg/build/lib/cugraph_pyg/tests/mg/test_mg_cugraph_sampler.py deleted file mode 100644 index ca4a3537..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/mg/test_mg_cugraph_sampler.py +++ /dev/null @@ -1,238 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import cudf -import cupy - -import pytest - -from cugraph_pyg.data import CuGraphStore -from cugraph_pyg.sampler.cugraph_sampler import ( - _sampler_output_from_sampling_results_heterogeneous, -) - -from cugraph.gnn import FeatureStore - -from cugraph.utilities.utils import import_optional, MissingModule -from cugraph.dask import uniform_neighbor_sample - -torch = import_optional("torch") - - -@pytest.mark.cugraph_ops -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_neighbor_sample(dask_client, basic_graph_1): - F, G, N = basic_graph_1 - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True, order="CSR") - - batches = cudf.DataFrame( - { - "start": cudf.Series([0, 1, 2, 3, 4], dtype="int64"), - "batch": cudf.Series(cupy.zeros(5, dtype="int32")), - } - ) - - sampling_results = ( - uniform_neighbor_sample( - cugraph_store._subgraph(), - batches, - with_batch_ids=True, - fanout_vals=[-1], - with_replacement=False, - with_edge_properties=True, - random_state=62, - return_offsets=False, - return_hops=True, - use_legacy_names=False, - ) - .compute() - .sort_values(by=["majors", "minors"]) - ) - - out = _sampler_output_from_sampling_results_heterogeneous( - sampling_results=sampling_results, - renumber_map=None, - graph_store=cugraph_store, - metadata=torch.arange(6, dtype=torch.int64), - ) - - noi_groups = out.node - row_dict = out.row - col_dict = out.col - metadata = out.metadata - - assert metadata.tolist() == list(range(6)) - - for node_type, node_ids in noi_groups.items(): - actual_vertex_ids = torch.arange(N[node_type]) - - assert sorted(node_ids.tolist()) == actual_vertex_ids.tolist() - - assert ( - row_dict[("vt1", "pig", "vt1")].tolist() == G[("vt1", "pig", "vt1")][0].tolist() - ) - assert ( - col_dict[("vt1", "pig", "vt1")].tolist() == G[("vt1", "pig", "vt1")][1].tolist() - ) - - # check the hop dictionaries - assert len(out.num_sampled_nodes) == 1 - assert out.num_sampled_nodes["vt1"].tolist() == [4, 1] - - assert len(out.num_sampled_edges) == 1 - assert out.num_sampled_edges[("vt1", "pig", "vt1")].tolist() == [6] - - -@pytest.mark.cugraph_ops -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skip(reason="broken") -def test_neighbor_sample_multi_vertex(dask_client, multi_edge_multi_vertex_graph_1): - F, G, N = multi_edge_multi_vertex_graph_1 - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True, order="CSR") - - batches = cudf.DataFrame( - { - "start": cudf.Series([0, 1, 2, 3, 4], dtype="int64"), - "batches": cudf.Series(cupy.zeros(5, dtype="int32")), - } - ) - - sampling_results = ( - uniform_neighbor_sample( - cugraph_store._subgraph(), - batches, - fanout_vals=[-1], - with_replacement=False, - with_edge_properties=True, - random_state=62, - return_offsets=False, - with_batch_ids=True, - use_legacy_names=False, - ) - .sort_values(by=["majors", "minors"]) - .compute() - ) - - out = _sampler_output_from_sampling_results_heterogeneous( - sampling_results=sampling_results, - renumber_map=None, - graph_store=cugraph_store, - metadata=torch.arange(6, dtype=torch.int64), - ) - - noi_groups = out.node - row_dict = out.row - col_dict = out.col - metadata = out.metadata - - assert metadata.tolist() == list(range(6)) - - for node_type, node_ids in noi_groups.items(): - actual_vertex_ids = torch.arange(N[node_type]) - - assert node_ids.tolist() == sorted(actual_vertex_ids.tolist()) - - for edge_type, ei in G.items(): - assert sorted(row_dict[edge_type].tolist()) == sorted(ei[0].tolist()) - assert 
sorted(col_dict[edge_type].tolist()) == sorted(ei[1].tolist()) - - # check the hop dictionaries - assert len(out.num_sampled_nodes) == 2 - assert out.num_sampled_nodes["black"].tolist() == [2, 0] - assert out.num_sampled_nodes["brown"].tolist() == [3, 0] - - assert len(out.num_sampled_edges) == 5 - assert out.num_sampled_edges[("brown", "horse", "brown")].tolist() == [2] - assert out.num_sampled_edges[("brown", "tortoise", "black")].tolist() == [3] - assert out.num_sampled_edges[("brown", "mongoose", "black")].tolist() == [2] - assert out.num_sampled_edges[("black", "cow", "brown")].tolist() == [2] - assert out.num_sampled_edges[("black", "snake", "black")].tolist() == [1] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_neighbor_sample_mock_sampling_results(dask_client): - N = { - "A": 2, # 0, 1 - "B": 3, # 2, 3, 4 - "C": 4, # 5, 6, 7, 8 - } - - G = { - # (0->2, 0->3, 1->3) - ("A", "ab", "B"): [ - torch.tensor([0, 0, 1], dtype=torch.int64), - torch.tensor([0, 1, 1], dtype=torch.int64), - ], - # (2->0, 2->1, 3->1, 4->0) - ("B", "ba", "A"): [ - torch.tensor([0, 0, 1, 2], dtype=torch.int64), - torch.tensor([0, 1, 1, 0], dtype=torch.int64), - ], - # (2->6, 2->8, 3->5, 3->7, 4->5, 4->8) - ("B", "bc", "C"): [ - torch.tensor([0, 0, 1, 1, 2, 2], dtype=torch.int64), - torch.tensor([1, 3, 0, 2, 0, 3], dtype=torch.int64), - ], - } - - F = FeatureStore() - F.add_data( - torch.tensor([3.2, 2.1], dtype=torch.float32), type_name="A", feat_name="prop1" - ) - - graph_store = CuGraphStore(F, G, N, multi_gpu=True, order="CSR") - - # let 0, 1 be the start vertices, fanout = [2, 1, 2, 3] - mock_sampling_results = cudf.DataFrame( - { - "majors": cudf.Series([0, 0, 1, 2, 3, 3, 1, 3, 3, 3], dtype="int64"), - "minors": cudf.Series([2, 3, 3, 8, 1, 7, 3, 1, 5, 7], dtype="int64"), - "hop_id": cudf.Series([0, 0, 0, 1, 1, 1, 2, 3, 3, 3], dtype="int32"), - "edge_type": cudf.Series([0, 0, 0, 2, 1, 2, 0, 1, 2, 2], dtype="int32"), - } - ) - - out = _sampler_output_from_sampling_results_heterogeneous( - mock_sampling_results, None, graph_store, None - ) - - assert out.metadata is None - assert len(out.node) == 3 - assert out.node["A"].tolist() == [0, 1] - assert out.node["B"].tolist() == [0, 1] - assert out.node["C"].tolist() == [3, 2, 0] - - assert len(out.row) == 3 - assert len(out.col) == 3 - assert out.row[("A", "ab", "B")].tolist() == [0, 0, 1, 1] - assert out.col[("A", "ab", "B")].tolist() == [0, 1, 1, 1] - assert out.row[("B", "bc", "C")].tolist() == [0, 1, 1, 1] - assert out.col[("B", "bc", "C")].tolist() == [0, 1, 2, 1] - assert out.row[("B", "ba", "A")].tolist() == [1, 1] - assert out.col[("B", "ba", "A")].tolist() == [1, 1] - - assert len(out.num_sampled_nodes) == 3 - assert out.num_sampled_nodes["A"].tolist() == [2, 0, 0, 0, 0] - assert out.num_sampled_nodes["B"].tolist() == [0, 2, 0, 0, 0] - assert out.num_sampled_nodes["C"].tolist() == [0, 0, 2, 0, 1] - - assert len(out.num_sampled_edges) == 3 - assert out.num_sampled_edges[("A", "ab", "B")].tolist() == [3, 0, 1, 0] - assert out.num_sampled_edges[("B", "ba", "A")].tolist() == [0, 1, 0, 1] - assert out.num_sampled_edges[("B", "bc", "C")].tolist() == [0, 2, 0, 2] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skip("needs to be written") -def test_neighbor_sample_renumbered(dask_client): - pass diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/mg/test_mg_cugraph_store.py b/python/cugraph-pyg/build/lib/cugraph_pyg/tests/mg/test_mg_cugraph_store.py 
deleted file mode 100644 index 42eb80c6..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/mg/test_mg_cugraph_store.py +++ /dev/null @@ -1,388 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import cugraph -from cugraph_pyg.data.cugraph_store import ( - CuGraphTensorAttr, - CuGraphEdgeAttr, - EdgeLayout, -) -from cugraph_pyg.data import CuGraphStore - -import cudf -import dask_cudf -import cupy -import numpy as np - -from cugraph.utilities.utils import import_optional, MissingModule - -import pytest - - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_tensor_attr(): - ta = CuGraphTensorAttr("group0", "property1") - assert not ta.is_fully_specified() - assert not ta.is_set("index") - - ta.fully_specify() - assert ta.is_fully_specified() - - other_ta = CuGraphTensorAttr(index=[1, 2, 3]) - ta.update(other_ta) - assert ta.index == [1, 2, 3] - - casted_ta1 = CuGraphTensorAttr.cast(ta) - assert casted_ta1 == ta - - casted_ta2 = CuGraphTensorAttr.cast(index=[1, 2, 3]) - assert casted_ta2.index == [1, 2, 3] - assert not casted_ta2.is_fully_specified() - - casted_ta3 = CuGraphTensorAttr.cast( - "group2", - "property2", - [1, 2, 3], - ) - assert casted_ta3.group_name == "group2" - assert casted_ta3.attr_name == "property2" - assert casted_ta3.index == [1, 2, 3] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_edge_attr(): - ea = CuGraphEdgeAttr("type0", EdgeLayout.COO, False, 10) - assert ea.edge_type == "type0" - assert ea.layout == EdgeLayout.COO - assert not ea.is_sorted - assert ea.size == 10 - - ea = CuGraphEdgeAttr(edge_type="type1", layout="csr", is_sorted=True) - assert ea.size is None - - ea = CuGraphEdgeAttr.cast("type0", EdgeLayout.COO, False, 10) - assert ea.edge_type == "type0" - assert ea.layout == EdgeLayout.COO - assert not ea.is_sorted - assert ea.size == 10 - - -@pytest.fixture( - params=[ - "basic_graph_1", - "multi_edge_graph_1", - "multi_edge_multi_vertex_graph_1", - ] -) -def graph(request): - return request.getfixturevalue(request.param) - - -@pytest.fixture(params=["basic_graph_1", "multi_edge_graph_1"]) -def single_vertex_graph(request): - return request.getfixturevalue(request.param) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.parametrize( - "edge_index_type", ["numpy", "torch-cpu", "torch-gpu", "cudf", "dask-cudf"] -) -def test_get_edge_index(graph, edge_index_type, dask_client): - F, G, N = graph - if "torch" in edge_index_type: - if edge_index_type == "torch-cpu": - device = "cpu" - else: - device = "cuda" - for et in list(G.keys()): - G[et][0] = torch.as_tensor(G[et][0], device=device) - G[et][1] = torch.as_tensor(G[et][1], device=device) - elif edge_index_type == "cudf": - for et in list(G.keys()): - G[et][0] = cudf.Series(G[et][0]) - G[et][1] = 
cudf.Series(G[et][1]) - elif edge_index_type == "dask-cudf": - for et in list(G.keys()): - G[et][0] = dask_cudf.from_cudf(cudf.Series(G[et][0]), npartitions=1) - G[et][1] = dask_cudf.from_cudf(cudf.Series(G[et][1]), npartitions=1) - - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - - for pyg_can_edge_type in G: - src, dst = cugraph_store.get_edge_index( - edge_type=pyg_can_edge_type, layout="coo", is_sorted=False - ) - - if edge_index_type == "cudf": - assert G[pyg_can_edge_type][0].values_host.tolist() == src.tolist() - assert G[pyg_can_edge_type][1].values_host.tolist() == dst.tolist() - elif edge_index_type == "dask-cudf": - assert ( - G[pyg_can_edge_type][0].compute().values_host.tolist() == src.tolist() - ) - assert ( - G[pyg_can_edge_type][1].compute().values_host.tolist() == dst.tolist() - ) - else: - assert G[pyg_can_edge_type][0].tolist() == src.tolist() - assert G[pyg_can_edge_type][1].tolist() == dst.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_edge_types(graph, dask_client): - F, G, N = graph - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - - eta = cugraph_store._edge_types_to_attrs - assert eta.keys() == G.keys() - - for attr_name, attr_repr in eta.items(): - src_size = N[attr_name[0]] - dst_size = N[attr_name[-1]] - assert src_size == attr_repr.size[0] - assert dst_size == attr_repr.size[-1] - assert attr_name == attr_repr.edge_type - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_get_subgraph(graph, dask_client): - F, G, N = graph - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - - if len(G.keys()) > 1: - for edge_type in G.keys(): - # Subgraphing is not implemented yet and should raise an error - with pytest.raises(ValueError): - sg = cugraph_store._subgraph([edge_type]) - - sg = cugraph_store._subgraph(list(G.keys())) - assert isinstance(sg, cugraph.MultiGraph) - - num_edges = sum([len(v[0]) for v in G.values()]) - assert sg.number_of_edges() == num_edges - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_renumber_vertices_basic(single_vertex_graph, dask_client): - F, G, N = single_vertex_graph - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - - nodes_of_interest = torch.as_tensor( - cupy.random.randint(0, sum(N.values()), 3), device="cuda" - ) - - index = cugraph_store._get_vertex_groups_from_sample(nodes_of_interest) - assert index["vt1"].tolist() == nodes_of_interest.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_renumber_vertices_multi_edge_multi_vertex( - multi_edge_multi_vertex_graph_1, dask_client -): - F, G, N = multi_edge_multi_vertex_graph_1 - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - - nodes_of_interest = torch.as_tensor( - cupy.random.randint(0, sum(N.values()), 3), device="cuda" - ).unique() - - index = cugraph_store._get_vertex_groups_from_sample(nodes_of_interest) - - black_nodes = nodes_of_interest[nodes_of_interest <= 1] - brown_nodes = nodes_of_interest[nodes_of_interest > 1] - 2 - - if len(black_nodes) > 0: - assert index["black"].tolist() == sorted(black_nodes.tolist()) - if len(brown_nodes) > 0: - assert index["brown"].tolist() == sorted(brown_nodes.tolist()) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_renumber_edges(abc_graph, dask_client): - F, G, N = abc_graph - - graph_store = CuGraphStore(F, G, N, multi_gpu=True, order="CSR") - - 
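# The renumbering test above expects _get_vertex_groups_from_sample to split a
# flat tensor of sampled global ids into per-type groups of local ids; with
# N = {"brown": 3, "black": 2} the assertions imply "black" occupies global ids
# 0-1 and "brown" 2-4. A plain-PyTorch sketch of that split (toy ids assumed):
import torch

nodes_of_interest = torch.tensor([0, 2, 4])               # sampled global vertex ids

black = nodes_of_interest[nodes_of_interest < 2]          # local ids equal global ids
brown = nodes_of_interest[nodes_of_interest >= 2] - 2     # shift into the brown local range

index = {"black": black, "brown": brown}   # {"black": tensor([0]), "brown": tensor([0, 2])}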
# let 0, 1 be the start vertices, fanout = [2, 1, 2, 3] - mock_sampling_results = cudf.DataFrame( - { - "majors": cudf.Series([0, 0, 1, 2, 3, 3, 1, 3, 3, 3], dtype="int64"), - "minors": cudf.Series([2, 3, 3, 8, 1, 7, 3, 1, 5, 7], dtype="int64"), - "hop_id": cudf.Series([0, 0, 0, 1, 1, 1, 2, 3, 3, 3], dtype="int32"), - "edge_type": cudf.Series([0, 0, 0, 2, 1, 2, 0, 1, 2, 2], dtype="int32"), - } - ) - - mock_noi_index = { - "A": torch.tensor([0, 1], device="cuda"), - "B": torch.tensor([0, 1], device="cuda"), - "C": torch.tensor([3, 2, 0], device="cuda"), - } - - row_dict, col_dict = graph_store._get_renumbered_edge_groups_from_sample( - mock_sampling_results, mock_noi_index - ) - - assert len(row_dict) == 3 - assert len(col_dict) == 3 - assert row_dict[("A", "ab", "B")].tolist() == [0, 0, 1, 1] - assert col_dict[("A", "ab", "B")].tolist() == [0, 1, 1, 1] - assert row_dict[("B", "bc", "C")].tolist() == [0, 1, 1, 1] - assert col_dict[("B", "bc", "C")].tolist() == [0, 1, 2, 1] - assert row_dict[("B", "ba", "A")].tolist() == [1, 1] - assert col_dict[("B", "ba", "A")].tolist() == [1, 1] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_get_tensor(graph, dask_client): - F, G, N = graph - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - - for feature_name, feature_on_types in F.get_feature_list().items(): - for type_name in feature_on_types: - v_ids = np.arange(N[type_name]) - base_series = F.get_data( - v_ids, - type_name=type_name, - feat_name=feature_name, - ).tolist() - - tsr = cugraph_store.get_tensor( - type_name, feature_name, v_ids, None, cupy.int64 - ).tolist() - - assert tsr == base_series - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_get_tensor_empty_idx(karate_gnn, dask_client): - F, G, N = karate_gnn - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - - t = cugraph_store.get_tensor( - CuGraphTensorAttr(group_name="type0", attr_name="prop0", index=None) - ) - assert t.tolist() == (torch.arange(17, dtype=torch.float32) * 31).tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_multi_get_tensor(graph, dask_client): - F, G, N = graph - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - - for vertex_type in sorted(N.keys()): - v_ids = np.arange(N[vertex_type]) - feat_names = list(F.get_feature_list().keys()) - base_series = None - for feat_name in feat_names: - if base_series is None: - base_series = F.get_data(v_ids, vertex_type, feat_name) - else: - base_series = np.stack( - [base_series, F.get_data(v_ids, vertex_type, feat_name)] - ) - - tsr = cugraph_store.multi_get_tensor( - [ - CuGraphTensorAttr(vertex_type, feat_name, v_ids) - for feat_name in feat_names - ] - ) - - assert torch.stack(tsr).tolist() == base_series.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_get_all_tensor_attrs(graph, dask_client): - F, G, N = graph - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - - tensor_attrs = [] - for vertex_type in sorted(N.keys()): - for prop in ["prop1", "prop2"]: - tensor_attrs.append( - CuGraphTensorAttr( - vertex_type, - prop, - properties=None, - dtype=F.get_data([0], vertex_type, "prop1").dtype, - ) - ) - - assert sorted(tensor_attrs, key=lambda a: (a.group_name, a.attr_name)) == sorted( - cugraph_store.get_all_tensor_attrs(), key=lambda a: (a.group_name, a.attr_name) - ) - - -@pytest.mark.skip("not implemented") -def 
test_get_tensor_spec_props(graph, dask_client): - raise NotImplementedError("not implemented") - - -@pytest.mark.skip("not implemented") -def test_multi_get_tensor_spec_props(multi_edge_multi_vertex_graph_1, dask_client): - raise NotImplementedError("not implemented") - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_get_tensor_from_tensor_attrs(graph, dask_client): - F, G, N = graph - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - - tensor_attrs = cugraph_store.get_all_tensor_attrs() - for tensor_attr in tensor_attrs: - v_ids = np.arange(N[tensor_attr.group_name]) - data = F.get_data(v_ids, tensor_attr.group_name, tensor_attr.attr_name) - - tensor_attr.index = v_ids - assert cugraph_store.get_tensor(tensor_attr).tolist() == data.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_get_tensor_size(graph, dask_client): - F, G, N = graph - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - - tensor_attrs = cugraph_store.get_all_tensor_attrs() - for tensor_attr in tensor_attrs: - sz = N[tensor_attr.group_name] - - tensor_attr.index = np.arange(sz) - assert cugraph_store.get_tensor_size(tensor_attr) == torch.Size((sz,)) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif( - isinstance(torch_geometric, MissingModule), reason="pyg not available" -) -def test_get_input_nodes(karate_gnn, dask_client): - F, G, N = karate_gnn - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - - node_type, input_nodes = torch_geometric.loader.utils.get_input_nodes( - (cugraph_store, cugraph_store), "type0" - ) - - assert node_type == "type0" - assert input_nodes.tolist() == torch.arange(17, dtype=torch.int32).tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_mg_frame_handle(graph, dask_client): - F, G, N = graph - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - assert isinstance(cugraph_store._EXPERIMENTAL__CuGraphStore__graph._plc_graph, dict) diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_gat_conv.py b/python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_gat_conv.py deleted file mode 100644 index ee14d1e7..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_gat_conv.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -from cugraph_pyg.nn import GATConv as CuGraphGATConv - -ATOL = 1e-6 - - -@pytest.mark.parametrize("bias", [True, False]) -@pytest.mark.parametrize("bipartite", [True, False]) -@pytest.mark.parametrize("concat", [True, False]) -@pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16]) -@pytest.mark.parametrize("max_num_neighbors", [8, None]) -@pytest.mark.parametrize("use_edge_attr", [True, False]) -@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) -def test_gat_conv_equality( - bias, bipartite, concat, heads, max_num_neighbors, use_edge_attr, graph, request -): - pytest.importorskip("torch_geometric", reason="PyG not available") - import torch - from torch_geometric.nn import GATConv - - torch.manual_seed(12345) - edge_index, size = request.getfixturevalue(graph) - edge_index = edge_index.cuda() - - if bipartite: - in_channels = (5, 3) - x = ( - torch.rand(size[0], in_channels[0]).cuda(), - torch.rand(size[1], in_channels[1]).cuda(), - ) - else: - in_channels = 5 - x = torch.rand(size[0], in_channels).cuda() - out_channels = 2 - - if use_edge_attr: - edge_dim = 3 - edge_attr = torch.rand(edge_index.size(1), edge_dim).cuda() - csc, edge_attr_perm = CuGraphGATConv.to_csc( - edge_index, size, edge_attr=edge_attr - ) - else: - edge_dim = None - edge_attr = edge_attr_perm = None - csc = CuGraphGATConv.to_csc(edge_index, size) - - kwargs = dict(bias=bias, concat=concat, edge_dim=edge_dim) - - conv1 = GATConv( - in_channels, out_channels, heads, add_self_loops=False, **kwargs - ).cuda() - conv2 = CuGraphGATConv(in_channels, out_channels, heads, **kwargs).cuda() - - out_dim = heads * out_channels - with torch.no_grad(): - if bipartite: - conv2.lin_src.weight.data = conv1.lin_src.weight.data.detach().clone() - conv2.lin_dst.weight.data = conv1.lin_dst.weight.data.detach().clone() - else: - conv2.lin.weight.data = conv1.lin_src.weight.data.detach().clone() - - conv2.att.data[:out_dim] = conv1.att_src.data.flatten() - conv2.att.data[out_dim : 2 * out_dim] = conv1.att_dst.data.flatten() - if use_edge_attr: - conv2.att.data[2 * out_dim :] = conv1.att_edge.data.flatten() - conv2.lin_edge.weight.data = conv1.lin_edge.weight.data.detach().clone() - - out1 = conv1(x, edge_index, edge_attr=edge_attr) - out2 = conv2(x, csc, edge_attr=edge_attr_perm, max_num_neighbors=max_num_neighbors) - assert torch.allclose(out1, out2, atol=ATOL) - - grad_output = torch.rand_like(out1) - out1.backward(grad_output) - out2.backward(grad_output) - - if bipartite: - assert torch.allclose( - conv1.lin_src.weight.grad, conv2.lin_src.weight.grad, atol=ATOL - ) - assert torch.allclose( - conv1.lin_dst.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL - ) - else: - assert torch.allclose( - conv1.lin_src.weight.grad, conv2.lin.weight.grad, atol=ATOL - ) - - assert torch.allclose( - conv1.att_src.grad.flatten(), conv2.att.grad[:out_dim], atol=ATOL - ) - assert torch.allclose( - conv1.att_dst.grad.flatten(), conv2.att.grad[out_dim : 2 * out_dim], atol=ATOL - ) - - if use_edge_attr: - assert torch.allclose( - conv1.att_edge.grad.flatten(), conv2.att.grad[2 * out_dim :], atol=ATOL - ) - assert torch.allclose( - conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, atol=ATOL - ) - - if bias: - assert torch.allclose(conv1.bias.grad, conv2.bias.grad, atol=ATOL) diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_gatv2_conv.py b/python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_gatv2_conv.py deleted file mode 100644 index 5e916816..00000000 --- 
a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_gatv2_conv.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -from cugraph_pyg.nn import GATv2Conv as CuGraphGATv2Conv - -ATOL = 1e-6 - - -@pytest.mark.parametrize("bipartite", [True, False]) -@pytest.mark.parametrize("concat", [True, False]) -@pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16]) -@pytest.mark.parametrize("use_edge_attr", [True, False]) -@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) -def test_gatv2_conv_equality(bipartite, concat, heads, use_edge_attr, graph, request): - pytest.importorskip("torch_geometric", reason="PyG not available") - import torch - from torch_geometric.nn import GATv2Conv - - torch.manual_seed(12345) - edge_index, size = request.getfixturevalue(graph) - edge_index = edge_index.cuda() - - if bipartite: - in_channels = (5, 3) - x = ( - torch.rand(size[0], in_channels[0]).cuda(), - torch.rand(size[1], in_channels[1]).cuda(), - ) - else: - in_channels = 5 - x = torch.rand(size[0], in_channels).cuda() - out_channels = 2 - - if use_edge_attr: - edge_dim = 3 - edge_attr = torch.rand(edge_index.size(1), edge_dim).cuda() - csc, edge_attr_perm = CuGraphGATv2Conv.to_csc( - edge_index, size, edge_attr=edge_attr - ) - else: - edge_dim = None - edge_attr = edge_attr_perm = None - csc = CuGraphGATv2Conv.to_csc(edge_index, size) - - kwargs = dict(bias=False, concat=concat, edge_dim=edge_dim) - - conv1 = GATv2Conv( - in_channels, out_channels, heads, add_self_loops=False, **kwargs - ).cuda() - conv2 = CuGraphGATv2Conv(in_channels, out_channels, heads, **kwargs).cuda() - - with torch.no_grad(): - conv2.lin_src.weight.data = conv1.lin_l.weight.data.detach().clone() - conv2.lin_dst.weight.data = conv1.lin_r.weight.data.detach().clone() - conv2.att.data = conv1.att.data.flatten().detach().clone() - if use_edge_attr: - conv2.lin_edge.weight.data = conv1.lin_edge.weight.data.detach().clone() - - out1 = conv1(x, edge_index, edge_attr=edge_attr) - out2 = conv2(x, csc, edge_attr=edge_attr_perm) - assert torch.allclose(out1, out2, atol=ATOL) - - grad_output = torch.rand_like(out1) - out1.backward(grad_output) - out2.backward(grad_output) - - assert torch.allclose(conv1.lin_l.weight.grad, conv2.lin_src.weight.grad, atol=ATOL) - assert torch.allclose(conv1.lin_r.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL) - - assert torch.allclose(conv1.att.grad.flatten(), conv2.att.grad, atol=ATOL) - - if use_edge_attr: - assert torch.allclose( - conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, atol=ATOL - ) diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_rgcn_conv.py b/python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_rgcn_conv.py deleted file mode 100644 index 76e95d93..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_rgcn_conv.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -from cugraph_pyg.nn import RGCNConv as CuGraphRGCNConv - -ATOL = 1e-6 - - -@pytest.mark.parametrize("aggr", ["add", "sum", "mean"]) -@pytest.mark.parametrize("bias", [True, False]) -@pytest.mark.parametrize("max_num_neighbors", [8, None]) -@pytest.mark.parametrize("num_bases", [1, 2, None]) -@pytest.mark.parametrize("root_weight", [True, False]) -@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) -def test_rgcn_conv_equality( - aggr, bias, max_num_neighbors, num_bases, root_weight, graph, request -): - pytest.importorskip("torch_geometric", reason="PyG not available") - import torch - from torch_geometric.nn import FastRGCNConv as RGCNConv - - torch.manual_seed(12345) - in_channels, out_channels, num_relations = (4, 2, 3) - kwargs = dict(aggr=aggr, bias=bias, num_bases=num_bases, root_weight=root_weight) - - edge_index, size = request.getfixturevalue(graph) - edge_index = edge_index.cuda() - edge_type = torch.randint(num_relations, (edge_index.size(1),)).cuda() - - x = torch.rand(size[0], in_channels, device="cuda") - csc, edge_type_perm = CuGraphRGCNConv.to_csc(edge_index, size, edge_type) - - conv1 = RGCNConv(in_channels, out_channels, num_relations, **kwargs).cuda() - conv2 = CuGraphRGCNConv(in_channels, out_channels, num_relations, **kwargs).cuda() - - with torch.no_grad(): - if root_weight: - conv2.weight.data[:-1] = conv1.weight.data - conv2.weight.data[-1] = conv1.root.data - else: - conv2.weight.data = conv1.weight.data.detach().clone() - if num_bases is not None: - conv2.comp.data = conv1.comp.data.detach().clone() - - out1 = conv1(x, edge_index, edge_type) - out2 = conv2(x, csc, edge_type_perm, max_num_neighbors=max_num_neighbors) - assert torch.allclose(out1, out2, atol=ATOL) - - grad_out = torch.rand_like(out1) - out1.backward(grad_out) - out2.backward(grad_out) - - if root_weight: - assert torch.allclose(conv1.weight.grad, conv2.weight.grad[:-1], atol=ATOL) - assert torch.allclose(conv1.root.grad, conv2.weight.grad[-1], atol=ATOL) - else: - assert torch.allclose(conv1.weight.grad, conv2.weight.grad, atol=ATOL) - - if num_bases is not None: - assert torch.allclose(conv1.comp.grad, conv2.comp.grad, atol=ATOL) diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_sage_conv.py b/python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_sage_conv.py deleted file mode 100644 index 3f2045df..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_sage_conv.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -from cugraph_pyg.nn import SAGEConv as CuGraphSAGEConv - -ATOL = 1e-6 - - -@pytest.mark.parametrize("aggr", ["sum", "mean", "min", "max"]) -@pytest.mark.parametrize("bias", [True, False]) -@pytest.mark.parametrize("bipartite", [True, False]) -@pytest.mark.parametrize("max_num_neighbors", [8, None]) -@pytest.mark.parametrize("normalize", [True, False]) -@pytest.mark.parametrize("root_weight", [True, False]) -@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) -def test_sage_conv_equality( - aggr, bias, bipartite, max_num_neighbors, normalize, root_weight, graph, request -): - pytest.importorskip("torch_geometric", reason="PyG not available") - import torch - from torch_geometric.nn import SAGEConv - - torch.manual_seed(12345) - edge_index, size = request.getfixturevalue(graph) - edge_index = edge_index.cuda() - csc = CuGraphSAGEConv.to_csc(edge_index, size) - - if bipartite: - in_channels = (7, 3) - x = ( - torch.rand(size[0], in_channels[0]).cuda(), - torch.rand(size[1], in_channels[1]).cuda(), - ) - else: - in_channels = 5 - x = torch.rand(size[0], in_channels).cuda() - out_channels = 4 - - kwargs = dict(aggr=aggr, bias=bias, normalize=normalize, root_weight=root_weight) - - conv1 = SAGEConv(in_channels, out_channels, **kwargs).cuda() - conv2 = CuGraphSAGEConv(in_channels, out_channels, **kwargs).cuda() - - in_channels_src = conv2.in_channels_src - with torch.no_grad(): - conv2.lin.weight.data[:, :in_channels_src] = conv1.lin_l.weight.data - if root_weight: - conv2.lin.weight.data[:, in_channels_src:] = conv1.lin_r.weight.data - if bias: - conv2.lin.bias.data[:] = conv1.lin_l.bias.data - - out1 = conv1(x, edge_index) - out2 = conv2(x, csc, max_num_neighbors=max_num_neighbors) - assert torch.allclose(out1, out2, atol=ATOL) - - grad_out = torch.rand_like(out1) - out1.backward(grad_out) - out2.backward(grad_out) - - assert torch.allclose( - conv1.lin_l.weight.grad, - conv2.lin.weight.grad[:, :in_channels_src], - atol=ATOL, - ) - - if root_weight: - assert torch.allclose( - conv1.lin_r.weight.grad, - conv2.lin.weight.grad[:, in_channels_src:], - atol=ATOL, - ) - - if bias: - assert torch.allclose( - conv1.lin_l.bias.grad, - conv2.lin.bias.grad, - atol=ATOL, - ) diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_transformer_conv.py b/python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_transformer_conv.py deleted file mode 100644 index b32ce190..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/nn/test_transformer_conv.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -from cugraph_pyg.nn import TransformerConv as CuGraphTransformerConv - -ATOL = 1e-6 - - -@pytest.mark.parametrize("bipartite", [True, False]) -@pytest.mark.parametrize("concat", [True, False]) -@pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16]) -@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) -def test_transformer_conv_equality(bipartite, concat, heads, graph, request): - pytest.importorskip("torch_geometric", reason="PyG not available") - import torch - from torch_geometric.nn import TransformerConv - - torch.manual_seed(12345) - edge_index, size = request.getfixturevalue(graph) - edge_index = edge_index.cuda() - csc = CuGraphTransformerConv.to_csc(edge_index, size) - - out_channels = 2 - kwargs = dict(concat=concat, bias=False, root_weight=False) - - if bipartite: - in_channels = (5, 3) - x = ( - torch.rand(size[0], in_channels[0], device="cuda"), - torch.rand(size[1], in_channels[1], device="cuda"), - ) - else: - in_channels = 5 - x = torch.rand(size[0], in_channels, device="cuda") - - conv1 = TransformerConv(in_channels, out_channels, heads, **kwargs).cuda() - conv2 = CuGraphTransformerConv(in_channels, out_channels, heads, **kwargs).cuda() - - with torch.no_grad(): - conv2.lin_query.weight.data = conv1.lin_query.weight.data.detach().clone() - conv2.lin_key.weight.data = conv1.lin_key.weight.data.detach().clone() - conv2.lin_value.weight.data = conv1.lin_value.weight.data.detach().clone() - conv2.lin_query.bias.data = conv1.lin_query.bias.data.detach().clone() - conv2.lin_key.bias.data = conv1.lin_key.bias.data.detach().clone() - conv2.lin_value.bias.data = conv1.lin_value.bias.data.detach().clone() - - out1 = conv1(x, edge_index) - out2 = conv2(x, csc) - - assert torch.allclose(out1, out2, atol=ATOL) - - grad_output = torch.rand_like(out1) - out1.backward(grad_output) - out2.backward(grad_output) - - assert torch.allclose( - conv1.lin_query.weight.grad, conv2.lin_query.weight.grad, atol=ATOL - ) - assert torch.allclose( - conv1.lin_key.weight.grad, conv2.lin_key.weight.grad, atol=ATOL - ) - assert torch.allclose( - conv1.lin_value.weight.grad, conv2.lin_value.weight.grad, atol=ATOL - ) - assert torch.allclose( - conv1.lin_query.bias.grad, conv2.lin_query.bias.grad, atol=ATOL - ) - assert torch.allclose(conv1.lin_key.bias.grad, conv2.lin_key.bias.grad, atol=ATOL) - assert torch.allclose( - conv1.lin_value.bias.grad, conv2.lin_value.bias.grad, atol=ATOL - ) diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/test_cugraph_loader.py b/python/cugraph-pyg/build/lib/cugraph_pyg/tests/test_cugraph_loader.py deleted file mode 100644 index a6e389bd..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/test_cugraph_loader.py +++ /dev/null @@ -1,492 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
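
The five convolution tests above all follow the same recipe: build the CSC representation with `to_csc`, copy the PyG layer's parameters into the cugraph-ops layer, then compare forward outputs and gradients. Below is a condensed, illustrative sketch of that recipe for the GAT case; the helper name `gat_outputs_match` is hypothetical, and the sketch assumes only the `CuGraphGATConv` API used in the deleted tests (`to_csc`, a `lin` weight, and a fused `att` parameter laid out as source attention followed by destination attention).

import torch
from torch_geometric.nn import GATConv
from cugraph_pyg.nn import GATConv as CuGraphGATConv

ATOL = 1e-6

def gat_outputs_match(edge_index, size, in_channels=5, out_channels=2, heads=2):
    # Both layers see the same random node features on the GPU.
    x = torch.rand(size[0], in_channels).cuda()
    csc = CuGraphGATConv.to_csc(edge_index.cuda(), size)

    conv_pyg = GATConv(
        in_channels, out_channels, heads, add_self_loops=False, bias=False
    ).cuda()
    conv_ops = CuGraphGATConv(in_channels, out_channels, heads, bias=False).cuda()

    # Copy the PyG parameters into the fused cugraph-ops layout
    # (same mapping the equivalence tests above perform).
    out_dim = heads * out_channels
    with torch.no_grad():
        conv_ops.lin.weight.copy_(conv_pyg.lin_src.weight)
        conv_ops.att[:out_dim] = conv_pyg.att_src.flatten()
        conv_ops.att[out_dim : 2 * out_dim] = conv_pyg.att_dst.flatten()

    out_pyg = conv_pyg(x, edge_index.cuda())
    out_ops = conv_ops(x, csc)
    return torch.allclose(out_pyg, out_ops, atol=ATOL)
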
- -import pytest - -import tempfile -import os - -import cudf -import cupy -import numpy as np - -from cugraph_pyg.loader import CuGraphNeighborLoader -from cugraph_pyg.loader import BulkSampleLoader -from cugraph_pyg.data import CuGraphStore -from cugraph_pyg.nn import SAGEConv as CuGraphSAGEConv - -from cugraph.gnn import FeatureStore -from cugraph.utilities.utils import import_optional, MissingModule - -from typing import Dict, Tuple - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") -trim_to_layer = import_optional("torch_geometric.utils.trim_to_layer") - -try: - import torch_sparse # noqa: F401 - - HAS_TORCH_SPARSE = True -except: # noqa: E722 - HAS_TORCH_SPARSE = False - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_cugraph_loader_basic( - karate_gnn: Tuple[ - FeatureStore, Dict[Tuple[str, str, str], np.ndarray], Dict[str, int] - ] -): - F, G, N = karate_gnn - cugraph_store = CuGraphStore(F, G, N, order="CSR") - loader = CuGraphNeighborLoader( - (cugraph_store, cugraph_store), - torch.arange(N["type0"] + N["type1"], dtype=torch.int64), - 10, - num_neighbors=[4, 4], - random_state=62, - replace=False, - ) - - samples = [s for s in loader] - - assert len(samples) == 3 - for sample in samples: - if "type0" in sample: - for prop in sample["type0"]["prop0"].tolist(): - assert prop % 31 == 0 - - if "type1" in sample: - for prop in sample["type1"]["prop0"].tolist(): - assert prop % 41 == 0 - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_cugraph_loader_hetero( - karate_gnn: Tuple[ - FeatureStore, Dict[Tuple[str, str, str], np.ndarray], Dict[str, int] - ] -): - F, G, N = karate_gnn - cugraph_store = CuGraphStore(F, G, N, order="CSR") - loader = CuGraphNeighborLoader( - (cugraph_store, cugraph_store), - input_nodes=("type1", torch.tensor([0, 1, 2, 5], device="cuda")), - batch_size=2, - num_neighbors=[4, 4], - random_state=62, - replace=False, - ) - - samples = [s for s in loader] - - assert len(samples) == 2 - for sample in samples: - if "type0" in sample: - for prop in sample["type0"]["prop0"].tolist(): - assert prop % 31 == 0 - - if "type1" in sample: - for prop in sample["type1"]["prop0"].tolist(): - assert prop % 41 == 0 - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_cugraph_loader_from_disk(): - m = [2, 9, 99, 82, 9, 3, 18, 1, 12] - n = torch.arange(1, 1 + len(m), dtype=torch.int32) - x = torch.zeros(256, dtype=torch.int32) - x[torch.tensor(m, dtype=torch.int32)] = n - F = FeatureStore() - F.add_data(x, "t0", "x") - - G = {("t0", "knows", "t0"): 9080} - N = {"t0": 256} - - cugraph_store = CuGraphStore(F, G, N, order="CSR") - - bogus_samples = cudf.DataFrame( - { - "majors": [0, 1, 2, 3, 4, 5, 6, 6], - "minors": [5, 4, 3, 2, 2, 6, 5, 2], - "edge_type": cudf.Series([0, 0, 0, 0, 0, 0, 0, 0], dtype="int32"), - "edge_id": [5, 10, 15, 20, 25, 30, 35, 40], - "hop_id": cudf.Series([0, 0, 0, 1, 1, 1, 2, 2], dtype="int32"), - } - ) - map = cudf.Series(m, name="map") - bogus_samples = bogus_samples.join(map, how="outer").sort_index() - - tempdir = tempfile.TemporaryDirectory() - for s in range(256): - bogus_samples["batch_id"] = cupy.int32(s) - bogus_samples.to_parquet(os.path.join(tempdir.name, f"batch={s}-{s}.parquet")) - - loader = BulkSampleLoader( - feature_store=cugraph_store, - graph_store=cugraph_store, - directory=tempdir, - ) - - num_samples = 0 - for sample in loader: - num_samples += 1 - assert 
sample["t0"]["num_nodes"] == 7 - # correct vertex order is [0, 1, 2, 5, 4, 3, 6]; x = [1, 2, 3, 6, 5, 4, 7] - assert sample["t0"]["x"].tolist() == [3, 4, 5, 6, 7, 8, 9] - - edge_index = sample[("t0", "knows", "t0")]["edge_index"] - assert list(edge_index.shape) == [2, 8] - - assert ( - edge_index[0].tolist() == bogus_samples.majors.dropna().values_host.tolist() - ) - assert ( - edge_index[1].tolist() == bogus_samples.minors.dropna().values_host.tolist() - ) - - assert num_samples == 256 - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_cugraph_loader_from_disk_subset(): - m = [2, 9, 99, 82, 9, 3, 18, 1, 12] - n = torch.arange(1, 1 + len(m), dtype=torch.int32) - x = torch.zeros(256, dtype=torch.int32) - x[torch.tensor(m, dtype=torch.int32)] = n - F = FeatureStore() - F.add_data(x, "t0", "x") - - G = {("t0", "knows", "t0"): 9080} - N = {"t0": 256} - - cugraph_store = CuGraphStore(F, G, N, order="CSR") - - bogus_samples = cudf.DataFrame( - { - "majors": [0, 1, 2, 3, 4, 5, 6, 6], - "minors": [5, 4, 3, 2, 2, 6, 5, 2], - "edge_type": cudf.Series([0, 0, 0, 0, 0, 0, 0, 0], dtype="int32"), - "edge_id": [5, 10, 15, 20, 25, 30, 35, 40], - "hop_id": cudf.Series([0, 0, 0, 1, 1, 1, 2, 2], dtype="int32"), - } - ) - map = cudf.Series(m, name="map") - bogus_samples = bogus_samples.join(map, how="outer").sort_index() - - tempdir = tempfile.TemporaryDirectory() - for s in range(256): - bogus_samples["batch_id"] = cupy.int32(s) - bogus_samples.to_parquet(os.path.join(tempdir.name, f"batch={s}-{s}.parquet")) - - loader = BulkSampleLoader( - feature_store=cugraph_store, - graph_store=cugraph_store, - directory=tempdir, - input_files=list(os.listdir(tempdir.name))[100:200], - ) - - num_samples = 0 - for sample in loader: - num_samples += 1 - assert sample["t0"]["num_nodes"] == 7 - # correct vertex order is [0, 1, 2, 6, 4, 3, 5]; x = [1, 2, 3, 7, 5, 4, 6] - assert sample["t0"]["x"].tolist() == [3, 4, 5, 6, 7, 8, 9] - - edge_index = sample[("t0", "knows", "t0")]["edge_index"] - assert list(edge_index.shape) == [2, 8] - - assert ( - edge_index[0].tolist() == bogus_samples.majors.dropna().values_host.tolist() - ) - assert ( - edge_index[1].tolist() == bogus_samples.minors.dropna().values_host.tolist() - ) - - assert num_samples == 100 - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif(not HAS_TORCH_SPARSE, reason="torch-sparse not available") -def test_cugraph_loader_from_disk_subset_csr(): - m = [2, 9, 99, 82, 11, 13] - n = torch.arange(1, 1 + len(m), dtype=torch.int32) - x = torch.zeros(256, dtype=torch.int32) - x[torch.tensor(m, dtype=torch.int32)] = n - F = FeatureStore() - F.add_data(x, "t0", "x") - - G = {("t0", "knows", "t0"): 9080} - N = {"t0": 256} - - cugraph_store = CuGraphStore(F, G, N) - - bogus_samples = cudf.DataFrame( - { - "major_offsets": [0, 3, 5, 7, 8, None, None, None], - "minors": [1, 2, 3, 0, 3, 4, 5, 1], - "edge_type": cudf.Series([0, 0, 0, 0, 0, 0, 0, 0], dtype="int32"), - "edge_id": [5, 10, 15, 20, 25, 30, 35, 40], - "label_hop_offsets": cudf.Series( - [0, 1, 4, None, None, None, None, None], dtype="int32" - ), - "renumber_map_offsets": cudf.Series([0, 6], dtype="int32"), - } - ) - map = cudf.Series(m, name="map") - bogus_samples["map"] = map - - tempdir = tempfile.TemporaryDirectory() - for s in range(256): - # offset the offsets - bogus_samples["batch_id"] = cupy.int32(s) - bogus_samples.to_parquet(os.path.join(tempdir.name, f"batch={s}-{s}.parquet")) - - loader = BulkSampleLoader( 
- feature_store=cugraph_store, - graph_store=cugraph_store, - directory=tempdir, - input_files=list(os.listdir(tempdir.name))[100:200], - ) - - num_samples = 0 - for sample in loader: - num_samples += 1 - assert sample["t0"]["num_nodes"] == 6 - - assert sample["t0"]["x"].tolist() == [1, 2, 3, 4, 5, 6] - - edge_index = sample[("t0", "knows", "t0")]["adj_t"] - assert edge_index.size(0) == 4 - assert edge_index.size(1) == 6 - - colptr, row, _ = edge_index.csr() - - assert ( - colptr.tolist() == bogus_samples.major_offsets.dropna().values_host.tolist() - ) - assert row.tolist() == bogus_samples.minors.dropna().values_host.tolist() - - assert sample["t0"]["num_sampled_nodes"].tolist() == [1, 3, 2] - assert sample["t0", "knows", "t0"]["num_sampled_edges"].tolist() == [3, 5] - - assert num_samples == 100 - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_cugraph_loader_e2e_coo(): - m = [2, 9, 99, 82, 9, 3, 18, 1, 12] - x = torch.randint(3000, (256, 256)).to(torch.float32) - F = FeatureStore() - F.add_data(x, "t0", "x") - - G = {("t0", "knows", "t0"): 9999} - N = {"t0": 256} - - cugraph_store = CuGraphStore(F, G, N, order="CSR") - - bogus_samples = cudf.DataFrame( - { - "majors": [0, 1, 2, 3, 4, 5, 6, 6], - "minors": [5, 4, 3, 2, 2, 6, 5, 2], - "edge_type": cudf.Series([0, 0, 0, 0, 0, 0, 0, 0], dtype="int32"), - "edge_id": [5, 10, 15, 20, 25, 30, 35, 40], - "hop_id": cudf.Series([0, 0, 0, 1, 1, 1, 2, 2], dtype="int32"), - } - ) - map = cudf.Series(m, name="map") - bogus_samples = bogus_samples.join(map, how="outer").sort_index() - - tempdir = tempfile.TemporaryDirectory() - for s in range(256): - bogus_samples["batch_id"] = cupy.int32(s) - bogus_samples.to_parquet(os.path.join(tempdir.name, f"batch={s}-{s}.parquet")) - - loader = BulkSampleLoader( - feature_store=cugraph_store, - graph_store=cugraph_store, - directory=tempdir, - input_files=list(os.listdir(tempdir.name))[100:200], - ) - - convs = [ - torch_geometric.nn.SAGEConv(256, 64, aggr="mean").cuda(), - torch_geometric.nn.SAGEConv(64, 8, aggr="mean").cuda(), - torch_geometric.nn.SAGEConv(8, 1, aggr="mean").cuda(), - ] - - trim = trim_to_layer.TrimToLayer() - relu = torch.nn.functional.relu - dropout = torch.nn.functional.dropout - - for hetero_data in loader: - ei = hetero_data["t0", "knows", "t0"]["edge_index"] - x = hetero_data["t0"]["x"].cuda() - num_sampled_nodes = hetero_data["t0"]["num_sampled_nodes"] - num_sampled_edges = hetero_data["t0", "knows", "t0"]["num_sampled_edges"] - - for i in range(len(convs)): - x, ei, _ = trim(i, num_sampled_nodes, num_sampled_edges, x, ei, None) - - s = x.shape[0] - - x = convs[i](x, ei, size=(s, s)) - x = relu(x) - x = dropout(x, p=0.5) - - x = x.narrow(dim=0, start=0, length=x.shape[0] - num_sampled_nodes[1]) - - assert list(x.shape) == [3, 1] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif(not HAS_TORCH_SPARSE, reason="torch-sparse not available") -@pytest.mark.parametrize("framework", ["pyg", "cugraph-ops"]) -def test_cugraph_loader_e2e_csc(framework: str): - m = [2, 9, 99, 82, 9, 3, 18, 1, 12] - x = torch.randint(3000, (256, 256)).to(torch.float32) - F = FeatureStore() - F.add_data(x, "t0", "x") - - G = {("t0", "knows", "t0"): 9999} - N = {"t0": 256} - - cugraph_store = CuGraphStore(F, G, N) - - bogus_samples = cudf.DataFrame( - { - "major_offsets": [0, 3, 5, 7, 8, None, None, None], - "minors": [1, 2, 3, 0, 3, 4, 5, 1], - "edge_type": cudf.Series([0, 0, 0, 0, 0, 0, 0, 0], dtype="int32"), - 
"edge_id": [5, 10, 15, 20, 25, 30, 35, 40], - "label_hop_offsets": cudf.Series( - [0, 1, 4, None, None, None, None, None], dtype="int32" - ), - "renumber_map_offsets": cudf.Series([0, 6], dtype="int32"), - } - ) - map = cudf.Series(m, name="map") - bogus_samples = bogus_samples.join(map, how="outer").sort_index() - - tempdir = tempfile.TemporaryDirectory() - for s in range(256): - bogus_samples["batch_id"] = cupy.int32(s) - bogus_samples.to_parquet(os.path.join(tempdir.name, f"batch={s}-{s}.parquet")) - - loader = BulkSampleLoader( - feature_store=cugraph_store, - graph_store=cugraph_store, - directory=tempdir, - input_files=list(os.listdir(tempdir.name))[100:200], - ) - - if framework == "pyg": - convs = [ - torch_geometric.nn.SAGEConv(256, 64, aggr="mean").cuda(), - torch_geometric.nn.SAGEConv(64, 1, aggr="mean").cuda(), - ] - else: - convs = [ - CuGraphSAGEConv(256, 64, aggr="mean").cuda(), - CuGraphSAGEConv(64, 1, aggr="mean").cuda(), - ] - - trim = trim_to_layer.TrimToLayer() - relu = torch.nn.functional.relu - dropout = torch.nn.functional.dropout - - for hetero_data in loader: - x = hetero_data["t0"]["x"].cuda() - - if framework == "pyg": - ei = hetero_data["t0", "knows", "t0"]["adj_t"].coo() - ei = torch.stack((ei[0], ei[1])) - else: - ei = hetero_data["t0", "knows", "t0"]["adj_t"].csr() - ei = [ei[1], ei[0], x.shape[0]] - - num_sampled_nodes = hetero_data["t0"]["num_sampled_nodes"] - num_sampled_edges = hetero_data["t0", "knows", "t0"]["num_sampled_edges"] - - s = x.shape[0] - for i in range(len(convs)): - if framework == "pyg": - x, ei, _ = trim(i, num_sampled_nodes, num_sampled_edges, x, ei, None) - else: - if i > 0: - x = x.narrow( - dim=0, - start=0, - length=s - num_sampled_nodes[-i], - ) - - ei[0] = ei[0].narrow( - dim=0, - start=0, - length=ei[0].size(0) - num_sampled_edges[-i], - ) - ei[1] = ei[1].narrow( - dim=0, start=0, length=ei[1].size(0) - num_sampled_nodes[-i] - ) - ei[2] = x.size(0) - - s = x.shape[0] - - if framework == "pyg": - x = convs[i](x, ei, size=(s, s)) - else: - x = convs[i](x, ei) - x = relu(x) - x = dropout(x, p=0.5) - - x = x.narrow(dim=0, start=0, length=s - num_sampled_nodes[1]) - - assert list(x.shape) == [1, 1] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.parametrize("directory", ["local", "temp"]) -def test_load_directory( - karate_gnn: Tuple[ - FeatureStore, Dict[Tuple[str, str, str], np.ndarray], Dict[str, int] - ], - directory: str, -): - if directory == "local": - local_dir = tempfile.TemporaryDirectory(dir=".") - - cugraph_store = CuGraphStore(*karate_gnn) - cugraph_loader = CuGraphNeighborLoader( - (cugraph_store, cugraph_store), - torch.arange(8, dtype=torch.int64), - 2, - num_neighbors=[8, 4, 2], - random_state=62, - replace=False, - directory=None if directory == "temp" else local_dir.name, - batches_per_partition=1, - ) - - it = iter(cugraph_loader) - next_batch = next(it) - assert next_batch is not None - - if directory == "local": - assert len(os.listdir(local_dir.name)) == 4 - - count = 1 - while next(it, None) is not None: - count += 1 - - assert count == 4 diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/test_cugraph_sampler.py b/python/cugraph-pyg/build/lib/cugraph_pyg/tests/test_cugraph_sampler.py deleted file mode 100644 index a6699dac..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/test_cugraph_sampler.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import cudf -import cupy - -import pytest - -from cugraph_pyg.data import CuGraphStore -from cugraph_pyg.sampler.cugraph_sampler import ( - _sampler_output_from_sampling_results_heterogeneous, -) - -from cugraph.utilities.utils import import_optional, MissingModule -from cugraph import uniform_neighbor_sample - -torch = import_optional("torch") - - -@pytest.mark.cugraph_ops -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_neighbor_sample(basic_graph_1): - F, G, N = basic_graph_1 - cugraph_store = CuGraphStore(F, G, N, order="CSR") - - batches = cudf.DataFrame( - { - "start": cudf.Series([0, 1, 2, 3, 4], dtype="int64"), - "batch": cudf.Series(cupy.zeros(5, dtype="int32")), - } - ) - - sampling_results = uniform_neighbor_sample( - cugraph_store._subgraph(), - batches, - fanout_vals=[-1], - with_replacement=False, - with_edge_properties=True, - with_batch_ids=True, - random_state=62, - return_offsets=False, - use_legacy_names=False, - ).sort_values(by=["majors", "minors"]) - - out = _sampler_output_from_sampling_results_heterogeneous( - sampling_results=sampling_results, - renumber_map=None, - graph_store=cugraph_store, - metadata=torch.arange(6, dtype=torch.int64), - ) - - noi_groups = out.node - row_dict = out.row - col_dict = out.col - metadata = out.metadata - - assert metadata.tolist() == list(range(6)) - - for node_type, node_ids in noi_groups.items(): - actual_vertex_ids = torch.arange(N[node_type]) - - assert sorted(node_ids.tolist()) == actual_vertex_ids.tolist() - - assert ( - row_dict[("vt1", "pig", "vt1")].tolist() == G[("vt1", "pig", "vt1")][0].tolist() - ) - assert ( - col_dict[("vt1", "pig", "vt1")].tolist() == G[("vt1", "pig", "vt1")][1].tolist() - ) - - # check the hop dictionaries - assert len(out.num_sampled_nodes) == 1 - assert out.num_sampled_nodes["vt1"].tolist() == [4, 1] - - assert len(out.num_sampled_edges) == 1 - assert out.num_sampled_edges[("vt1", "pig", "vt1")].tolist() == [6] - - -@pytest.mark.cugraph_ops -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_neighbor_sample_multi_vertex(multi_edge_multi_vertex_graph_1): - F, G, N = multi_edge_multi_vertex_graph_1 - cugraph_store = CuGraphStore(F, G, N, order="CSR") - - batches = cudf.DataFrame( - { - "start": cudf.Series([0, 1, 2, 3, 4], dtype="int64"), - "batch": cudf.Series(cupy.zeros(5, dtype="int32")), - } - ) - - sampling_results = uniform_neighbor_sample( - cugraph_store._subgraph(), - batches, - fanout_vals=[-1], - with_replacement=False, - with_edge_properties=True, - random_state=62, - return_offsets=False, - with_batch_ids=True, - use_legacy_names=False, - ).sort_values(by=["majors", "minors"]) - - out = _sampler_output_from_sampling_results_heterogeneous( - sampling_results=sampling_results, - renumber_map=None, - graph_store=cugraph_store, - metadata=torch.arange(6, dtype=torch.int64), - ) - - noi_groups = out.node - row_dict = out.row - col_dict = out.col - metadata = 
out.metadata - - assert metadata.tolist() == list(range(6)) - - for node_type, node_ids in noi_groups.items(): - actual_vertex_ids = torch.arange(N[node_type]) - - assert node_ids.tolist() == sorted(actual_vertex_ids.tolist()) - - for edge_type, ei in G.items(): - assert sorted(row_dict[edge_type].tolist()) == sorted(ei[0].tolist()) - assert sorted(col_dict[edge_type].tolist()) == sorted(ei[1].tolist()) - - # check the hop dictionaries - assert len(out.num_sampled_nodes) == 2 - assert out.num_sampled_nodes["black"].tolist() == [2, 0] - assert out.num_sampled_nodes["brown"].tolist() == [3, 0] - - assert len(out.num_sampled_edges) == 5 - assert out.num_sampled_edges[("brown", "horse", "brown")].tolist() == [2] - assert out.num_sampled_edges[("brown", "tortoise", "black")].tolist() == [3] - assert out.num_sampled_edges[("brown", "mongoose", "black")].tolist() == [2] - assert out.num_sampled_edges[("black", "cow", "brown")].tolist() == [2] - assert out.num_sampled_edges[("black", "snake", "black")].tolist() == [1] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_neighbor_sample_mock_sampling_results(abc_graph): - F, G, N = abc_graph - - graph_store = CuGraphStore(F, G, N, order="CSR") - - # let 0, 1 be the start vertices, fanout = [2, 1, 2, 3] - mock_sampling_results = cudf.DataFrame( - { - "majors": cudf.Series([0, 0, 1, 2, 3, 3, 1, 3, 3, 3], dtype="int64"), - "minors": cudf.Series([2, 3, 3, 8, 1, 7, 3, 1, 5, 7], dtype="int64"), - "hop_id": cudf.Series([0, 0, 0, 1, 1, 1, 2, 3, 3, 3], dtype="int32"), - "edge_type": cudf.Series([0, 0, 0, 2, 1, 2, 0, 1, 2, 2], dtype="int32"), - } - ) - - out = _sampler_output_from_sampling_results_heterogeneous( - mock_sampling_results, None, graph_store, None - ) - - assert out.metadata is None - assert len(out.node) == 3 - assert out.node["A"].tolist() == [0, 1] - assert out.node["B"].tolist() == [0, 1] - assert out.node["C"].tolist() == [3, 2, 0] - - assert len(out.row) == 3 - assert len(out.col) == 3 - assert out.row[("A", "ab", "B")].tolist() == [0, 0, 1, 1] - assert out.col[("A", "ab", "B")].tolist() == [0, 1, 1, 1] - assert out.row[("B", "bc", "C")].tolist() == [0, 1, 1, 1] - assert out.col[("B", "bc", "C")].tolist() == [0, 1, 2, 1] - assert out.row[("B", "ba", "A")].tolist() == [1, 1] - assert out.col[("B", "ba", "A")].tolist() == [1, 1] - - assert len(out.num_sampled_nodes) == 3 - assert out.num_sampled_nodes["A"].tolist() == [2, 0, 0, 0, 0] - assert out.num_sampled_nodes["B"].tolist() == [0, 2, 0, 0, 0] - assert out.num_sampled_nodes["C"].tolist() == [0, 0, 2, 0, 1] - - assert len(out.num_sampled_edges) == 3 - assert out.num_sampled_edges[("A", "ab", "B")].tolist() == [3, 0, 1, 0] - assert out.num_sampled_edges[("B", "ba", "A")].tolist() == [0, 1, 0, 1] - assert out.num_sampled_edges[("B", "bc", "C")].tolist() == [0, 2, 0, 2] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skip("needs to be written") -def test_neighbor_sample_renumbered(): - pass diff --git a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/test_cugraph_store.py b/python/cugraph-pyg/build/lib/cugraph_pyg/tests/test_cugraph_store.py deleted file mode 100644 index 3e684695..00000000 --- a/python/cugraph-pyg/build/lib/cugraph_pyg/tests/test_cugraph_store.py +++ /dev/null @@ -1,397 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import cugraph -from cugraph_pyg.data.cugraph_store import ( - CuGraphTensorAttr, - CuGraphEdgeAttr, - EdgeLayout, -) -from cugraph_pyg.data import CuGraphStore - -import cudf -import cupy -import numpy as np - -from cugraph.utilities.utils import import_optional, MissingModule - -import pytest - - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_tensor_attr(): - ta = CuGraphTensorAttr("group0", "property1") - assert not ta.is_fully_specified() - assert not ta.is_set("index") - - ta.fully_specify() - assert ta.is_fully_specified() - - other_ta = CuGraphTensorAttr(index=[1, 2, 3]) - ta.update(other_ta) - assert ta.index == [1, 2, 3] - - casted_ta1 = CuGraphTensorAttr.cast(ta) - assert casted_ta1 == ta - - casted_ta2 = CuGraphTensorAttr.cast(index=[1, 2, 3]) - assert casted_ta2.index == [1, 2, 3] - assert not casted_ta2.is_fully_specified() - - casted_ta3 = CuGraphTensorAttr.cast( - "group2", - "property2", - [1, 2, 3], - ) - assert casted_ta3.group_name == "group2" - assert casted_ta3.attr_name == "property2" - assert casted_ta3.index == [1, 2, 3] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_edge_attr(): - ea = CuGraphEdgeAttr("type0", EdgeLayout.COO, False, 10) - assert ea.edge_type == "type0" - assert ea.layout == EdgeLayout.COO - assert not ea.is_sorted - assert ea.size == 10 - - ea = CuGraphEdgeAttr(edge_type="type1", layout="csr", is_sorted=True) - assert ea.size is None - - ea = CuGraphEdgeAttr.cast("type0", EdgeLayout.COO, False, 10) - assert ea.edge_type == "type0" - assert ea.layout == EdgeLayout.COO - assert not ea.is_sorted - assert ea.size == 10 - - -@pytest.fixture( - params=[ - "basic_graph_1", - "multi_edge_graph_1", - "multi_edge_multi_vertex_graph_1", - ] -) -def graph(request): - return request.getfixturevalue(request.param) - - -@pytest.fixture(params=["basic_graph_1", "multi_edge_graph_1"]) -def single_vertex_graph(request): - return request.getfixturevalue(request.param) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.parametrize("edge_index_type", ["numpy", "torch-cpu", "torch-gpu", "cudf"]) -def test_get_edge_index(graph, edge_index_type): - F, G, N = graph - if "torch" in edge_index_type: - if edge_index_type == "torch-cpu": - device = "cpu" - else: - device = "cuda" - for et in list(G.keys()): - G[et][0] = torch.as_tensor(G[et][0], device=device) - G[et][1] = torch.as_tensor(G[et][1], device=device) - elif edge_index_type == "cudf": - for et in list(G.keys()): - G[et][0] = cudf.Series(G[et][0]) - G[et][1] = cudf.Series(G[et][1]) - - cugraph_store = CuGraphStore(F, G, N) - - for pyg_can_edge_type in G: - src, dst = cugraph_store.get_edge_index( - edge_type=pyg_can_edge_type, layout="coo", is_sorted=False - ) - - if edge_index_type == "cudf": - assert G[pyg_can_edge_type][0].values_host.tolist() == src.tolist() - assert G[pyg_can_edge_type][1].values_host.tolist() == dst.tolist() - else: - assert 
G[pyg_can_edge_type][0].tolist() == src.tolist() - assert G[pyg_can_edge_type][1].tolist() == dst.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_edge_types(graph): - F, G, N = graph - cugraph_store = CuGraphStore(F, G, N) - - eta = cugraph_store._edge_types_to_attrs - assert eta.keys() == G.keys() - - for attr_name, attr_repr in eta.items(): - src_size = N[attr_name[0]] - dst_size = N[attr_name[-1]] - assert src_size == attr_repr.size[0] - assert dst_size == attr_repr.size[-1] - assert attr_name == attr_repr.edge_type - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_get_subgraph(graph): - F, G, N = graph - cugraph_store = CuGraphStore(F, G, N) - - if len(G.keys()) > 1: - for edge_type in G.keys(): - # Subgraphing is not implemented yet and should raise an error - with pytest.raises(ValueError): - sg = cugraph_store._subgraph([edge_type]) - - sg = cugraph_store._subgraph(list(G.keys())) - assert isinstance(sg, cugraph.MultiGraph) - - num_edges = sum([len(v[0]) for v in G.values()]) - assert sg.number_of_edges() == num_edges - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_renumber_vertices_basic(single_vertex_graph): - F, G, N = single_vertex_graph - cugraph_store = CuGraphStore(F, G, N) - - nodes_of_interest = torch.as_tensor( - cupy.random.randint(0, sum(N.values()), 3), device="cuda" - ) - - index = cugraph_store._get_vertex_groups_from_sample(nodes_of_interest) - assert index["vt1"].tolist() == nodes_of_interest.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_renumber_vertices_multi_edge_multi_vertex(multi_edge_multi_vertex_graph_1): - F, G, N = multi_edge_multi_vertex_graph_1 - cugraph_store = CuGraphStore(F, G, N) - - nodes_of_interest = torch.as_tensor( - cupy.random.randint(0, sum(N.values()), 3), device="cuda" - ).unique() - - index = cugraph_store._get_vertex_groups_from_sample(nodes_of_interest) - - black_nodes = nodes_of_interest[nodes_of_interest <= 1] - brown_nodes = nodes_of_interest[nodes_of_interest > 1] - 2 - - if len(black_nodes) > 0: - assert index["black"].tolist() == sorted(black_nodes.tolist()) - if len(brown_nodes) > 0: - assert index["brown"].tolist() == sorted(brown_nodes.tolist()) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_renumber_edges(abc_graph): - F, G, N = abc_graph - - graph_store = CuGraphStore(F, G, N, order="CSR") - - # let 0, 1 be the start vertices, fanout = [2, 1, 2, 3] - mock_sampling_results = cudf.DataFrame( - { - "majors": cudf.Series([0, 0, 1, 2, 3, 3, 1, 3, 3, 3], dtype="int64"), - "minors": cudf.Series([2, 3, 3, 8, 1, 7, 3, 1, 5, 7], dtype="int64"), - "hop_id": cudf.Series([0, 0, 0, 1, 1, 1, 2, 3, 3, 3], dtype="int32"), - "edge_type": cudf.Series([0, 0, 0, 2, 1, 2, 0, 1, 2, 2], dtype="int32"), - } - ) - - mock_noi_index = { - "A": torch.tensor([0, 1], device="cuda"), - "B": torch.tensor([0, 1], device="cuda"), - "C": torch.tensor([3, 2, 0], device="cuda"), - } - - row_dict, col_dict = graph_store._get_renumbered_edge_groups_from_sample( - mock_sampling_results, mock_noi_index - ) - - assert len(row_dict) == 3 - assert len(col_dict) == 3 - assert row_dict[("A", "ab", "B")].tolist() == [0, 0, 1, 1] - assert col_dict[("A", "ab", "B")].tolist() == [0, 1, 1, 1] - assert row_dict[("B", "bc", "C")].tolist() == [0, 1, 1, 1] - assert col_dict[("B", "bc", "C")].tolist() == [0, 1, 2, 
1] - assert row_dict[("B", "ba", "A")].tolist() == [1, 1] - assert col_dict[("B", "ba", "A")].tolist() == [1, 1] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_get_tensor(graph): - F, G, N = graph - cugraph_store = CuGraphStore(F, G, N) - - for feature_name, feature_on_types in F.get_feature_list().items(): - for type_name in feature_on_types: - v_ids = np.arange(N[type_name]) - base_series = F.get_data( - v_ids, - type_name=type_name, - feat_name=feature_name, - ).tolist() - - tsr = cugraph_store.get_tensor( - type_name, feature_name, v_ids, None, cupy.int64 - ).tolist() - - assert tsr == base_series - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_get_tensor_empty_idx(karate_gnn): - F, G, N = karate_gnn - cugraph_store = CuGraphStore(F, G, N) - - t = cugraph_store.get_tensor( - CuGraphTensorAttr(group_name="type0", attr_name="prop0", index=None) - ) - assert t.tolist() == (torch.arange(17, dtype=torch.float32) * 31).tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_multi_get_tensor(graph): - F, G, N = graph - cugraph_store = CuGraphStore(F, G, N) - - for vertex_type in sorted(N.keys()): - v_ids = np.arange(N[vertex_type]) - feat_names = list(F.get_feature_list().keys()) - base_series = None - for feat_name in feat_names: - if base_series is None: - base_series = F.get_data(v_ids, vertex_type, feat_name) - else: - base_series = np.stack( - [base_series, F.get_data(v_ids, vertex_type, feat_name)] - ) - - tsr = cugraph_store.multi_get_tensor( - [ - CuGraphTensorAttr(vertex_type, feat_name, v_ids) - for feat_name in feat_names - ] - ) - - assert torch.stack(tsr).tolist() == base_series.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_get_all_tensor_attrs(graph): - F, G, N = graph - cugraph_store = CuGraphStore(F, G, N) - - tensor_attrs = [] - for vertex_type in sorted(N.keys()): - for prop in ["prop1", "prop2"]: - tensor_attrs.append( - CuGraphTensorAttr( - vertex_type, - prop, - properties=None, - dtype=F.get_data([0], vertex_type, "prop1").dtype, - ) - ) - - for t in tensor_attrs: - print(t) - - print("\n\n") - - for t in cugraph_store.get_all_tensor_attrs(): - print(t) - - assert sorted(tensor_attrs, key=lambda a: (a.group_name, a.attr_name)) == sorted( - cugraph_store.get_all_tensor_attrs(), key=lambda a: (a.group_name, a.attr_name) - ) - - -@pytest.mark.skip("not implemented") -def test_get_tensor_spec_props(graph): - raise NotImplementedError("not implemented") - - -@pytest.mark.skip("not implemented") -def test_multi_get_tensor_spec_props(multi_edge_multi_vertex_graph_1): - raise NotImplementedError("not implemented") - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_get_tensor_from_tensor_attrs(graph): - F, G, N = graph - cugraph_store = CuGraphStore(F, G, N) - - tensor_attrs = cugraph_store.get_all_tensor_attrs() - for tensor_attr in tensor_attrs: - v_ids = np.arange(N[tensor_attr.group_name]) - data = F.get_data(v_ids, tensor_attr.group_name, tensor_attr.attr_name) - - tensor_attr.index = v_ids - assert cugraph_store.get_tensor(tensor_attr).tolist() == data.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_get_tensor_size(graph): - F, G, N = graph - cugraph_store = CuGraphStore(F, G, N) - - tensor_attrs = cugraph_store.get_all_tensor_attrs() - for tensor_attr in 
tensor_attrs: - sz = N[tensor_attr.group_name] - - tensor_attr.index = np.arange(sz) - assert cugraph_store.get_tensor_size(tensor_attr) == torch.Size((sz,)) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif( - isinstance(torch_geometric, MissingModule), reason="pyg not available" -) -def test_get_input_nodes(karate_gnn): - F, G, N = karate_gnn - cugraph_store = CuGraphStore(F, G, N) - - node_type, input_nodes = torch_geometric.loader.utils.get_input_nodes( - (cugraph_store, cugraph_store), "type0" - ) - - assert node_type == "type0" - assert input_nodes.tolist() == torch.arange(17, dtype=torch.int32).tolist() - - -def test_serialize(multi_edge_multi_vertex_no_graph_1): - import pickle - - F, G, N = multi_edge_multi_vertex_no_graph_1 - cugraph_store = CuGraphStore(F, G, N) - - cugraph_store_copy = pickle.loads(pickle.dumps(cugraph_store)) - - for tensor_attr in cugraph_store.get_all_tensor_attrs(): - sz = cugraph_store.get_tensor_size(tensor_attr)[0] - tensor_attr.index = np.arange(sz) - assert ( - cugraph_store.get_tensor(tensor_attr).tolist() - == cugraph_store_copy.get_tensor(tensor_attr).tolist() - ) - - # Currently does not store edgelist properly for SG - """ - for edge_attr in cugraph_store.get_all_edge_attrs(): - assert cugraph_store.get_edge_index(edge_attr) \ - == cugraph_store_copy.get_edge_index(edge_attr) - """
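
The store tests above revolve around `CuGraphStore`'s PyG-style tensor attributes. An illustrative sketch of the feature-access pattern they cover is shown below; the helper name `read_all_vertex_features` is hypothetical, and the sketch assumes the `get_all_tensor_attrs` / `get_tensor` API exercised in the deleted tests.

import numpy as np
from cugraph_pyg.data import CuGraphStore

def read_all_vertex_features(F, G, N):
    # Materialize every registered (vertex type, feature name) tensor.
    store = CuGraphStore(F, G, N)
    features = {}
    for attr in store.get_all_tensor_attrs():
        attr.index = np.arange(N[attr.group_name])  # request all vertices of this type
        features[(attr.group_name, attr.attr_name)] = store.get_tensor(attr)
    return features
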