diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py index 9600dca..7401fa7 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py @@ -277,112 +277,3 @@ def to(self, device: Union[torch.device, str, int]) -> "cugraph_dgl.nn.SparseGra ) return sg - - -def conditional_class(import_name): - def decorator(cls): - try: - __import__(import_name) - return cls - except ImportError: - return None - - return decorator - - -@conditional_class("pylibcugraphops") -class BaseConv(torch.nn.Module): - r"""An abstract base class for cugraph-ops nn module.""" - - def __init__(self): - super().__init__() - - def reset_parameters(self): - r"""Resets all learnable parameters of the module.""" - raise NotImplementedError - - def forward(self, *args): - r"""Runs the forward pass of the module.""" - raise NotImplementedError - - def get_cugraph_ops_CSC( - self, - g: Union[SparseGraph, dgl.DGLHeteroGraph], - is_bipartite: bool = False, - max_in_degree: Optional[int] = None, - ) -> "ops_torch.CSC": - """Create CSC structure needed by cugraph-ops.""" - - if not isinstance(g, (SparseGraph, dgl.DGLHeteroGraph)): - raise TypeError( - f"The graph has to be either a 'cugraph_dgl.nn.SparseGraph' or " - f"'dgl.DGLHeteroGraph', but got '{type(g)}'." - ) - - # TODO: max_in_degree should default to None in pylibcugraphops - if max_in_degree is None: - max_in_degree = -1 - - if isinstance(g, SparseGraph): - offsets, indices, _ = g.csc() - else: - offsets, indices, _ = g.adj_tensors("csc") - - graph = ops_torch.CSC( - offsets=offsets, - indices=indices, - num_src_nodes=g.num_src_nodes(), - dst_max_in_degree=max_in_degree, - is_bipartite=is_bipartite, - ) - - return graph - - def get_cugraph_ops_HeteroCSC( - self, - g: Union[SparseGraph, dgl.DGLHeteroGraph], - num_edge_types: int, - etypes: Optional[torch.Tensor] = None, - is_bipartite: bool = False, - max_in_degree: Optional[int] = None, - ) -> "ops_torch.HeteroCSC": - """Create HeteroCSC structure needed by cugraph-ops.""" - - if not isinstance(g, (SparseGraph, dgl.DGLHeteroGraph)): - raise TypeError( - f"The graph has to be either a 'cugraph_dgl.nn.SparseGraph' or " - f"'dgl.DGLHeteroGraph', but got '{type(g)}'." - ) - - # TODO: max_in_degree should default to None in pylibcugraphops - if max_in_degree is None: - max_in_degree = -1 - - if isinstance(g, SparseGraph): - offsets, indices, etypes = g.csc() - if etypes is None: - raise ValueError( - "SparseGraph must have 'values' to create HeteroCSC. " - "Pass in edge types as 'values' when creating the SparseGraph." - ) - etypes = etypes.int() - else: - if etypes is None: - raise ValueError( - "'etypes' is required when creating HeteroCSC " - "from dgl.DGLHeteroGraph." - ) - offsets, indices, perm = g.adj_tensors("csc") - etypes = etypes[perm].int() - - graph = ops_torch.HeteroCSC( - offsets=offsets, - indices=indices, - edge_types=etypes, - num_src_nodes=g.num_src_nodes(), - num_edge_types=num_edge_types, - dst_max_in_degree=max_in_degree, - is_bipartite=is_bipartite, - ) - - return graph diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py deleted file mode 100644 index e881327..0000000 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py +++ /dev/null @@ -1,314 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
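Note: the deleted get_cugraph_ops_HeteroCSC helper above only accepts edge types from a SparseGraph through the graph's 'values' slot; a SparseGraph built without values raises the ValueError shown. A minimal construction sketch, assuming a CUDA device and the toy edge list used in the docstring examples below (tensors are illustrative):

    import torch
    from cugraph_dgl.nn.conv.base import SparseGraph

    src = torch.tensor([0, 1, 2, 3, 2, 5], device="cuda")
    dst = torch.tensor([1, 2, 3, 4, 0, 3], device="cuda")
    # Edge types ride along as 'values'; int32 avoids the cast inside the helper.
    etypes = torch.tensor([0, 1, 2, 0, 1, 2], dtype=torch.int32, device="cuda")
    sg = SparseGraph(
        size=(6, 6), src_ids=src, dst_ids=dst, values=etypes, formats="csc"
    )

The test_relgraphconv file further down builds exactly this kind of input and then passes sg.values() as the etypes argument.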
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Union - -from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph -from cugraph.utilities.utils import import_optional - -dgl = import_optional("dgl") -torch = import_optional("torch") -nn = import_optional("torch.nn") -ops_torch = import_optional("pylibcugraphops.pytorch") - - -class GATConv(BaseConv): - r"""Graph attention layer from `Graph Attention Network - <https://arxiv.org/abs/1710.10903>`__, with the sparse aggregation - accelerated by cugraph-ops. - - Parameters - ---------- - in_feats : int or (int, int) - Input feature size. A pair denotes feature sizes of source and - destination nodes. - out_feats : int - Output feature size. - num_heads : int - Number of heads in multi-head attention. - feat_drop : float, optional - Dropout rate on feature. Default: ``0``. - concat : bool, optional - If False, the multi-head attentions are averaged instead of concatenated. - Default: ``True``. - edge_feats : int, optional - Edge feature size. Default: ``None``. - negative_slope : float, optional - LeakyReLU angle of negative slope. Default: ``0.2``. - residual : bool, optional - If True, use residual connection. Default: ``False``. - allow_zero_in_degree : bool, optional - If there are 0-in-degree nodes in the graph, output for those nodes will - be invalid since no message will be passed to those nodes. This is - harmful for some applications, causing silent performance regression. - This module will raise a DGLError if it detects 0-in-degree nodes in - input graph. By setting ``True``, it will suppress the check and let the - users handle it by themselves. Default: ``False``. - bias : bool, optional - If True, learns a bias term. Default: ``True``. - - Examples - -------- - >>> import dgl - >>> import torch - >>> from cugraph_dgl.nn import GATConv - ... 
- >>> device = 'cuda' - >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])).to(device) - >>> g = dgl.add_self_loop(g) - >>> feat = torch.ones(6, 10).to(device) - >>> conv = GATConv(10, 2, num_heads=3).to(device) - >>> res = conv(g, feat) - >>> res - tensor([[[ 0.2340, 1.9226], - [ 1.6477, -1.9986], - [ 1.1138, -1.9302]], - [[ 0.2340, 1.9226], - [ 1.6477, -1.9986], - [ 1.1138, -1.9302]], - [[ 0.2340, 1.9226], - [ 1.6477, -1.9986], - [ 1.1138, -1.9302]], - [[ 0.2340, 1.9226], - [ 1.6477, -1.9986], - [ 1.1138, -1.9302]], - [[ 0.2340, 1.9226], - [ 1.6477, -1.9986], - [ 1.1138, -1.9302]], - [[ 0.2340, 1.9226], - [ 1.6477, -1.9986], - [ 1.1138, -1.9302]]], device='cuda:0', grad_fn=) - """ - - def __init__( - self, - in_feats: Union[int, tuple[int, int]], - out_feats: int, - num_heads: int, - feat_drop: float = 0.0, - concat: bool = True, - edge_feats: Optional[int] = None, - negative_slope: float = 0.2, - residual: bool = False, - allow_zero_in_degree: bool = False, - bias: bool = True, - ): - super().__init__() - - if isinstance(in_feats, int): - self.in_feats_src = self.in_feats_dst = in_feats - else: - self.in_feats_src, self.in_feats_dst = in_feats - self.in_feats = in_feats - self.out_feats = out_feats - self.num_heads = num_heads - self.feat_drop = nn.Dropout(feat_drop) - self.concat = concat - self.edge_feats = edge_feats - self.negative_slope = negative_slope - self.residual = residual - self.allow_zero_in_degree = allow_zero_in_degree - - if isinstance(in_feats, int): - self.lin = nn.Linear(in_feats, num_heads * out_feats, bias=False) - else: - self.lin_src = nn.Linear( - self.in_feats_src, num_heads * out_feats, bias=False - ) - self.lin_dst = nn.Linear( - self.in_feats_dst, num_heads * out_feats, bias=False - ) - - if edge_feats is not None: - self.lin_edge = nn.Linear(edge_feats, num_heads * out_feats, bias=False) - self.attn_weights = nn.Parameter(torch.empty(3 * num_heads * out_feats)) - else: - self.register_parameter("lin_edge", None) - self.attn_weights = nn.Parameter(torch.empty(2 * num_heads * out_feats)) - - out_dim = num_heads * out_feats if concat else out_feats - if residual: - if self.in_feats_dst != out_dim: - self.lin_res = nn.Linear(self.in_feats_dst, out_dim, bias=bias) - else: - self.lin_res = nn.Identity() - else: - self.register_buffer("lin_res", None) - - if bias and not isinstance(self.lin_res, nn.Linear): - if concat: - self.bias = nn.Parameter(torch.empty(num_heads, out_feats)) - else: - self.bias = nn.Parameter(torch.empty(out_feats)) - else: - self.register_buffer("bias", None) - - self.reset_parameters() - - def set_allow_zero_in_degree(self, set_value): - r"""Set allow_zero_in_degree flag.""" - self.allow_zero_in_degree = set_value - - def reset_parameters(self): - r"""Reinitialize learnable parameters.""" - gain = nn.init.calculate_gain("relu") - if hasattr(self, "lin"): - nn.init.xavier_normal_(self.lin.weight, gain=gain) - else: - nn.init.xavier_normal_(self.lin_src.weight, gain=gain) - nn.init.xavier_normal_(self.lin_dst.weight, gain=gain) - - nn.init.xavier_normal_( - self.attn_weights.view(-1, self.num_heads, self.out_feats), gain=gain - ) - if self.lin_edge is not None: - self.lin_edge.reset_parameters() - - if self.lin_res is not None: - self.lin_res.reset_parameters() - - if self.bias is not None: - nn.init.zeros_(self.bias) - - def forward( - self, - g: Union[SparseGraph, dgl.DGLHeteroGraph], - nfeat: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], - efeat: Optional[torch.Tensor] = None, - max_in_degree: Optional[int] = None, - 
deterministic_dgrad: bool = False, - deterministic_wgrad: bool = False, - high_precision_dgrad: bool = False, - high_precision_wgrad: bool = False, - ) -> torch.Tensor: - r"""Forward computation. - - Parameters - ---------- - graph : DGLGraph or SparseGraph - The graph. - nfeat : torch.Tensor or (torch.Tensor, torch.Tensor) - Node features. If given as a tuple, the two elements correspond to - the source and destination node features, respectively, in a - bipartite graph. - efeat: torch.Tensor, optional - Optional edge features. - max_in_degree : int - Maximum in-degree of destination nodes. When :attr:`g` is generated - from a neighbor sampler, the value should be set to the corresponding - :attr:`fanout`. This option is used to invoke the MFG-variant of - cugraph-ops kernel. - deterministic_dgrad : bool, default=False - Optional flag indicating whether the feature gradients - are computed deterministically using a dedicated workspace buffer. - deterministic_wgrad: bool, default=False - Optional flag indicating whether the weight gradients - are computed deterministically using a dedicated workspace buffer. - high_precision_dgrad: bool, default=False - Optional flag indicating whether gradients for inputs in half precision - are kept in single precision as long as possible and only cast to - the corresponding input type at the very end. - high_precision_wgrad: bool, default=False - Optional flag indicating whether gradients for weights in half precision - are kept in single precision as long as possible and only cast to - the corresponding input type at the very end. - - Returns - ------- - torch.Tensor - The output feature of shape :math:`(N, H, D_{out})` where - :math:`H` is the number of heads, and :math:`D_{out}` is size of - output feature. - """ - if isinstance(g, dgl.DGLHeteroGraph): - if not self.allow_zero_in_degree: - if (g.in_degrees() == 0).any(): - raise dgl.base.DGLError( - "There are 0-in-degree nodes in the graph, " - "output for those nodes will be invalid. " - "This is harmful for some applications, " - "causing silent performance regression. " - "Adding self-loop on the input graph by " - "calling `g = dgl.add_self_loop(g)` will resolve " - "the issue. Setting ``allow_zero_in_degree`` " - "to be `True` when constructing this module will " - "suppress the check and let the code run." - ) - - bipartite = isinstance(nfeat, (list, tuple)) - - _graph = self.get_cugraph_ops_CSC( - g, is_bipartite=bipartite, max_in_degree=max_in_degree - ) - if deterministic_dgrad: - _graph.add_reverse_graph() - - if bipartite: - nfeat = (self.feat_drop(nfeat[0]), self.feat_drop(nfeat[1])) - nfeat_dst_orig = nfeat[1] - else: - nfeat = self.feat_drop(nfeat) - nfeat_dst_orig = nfeat[: g.num_dst_nodes()] - - if efeat is not None: - if self.lin_edge is None: - raise RuntimeError( - f"{self.__class__.__name__}.edge_feats must be set to " - f"accept edge features." - ) - efeat = self.lin_edge(efeat) - - if bipartite: - if not hasattr(self, "lin_src"): - nfeat_src = self.lin(nfeat[0]) - nfeat_dst = self.lin(nfeat[1]) - else: - nfeat_src = self.lin_src(nfeat[0]) - nfeat_dst = self.lin_dst(nfeat[1]) - else: - if not hasattr(self, "lin"): - raise RuntimeError( - f"{self.__class__.__name__}.in_feats is expected to be an " - f"integer when the graph is not bipartite, " - f"but got {self.in_feats}." 
- ) - nfeat = self.lin(nfeat) - - out = ops_torch.operators.mha_gat_n2n( - (nfeat_src, nfeat_dst) if bipartite else nfeat, - self.attn_weights, - _graph, - num_heads=self.num_heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=efeat, - deterministic_dgrad=deterministic_dgrad, - deterministic_wgrad=deterministic_wgrad, - high_precision_dgrad=high_precision_dgrad, - high_precision_wgrad=high_precision_wgrad, - )[: g.num_dst_nodes()] - - if self.concat: - out = out.view(-1, self.num_heads, self.out_feats) - - if self.residual: - res = self.lin_res(nfeat_dst_orig).view(-1, self.num_heads, self.out_feats) - if not self.concat: - res = res.mean(dim=1) - out = out + res - - if self.bias is not None: - out = out + self.bias - - return out diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatv2conv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatv2conv.py deleted file mode 100644 index 4f47005..0000000 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatv2conv.py +++ /dev/null @@ -1,254 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Union - -from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph -from cugraph.utilities.utils import import_optional - -dgl = import_optional("dgl") -torch = import_optional("torch") -nn = import_optional("torch.nn") -ops_torch = import_optional("pylibcugraphops.pytorch") - - -class GATv2Conv(BaseConv): - r"""GATv2 from `How Attentive are Graph Attention Networks? - <https://arxiv.org/abs/2105.14491>`__, with the sparse aggregation - accelerated by cugraph-ops. - - Parameters - ---------- - in_feats : int or (int, int) - Input feature size. A pair denotes feature sizes of source and - destination nodes. - out_feats : int - Output feature size. - num_heads : int - Number of heads in Multi-Head Attention. - feat_drop : float, optional - Dropout rate on feature. Default: ``0``. - concat : bool, optional - If False, the multi-head attentions are averaged instead of concatenated. - Default: ``True``. - edge_feats : int, optional - Edge feature size. Default: ``None``. - negative_slope : float, optional - LeakyReLU angle of negative slope. Default: ``0.2``. - residual : bool, optional - If True, use residual connection. Default: ``False``. - allow_zero_in_degree : bool, optional - If there are 0-in-degree nodes in the graph, output for those nodes will - be invalid since no message will be passed to those nodes. This is - harmful for some applications, causing silent performance regression. - This module will raise a DGLError if it detects 0-in-degree nodes in - input graph. By setting ``True``, it will suppress the check and let the - users handle it by themselves. Default: ``False``. - bias : bool, optional - If True, learns a bias term. Default: ``True``. - share_weights : bool, optional - If ``True``, the same matrix will be applied to the source and the - destination node features. Default: ``False``. 
- """ - - def __init__( - self, - in_feats: Union[int, tuple[int, int]], - out_feats: int, - num_heads: int, - feat_drop: float = 0.0, - concat: bool = True, - edge_feats: Optional[int] = None, - negative_slope: float = 0.2, - residual: bool = False, - allow_zero_in_degree: bool = False, - bias: bool = True, - share_weights: bool = False, - ): - super().__init__() - - if isinstance(in_feats, int): - self.in_feats_src = self.in_feats_dst = in_feats - else: - self.in_feats_src, self.in_feats_dst = in_feats - self.in_feats = in_feats - self.out_feats = out_feats - self.num_heads = num_heads - self.feat_drop = nn.Dropout(feat_drop) - self.concat = concat - self.edge_feats = edge_feats - self.negative_slope = negative_slope - self.residual = residual - self.allow_zero_in_degree = allow_zero_in_degree - self.share_weights = share_weights - self.bias = bias - - self.lin_src = nn.Linear(self.in_feats_src, num_heads * out_feats, bias=bias) - if share_weights: - if self.in_feats_src != self.in_feats_dst: - raise ValueError( - f"Input feature size of source and destination " - f"nodes must be identical when share_weights is enabled, " - f"but got {self.in_feats_src} and {self.in_feats_dst}." - ) - self.lin_dst = self.lin_src - else: - self.lin_dst = nn.Linear( - self.in_feats_dst, num_heads * out_feats, bias=bias - ) - - self.attn_weights = nn.Parameter(torch.empty(num_heads * out_feats)) - - if edge_feats is not None: - self.lin_edge = nn.Linear(edge_feats, num_heads * out_feats, bias=False) - else: - self.register_parameter("lin_edge", None) - - out_dim = num_heads * out_feats if concat else out_feats - if residual: - if self.in_feats_dst != out_dim: - self.lin_res = nn.Linear(self.in_feats_dst, out_dim, bias=bias) - else: - self.lin_res = nn.Identity() - else: - self.register_buffer("lin_res", None) - - self.reset_parameters() - - def set_allow_zero_in_degree(self, set_value): - r"""Set allow_zero_in_degree flag.""" - self.allow_zero_in_degree = set_value - - def reset_parameters(self): - r"""Reinitialize learnable parameters.""" - gain = nn.init.calculate_gain("relu") - nn.init.xavier_normal_(self.lin_src.weight, gain=gain) - nn.init.xavier_normal_(self.lin_dst.weight, gain=gain) - - nn.init.xavier_normal_( - self.attn_weights.view(-1, self.num_heads, self.out_feats), gain=gain - ) - if self.lin_edge is not None: - self.lin_edge.reset_parameters() - - if self.lin_res is not None: - self.lin_res.reset_parameters() - - def forward( - self, - g: Union[SparseGraph, dgl.DGLHeteroGraph], - nfeat: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], - efeat: Optional[torch.Tensor] = None, - max_in_degree: Optional[int] = None, - deterministic_dgrad: bool = False, - deterministic_wgrad: bool = False, - ) -> torch.Tensor: - r"""Forward computation. - - Parameters - ---------- - graph : DGLGraph or SparseGraph - The graph. - nfeat : torch.Tensor - Input features of shape :math:`(N, D_{in})`. - efeat: torch.Tensor, optional - Optional edge features. - max_in_degree : int - Maximum in-degree of destination nodes. When :attr:`g` is generated - from a neighbor sampler, the value should be set to the corresponding - :attr:`fanout`. This option is used to invoke the MFG-variant of - cugraph-ops kernel. - deterministic_dgrad : bool, default=False - Optional flag indicating whether the feature gradients - are computed deterministically using a dedicated workspace buffer. 
- deterministic_wgrad: bool, default=False - Optional flag indicating whether the weight gradients - are computed deterministically using a dedicated workspace buffer. - - Returns - ------- - torch.Tensor - The output feature of shape :math:`(N, H, D_{out})` where - :math:`H` is the number of heads, and :math:`D_{out}` is size of - output feature. - """ - - if isinstance(g, dgl.DGLHeteroGraph): - if not self.allow_zero_in_degree: - if (g.in_degrees() == 0).any(): - raise dgl.base.DGLError( - "There are 0-in-degree nodes in the graph, " - "output for those nodes will be invalid. " - "This is harmful for some applications, " - "causing silent performance regression. " - "Adding self-loop on the input graph by " - "calling `g = dgl.add_self_loop(g)` will resolve " - "the issue. Setting ``allow_zero_in_degree`` " - "to be `True` when constructing this module will " - "suppress the check and let the code run." - ) - - nfeat_bipartite = isinstance(nfeat, (list, tuple)) - graph_bipartite = nfeat_bipartite or self.share_weights is False - - _graph = self.get_cugraph_ops_CSC( - g, is_bipartite=graph_bipartite, max_in_degree=max_in_degree - ) - if deterministic_dgrad: - _graph.add_reverse_graph() - - if nfeat_bipartite: - nfeat = (self.feat_drop(nfeat[0]), self.feat_drop(nfeat[1])) - nfeat_dst_orig = nfeat[1] - else: - nfeat = self.feat_drop(nfeat) - nfeat_dst_orig = nfeat[: g.num_dst_nodes()] - - if efeat is not None: - if self.lin_edge is None: - raise RuntimeError( - f"{self.__class__.__name__}.edge_feats must be set to " - f"accept edge features." - ) - efeat = self.lin_edge(efeat) - - if nfeat_bipartite: - nfeat = (self.lin_src(nfeat[0]), self.lin_dst(nfeat[1])) - elif graph_bipartite: - nfeat = (self.lin_src(nfeat), self.lin_dst(nfeat[: g.num_dst_nodes()])) - else: - nfeat = self.lin_src(nfeat) - - out = ops_torch.operators.mha_gat_v2_n2n( - nfeat, - self.attn_weights, - _graph, - num_heads=self.num_heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=efeat, - deterministic_dgrad=deterministic_dgrad, - deterministic_wgrad=deterministic_wgrad, - )[: g.num_dst_nodes()] - - if self.concat: - out = out.view(-1, self.num_heads, self.out_feats) - - if self.residual: - res = self.lin_res(nfeat_dst_orig).view(-1, self.num_heads, self.out_feats) - if not self.concat: - res = res.mean(dim=1) - out = out + res - - return out diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/relgraphconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/relgraphconv.py deleted file mode 100644 index 5c4b5de..0000000 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/relgraphconv.py +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
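Note: the deleted GATv2Conv, unlike GATConv, shipped without an Examples block. A minimal usage sketch in the spirit of the GATConv example above, assuming a CUDA device and the optional dgl/torch/pylibcugraphops dependencies (shapes are illustrative):

    import dgl
    import torch
    from cugraph_dgl.nn import GATv2Conv

    device = "cuda"
    g = dgl.graph(([0, 1, 2, 3, 2, 5], [1, 2, 3, 4, 0, 3])).to(device)
    g = dgl.add_self_loop(g)  # avoids the 0-in-degree DGLError in forward()
    feat = torch.ones(6, 10).to(device)
    conv = GATv2Conv(10, 2, num_heads=3).to(device)
    res = conv(g, feat)  # (6, 3, 2): (nodes, heads, out_feats) since concat=True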
- -import math -from typing import Optional, Union - -from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph -from cugraph.utilities.utils import import_optional - -dgl = import_optional("dgl") -torch = import_optional("torch") -nn = import_optional("torch.nn") -ops_torch = import_optional("pylibcugraphops.pytorch") - - -class RelGraphConv(BaseConv): - r"""An accelerated relational graph convolution layer from `Modeling - Relational Data with Graph Convolutional Networks - <https://arxiv.org/abs/1703.06103>`__, with the sparse aggregation - accelerated by cugraph-ops. - - Parameters - ---------- - in_feats : int - Input feature size. - out_feats : int - Output feature size. - num_rels : int - Number of relations. - regularizer : str, optional - Which weight regularizer to use ("basis" or ``None``): - - "basis" is for basis-decomposition. - - ``None`` applies no regularization. - Default: ``None``. - num_bases : int, optional - Number of bases. It comes into effect when a regularizer is applied. - Default: ``None``. - bias : bool, optional - True if bias is added. Default: ``True``. - self_loop : bool, optional - True to include self loop message. Default: ``True``. - dropout : float, optional - Dropout rate. Default: ``0.0``. - apply_norm : bool, optional - True to normalize aggregation output by the in-degree of the destination - node per edge type, i.e. :math:`|\mathcal{N}^r_i|`. Default: ``False``. - - Examples - -------- - >>> import dgl - >>> import torch - >>> from cugraph_dgl.nn import RelGraphConv - ... - >>> device = 'cuda' - >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])).to(device) - >>> feat = torch.ones(6, 10).to(device) - >>> conv = RelGraphConv( - ... 10, 2, 3, regularizer='basis', num_bases=2).to(device) - >>> etypes = torch.tensor([0,1,2,0,1,2]).to(device) - >>> res = conv(g, feat, etypes) - >>> res - tensor([[-1.7774, -2.0184], - [-1.4335, -2.3758], - [-1.7774, -2.0184], - [-0.4698, -3.0876], - [-1.4335, -2.3758], - [-1.4331, -2.3295]], device='cuda:0', grad_fn=) - """ - - def __init__( - self, - in_feats: int, - out_feats: int, - num_rels: int, - regularizer: Optional[str] = None, - num_bases: Optional[int] = None, - bias: bool = True, - self_loop: bool = True, - dropout: float = 0.0, - apply_norm: bool = False, - ): - super().__init__() - self.in_feats = in_feats - self.out_feats = out_feats - self.num_rels = num_rels - self.apply_norm = apply_norm - self.dropout = nn.Dropout(dropout) - - dim_self_loop = 1 if self_loop else 0 - self.self_loop = self_loop - if regularizer is None: - self.W = nn.Parameter( - torch.empty(num_rels + dim_self_loop, in_feats, out_feats) - ) - self.coeff = None - elif regularizer == "basis": - if num_bases is None: - raise ValueError('Missing "num_bases" for basis regularization.') - self.W = nn.Parameter( - torch.empty(num_bases + dim_self_loop, in_feats, out_feats) - ) - self.coeff = nn.Parameter(torch.empty(num_rels, num_bases)) - self.num_bases = num_bases - else: - raise ValueError( - f"Supported regularizer options: 'basis' or None, but got " - f"'{regularizer}'." 
- ) - self.regularizer = regularizer - - if bias: - self.bias = nn.Parameter(torch.empty(out_feats)) - else: - self.register_parameter("bias", None) - - self.reset_parameters() - - def reset_parameters(self): - r"""Reinitialize learnable parameters.""" - bound = 1 / math.sqrt(self.in_feats) - end = -1 if self.self_loop else None - nn.init.uniform_(self.W[:end], -bound, bound) - if self.regularizer == "basis": - nn.init.xavier_uniform_(self.coeff, gain=nn.init.calculate_gain("relu")) - if self.self_loop: - nn.init.xavier_uniform_(self.W[-1], nn.init.calculate_gain("relu")) - if self.bias is not None: - nn.init.zeros_(self.bias) - - def forward( - self, - g: Union[SparseGraph, dgl.DGLHeteroGraph], - feat: torch.Tensor, - etypes: torch.Tensor, - max_in_degree: Optional[int] = None, - ) -> torch.Tensor: - r"""Forward computation. - - Parameters - ---------- - g : DGLGraph - The graph. - feat : torch.Tensor - A 2D tensor of node features. Shape: :math:`(|V|, D_{in})`. - etypes : torch.Tensor - A 1D integer tensor of edge types. Shape: :math:`(|E|,)`. - Note that cugraph-ops only accepts edge type tensors in int32, - so any input of other integer types will be cast to int32, - thus introducing some overhead. Pass in int32 tensors directly - for best performance. - max_in_degree : int - Maximum in-degree of destination nodes. When :attr:`g` is generated - from a neighbor sampler, the value should be set to the corresponding - :attr:`fanout`. This option is used to invoke the MFG-variant of - cugraph-ops kernel. - - Returns - ------- - torch.Tensor - New node features. Shape: :math:`(|V|, D_{out})`. - """ - _graph = self.get_cugraph_ops_HeteroCSC( - g, - num_edge_types=self.num_rels, - etypes=etypes, - is_bipartite=False, - max_in_degree=max_in_degree, - ) - - h = ops_torch.operators.agg_hg_basis_n2n_post( - feat, - self.coeff, - _graph, - concat_own=self.self_loop, - norm_by_out_degree=self.apply_norm, - )[: g.num_dst_nodes()] - h = h @ self.W.view(-1, self.out_feats) - if self.bias is not None: - h = h + self.bias - h = self.dropout(h) - - return h diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/sageconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/sageconv.py deleted file mode 100644 index b619890..0000000 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/sageconv.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Union - -from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph -from cugraph.utilities.utils import import_optional - -dgl = import_optional("dgl") -torch = import_optional("torch") -nn = import_optional("torch.nn") -ops_torch = import_optional("pylibcugraphops.pytorch") - - -class SAGEConv(BaseConv): - r"""An accelerated GraphSAGE layer from `Inductive Representation Learning - on Large Graphs <https://arxiv.org/abs/1706.02216>`__, with the sparse - aggregation accelerated by cugraph-ops. - - Parameters - ---------- - in_feats : int or tuple - Input feature size.
If a scalar is given, the source and destination - nodes are required to be the same. - out_feats : int - Output feature size. - aggregator_type : str - Aggregator type to use ("mean", "sum", "min", "max", "pool", "gcn"). - feat_drop : float - Dropout rate on features, default: ``0``. - bias : bool - If True, adds a learnable bias to the output. Default: ``True``. - - Examples - -------- - >>> import dgl - >>> import torch - >>> from cugraph_dgl.nn import SAGEConv - ... - >>> device = 'cuda' - >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])).to(device) - >>> g = dgl.add_self_loop(g) - >>> feat = torch.ones(6, 10).to(device) - >>> conv = SAGEConv(10, 2, 'mean').to(device) - >>> res = conv(g, feat) - >>> res - tensor([[-1.1690, 0.1952], - [-1.1690, 0.1952], - [-1.1690, 0.1952], - [-1.1690, 0.1952], - [-1.1690, 0.1952], - [-1.1690, 0.1952]], device='cuda:0', grad_fn=) - """ - valid_aggr_types = {"mean", "sum", "min", "max", "pool", "gcn"} - - def __init__( - self, - in_feats: Union[int, tuple[int, int]], - out_feats: int, - aggregator_type: str = "mean", - feat_drop: float = 0.0, - bias: bool = True, - ): - super().__init__() - - if aggregator_type not in self.valid_aggr_types: - raise ValueError( - f"Invalid aggregator_type. Must be one of {self.valid_aggr_types}. " - f"But got '{aggregator_type}' instead." - ) - - self.aggregator_type = aggregator_type - self._aggr = aggregator_type - self.in_feats = in_feats - self.out_feats = out_feats - self.in_feats_src, self.in_feats_dst = dgl.utils.expand_as_pair(in_feats) - self.feat_drop = nn.Dropout(feat_drop) - - if self.aggregator_type == "gcn": - self._aggr = "mean" - self.lin = nn.Linear(self.in_feats_src, out_feats, bias=bias) - else: - self.lin = nn.Linear( - self.in_feats_src + self.in_feats_dst, out_feats, bias=bias - ) - - if self.aggregator_type == "pool": - self._aggr = "max" - self.pre_lin = nn.Linear(self.in_feats_src, self.in_feats_src) - else: - self.register_parameter("pre_lin", None) - - self.reset_parameters() - - def reset_parameters(self): - r"""Reinitialize learnable parameters.""" - self.lin.reset_parameters() - if self.pre_lin is not None: - self.pre_lin.reset_parameters() - - def forward( - self, - g: Union[SparseGraph, dgl.DGLHeteroGraph], - feat: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], - max_in_degree: Optional[int] = None, - ) -> torch.Tensor: - r"""Forward computation. - - Parameters - ---------- - g : DGLGraph or SparseGraph - The graph. - feat : torch.Tensor or tuple - Node features. Shape: :math:`(|V|, D_{in})`. - max_in_degree : int - Maximum in-degree of destination nodes. When :attr:`g` is generated - from a neighbor sampler, the value should be set to the corresponding - :attr:`fanout`. This option is used to invoke the MFG-variant of - cugraph-ops kernel. - - Returns - ------- - torch.Tensor - Output node features. Shape: :math:`(|V|, D_{out})`. 
- """ - feat_bipartite = isinstance(feat, (list, tuple)) - graph_bipartite = feat_bipartite or self.aggregator_type == "pool" - - _graph = self.get_cugraph_ops_CSC( - g, is_bipartite=graph_bipartite, max_in_degree=max_in_degree - ) - - if feat_bipartite: - feat = (self.feat_drop(feat[0]), self.feat_drop(feat[1])) - else: - feat = self.feat_drop(feat) - - if self.aggregator_type == "pool": - if feat_bipartite: - feat = (self.pre_lin(feat[0]).relu(), feat[1]) - else: - feat = (self.pre_lin(feat).relu(), feat[: g.num_dst_nodes()]) - # force ctx.needs_input_grad=True in cugraph-ops autograd function - feat[0].requires_grad_() - feat[1].requires_grad_() - - out = ops_torch.operators.agg_concat_n2n(feat, _graph, self._aggr)[ - : g.num_dst_nodes() - ] - - if self.aggregator_type == "gcn": - out = out[:, : self.in_feats_src] - - out = self.lin(out) - - return out diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py deleted file mode 100644 index e77556f..0000000 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Union - -from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph -from cugraph.utilities.utils import import_optional - -dgl = import_optional("dgl") -torch = import_optional("torch") -nn = import_optional("torch.nn") -ops_torch = import_optional("pylibcugraphops.pytorch") - - -class TransformerConv(BaseConv): - r"""The graph transformer layer from the `"Masked Label Prediction: - Unified Message Passing Model for Semi-Supervised Classification" - `_ paper. - - Parameters - ---------- - in_node_feats : int or pair of ints - Input feature size. A pair denotes feature sizes of source and - destination nodes. - out_node_feats : int - Output feature size. - num_heads : int - Number of multi-head-attentions. - concat : bool, optional - If False, the multi-head attentions are averaged instead of concatenated. - Default: ``True``. - beta : bool, optional - If True, use a gated residual connection. Default: ``True``. - edge_feats: int, optional - Edge feature size. Default: ``None``. - bias: bool, optional - If True, learns a bias term. Default: ``True``. - root_weight: bool, optional - If False, will skip to learn a root weight matrix. Default: ``True``. 
- """ - - def __init__( - self, - in_node_feats: Union[int, tuple[int, int]], - out_node_feats: int, - num_heads: int, - concat: bool = True, - beta: bool = False, - edge_feats: Optional[int] = None, - bias: bool = True, - root_weight: bool = True, - ): - super().__init__() - - self.in_node_feats = in_node_feats - self.out_node_feats = out_node_feats - self.num_heads = num_heads - self.concat = concat - self.beta = beta - self.edge_feats = edge_feats - self.bias = bias - self.root_weight = root_weight - - if isinstance(in_node_feats, int): - in_node_feats = (in_node_feats, in_node_feats) - - self.lin_key = nn.Linear(in_node_feats[0], num_heads * out_node_feats) - self.lin_query = nn.Linear(in_node_feats[1], num_heads * out_node_feats) - self.lin_value = nn.Linear(in_node_feats[0], num_heads * out_node_feats) - - if edge_feats is not None: - self.lin_edge = nn.Linear( - edge_feats, num_heads * out_node_feats, bias=False - ) - else: - self.lin_edge = self.register_parameter("lin_edge", None) - - if concat: - self.lin_skip = nn.Linear( - in_node_feats[1], num_heads * out_node_feats, bias=bias - ) - if self.beta: - self.lin_beta = nn.Linear(3 * num_heads * out_node_feats, 1, bias=bias) - else: - self.lin_beta = self.register_parameter("lin_beta", None) - else: - self.lin_skip = nn.Linear(in_node_feats[1], out_node_feats, bias=bias) - if self.beta: - self.lin_beta = nn.Linear(3 * out_node_feats, 1, bias=False) - else: - self.lin_beta = self.register_parameter("lin_beta", None) - - self.reset_parameters() - - def reset_parameters(self): - self.lin_key.reset_parameters() - self.lin_query.reset_parameters() - self.lin_value.reset_parameters() - if self.lin_edge is not None: - self.lin_edge.reset_parameters() - if self.lin_skip is not None: - self.lin_skip.reset_parameters() - if self.lin_beta is not None: - self.lin_beta.reset_parameters() - - def forward( - self, - g: Union[SparseGraph, dgl.DGLHeteroGraph], - nfeat: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], - efeat: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - """Forward computation. - - Parameters - ---------- - g: DGLGraph - The graph. - nfeat: torch.Tensor or a pair of torch.Tensor - Node feature tensor. A pair denotes features for source and - destination nodes, respectively. - efeat: torch.Tensor, optional - Edge feature tensor. Default: ``None``. - """ - feat_bipartite = isinstance(nfeat, (list, tuple)) - if not feat_bipartite: - nfeat = (nfeat, nfeat) - - _graph = self.get_cugraph_ops_CSC(g, is_bipartite=True) - - query = self.lin_query(nfeat[1][: g.num_dst_nodes()]) - key = self.lin_key(nfeat[0]) - value = self.lin_value(nfeat[0]) - - if efeat is not None: - if self.lin_edge is None: - raise RuntimeError( - f"{self.__class__.__name__}.edge_feats must be set to allow " - f"edge features." 
- ) - efeat = self.lin_edge(efeat) - - out = ops_torch.operators.mha_simple_n2n( - key_emb=key, - query_emb=query, - value_emb=value, - graph=_graph, - num_heads=self.num_heads, - concat_heads=self.concat, - edge_emb=efeat, - norm_by_dim=True, - score_bias=None, - )[: g.num_dst_nodes()] - - if self.root_weight: - res = self.lin_skip(nfeat[1][: g.num_dst_nodes()]) - if self.lin_beta is not None: - beta = self.lin_beta(torch.cat([out, res, out - res], dim=-1)) - beta = beta.sigmoid() - out = beta * res + (1 - beta) * out - else: - out = out + res - - return out diff --git a/python/cugraph-dgl/cugraph_dgl/tests/conftest.py b/python/cugraph-dgl/cugraph_dgl/tests/conftest.py index 07086f2..ee1183f 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/conftest.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2025, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -22,12 +22,6 @@ ) -def pytest_ignore_collect(collection_path, config): - """Return True to prevent considering this path for collection.""" - if "nn" in collection_path.name: - return True - - @pytest.fixture(scope="module") def dask_client(): # start_dask_client will check for the SCHEDULER_FILE and diff --git a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatconv.py deleted file mode 100644 index de27efc..0000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatconv.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
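Note: the deleted TransformerConv likewise had no Examples block. A minimal sketch under the same assumptions (CUDA device, optional dependencies installed; sizes are illustrative):

    import dgl
    import torch
    from cugraph_dgl.nn import TransformerConv

    device = "cuda"
    g = dgl.graph(([0, 1, 2, 3, 2, 5], [1, 2, 3, 4, 0, 3])).to(device)
    nfeat = torch.rand(6, 5).to(device)
    efeat = torch.rand(g.num_edges(), 3).to(device)
    conv = TransformerConv(5, 2, num_heads=2, edge_feats=3).to(device)
    out = conv(g, nfeat, efeat)  # (num_dst_nodes, num_heads * out_node_feats)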
- -import pytest - -from cugraph_dgl.nn.conv.base import SparseGraph -from cugraph_dgl.nn import GATConv as CuGraphGATConv - -dgl = pytest.importorskip("dgl", reason="DGL not available") -torch = pytest.importorskip("torch", reason="PyTorch not available") - -ATOL = 1e-6 - - -@pytest.mark.parametrize("mode", ["bipartite", "share_weights", "regular"]) -@pytest.mark.parametrize("idx_type", [torch.int32, torch.int64]) -@pytest.mark.parametrize("max_in_degree", [None, 8]) -@pytest.mark.parametrize("num_heads", [1, 2, 7]) -@pytest.mark.parametrize("residual", [False, True]) -@pytest.mark.parametrize("to_block", [False, True]) -@pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) -def test_gatconv_equality( - dgl_graph_1, - mode, - idx_type, - max_in_degree, - num_heads, - residual, - to_block, - sparse_format, -): - from dgl.nn.pytorch import GATConv - - torch.manual_seed(12345) - device = torch.device("cuda") - g = dgl_graph_1.to(device).astype(idx_type) - - if to_block: - g = dgl.to_block(g) - - size = (g.num_src_nodes(), g.num_dst_nodes()) - - if mode == "bipartite": - in_feats = (10, 3) - nfeat = ( - torch.randn(size[0], in_feats[0]).to(device), - torch.randn(size[1], in_feats[1]).to(device), - ) - elif mode == "share_weights": - in_feats = 5 - nfeat = ( - torch.randn(size[0], in_feats).to(device), - torch.randn(size[1], in_feats).to(device), - ) - else: - in_feats = 7 - nfeat = torch.randn(size[0], in_feats).to(device) - out_feats = 2 - - if sparse_format == "coo": - sg = SparseGraph( - size=size, src_ids=g.edges()[0], dst_ids=g.edges()[1], formats="csc" - ) - elif sparse_format == "csc": - offsets, indices, _ = g.adj_tensors("csc") - sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets, formats="csc") - - args = (in_feats, out_feats, num_heads) - kwargs = {"bias": False, "allow_zero_in_degree": True, "residual": residual} - - conv1 = GATConv(*args, **kwargs).to(device) - conv2 = CuGraphGATConv(*args, **kwargs).to(device) - - dim = num_heads * out_feats - with torch.no_grad(): - conv2.attn_weights[:dim].copy_(conv1.attn_l.flatten()) - conv2.attn_weights[dim:].copy_(conv1.attn_r.flatten()) - if mode == "bipartite": - conv2.lin_src.weight.copy_(conv1.fc_src.weight) - conv2.lin_dst.weight.copy_(conv1.fc_dst.weight) - else: - conv2.lin.weight.copy_(conv1.fc.weight) - if residual and conv1.has_linear_res: - conv2.lin_res.weight.copy_(conv1.res_fc.weight) - - out1 = conv1(g, nfeat) - if sparse_format is not None: - out2 = conv2(sg, nfeat, max_in_degree=max_in_degree) - else: - out2 = conv2(g, nfeat, max_in_degree=max_in_degree) - - assert torch.allclose(out1, out2, atol=ATOL) - - grad_out1 = torch.randn_like(out1) - grad_out2 = grad_out1.detach().clone() - out1.backward(grad_out1) - out2.backward(grad_out2) - - if mode == "bipartite": - assert torch.allclose( - conv1.fc_src.weight.grad, conv2.lin_src.weight.grad, atol=ATOL - ) - assert torch.allclose( - conv1.fc_dst.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL - ) - else: - assert torch.allclose(conv1.fc.weight.grad, conv2.lin.weight.grad, atol=ATOL) - - if residual and conv1.has_linear_res: - assert torch.allclose( - conv1.res_fc.weight.grad, conv2.lin_res.weight.grad, atol=ATOL - ) - - assert torch.allclose( - torch.cat((conv1.attn_l.grad, conv1.attn_r.grad), dim=0), - conv2.attn_weights.grad.view(2, num_heads, out_feats), - atol=1e-5, # Note: using a loosened tolerance here due to numerical error - ) - - -@pytest.mark.parametrize("bias", [False, True]) -@pytest.mark.parametrize("bipartite", [False, True]) 
-@pytest.mark.parametrize("concat", [False, True]) -@pytest.mark.parametrize("max_in_degree", [None, 8]) -@pytest.mark.parametrize("num_heads", [1, 2, 7]) -@pytest.mark.parametrize("to_block", [False, True]) -@pytest.mark.parametrize("use_edge_feats", [False, True]) -def test_gatconv_edge_feats( - dgl_graph_1, - bias, - bipartite, - concat, - max_in_degree, - num_heads, - to_block, - use_edge_feats, -): - torch.manual_seed(12345) - device = torch.device("cuda") - g = dgl_graph_1.to(device) - - if to_block: - g = dgl.to_block(g) - - if bipartite: - in_feats = (10, 3) - nfeat = ( - torch.rand(g.num_src_nodes(), in_feats[0]).to(device), - torch.rand(g.num_dst_nodes(), in_feats[1]).to(device), - ) - else: - in_feats = 10 - nfeat = torch.rand(g.num_src_nodes(), in_feats).to(device) - out_feats = 2 - - if use_edge_feats: - edge_feats = 3 - efeat = torch.rand(g.num_edges(), edge_feats).to(device) - else: - edge_feats = None - efeat = None - - conv = CuGraphGATConv( - in_feats, - out_feats, - num_heads, - concat=concat, - edge_feats=edge_feats, - bias=bias, - allow_zero_in_degree=True, - ).to(device) - out = conv(g, nfeat, efeat=efeat, max_in_degree=max_in_degree) - - grad_out = torch.randn_like(out) - out.backward(grad_out) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatv2conv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatv2conv.py deleted file mode 100644 index 2d26b7f..0000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatv2conv.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -from cugraph_dgl.nn.conv.base import SparseGraph -from cugraph_dgl.nn import GATv2Conv as CuGraphGATv2Conv - -dgl = pytest.importorskip("dgl", reason="DGL not available") -torch = pytest.importorskip("torch", reason="PyTorch not available") - -ATOL = 1e-5 - - -@pytest.mark.parametrize("mode", ["bipartite", "share_weights", "regular"]) -@pytest.mark.parametrize("idx_type", [torch.int32, torch.int64]) -@pytest.mark.parametrize("max_in_degree", [None, 8]) -@pytest.mark.parametrize("num_heads", [1, 2, 7]) -@pytest.mark.parametrize("residual", [False, True]) -@pytest.mark.parametrize("to_block", [False, True]) -@pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) -def test_gatv2conv_equality( - dgl_graph_1, - mode, - idx_type, - max_in_degree, - num_heads, - residual, - to_block, - sparse_format, -): - from dgl.nn.pytorch import GATv2Conv - - torch.manual_seed(12345) - device = torch.device("cuda") - g = dgl_graph_1.to(device).astype(idx_type) - - if to_block: - g = dgl.to_block(g) - - size = (g.num_src_nodes(), g.num_dst_nodes()) - - if mode == "bipartite": - in_feats = (10, 3) - nfeat = ( - torch.randn(size[0], in_feats[0]).to(device), - torch.randn(size[1], in_feats[1]).to(device), - ) - elif mode == "share_weights": - in_feats = 5 - nfeat = ( - torch.randn(size[0], in_feats).to(device), - torch.randn(size[1], in_feats).to(device), - ) - else: - in_feats = 7 - nfeat = torch.randn(size[0], in_feats).to(device) - out_feats = 2 - - if sparse_format == "coo": - sg = SparseGraph( - size=size, src_ids=g.edges()[0], dst_ids=g.edges()[1], formats="csc" - ) - elif sparse_format == "csc": - offsets, indices, _ = g.adj_tensors("csc") - sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets, formats="csc") - - args = (in_feats, out_feats, num_heads) - kwargs = { - "bias": False, - "allow_zero_in_degree": True, - "residual": residual, - "share_weights": mode == "share_weights", - } - - conv1 = GATv2Conv(*args, **kwargs).to(device) - conv2 = CuGraphGATv2Conv(*args, **kwargs).to(device) - - with torch.no_grad(): - conv2.attn_weights.copy_(conv1.attn.flatten()) - conv2.lin_src.weight.copy_(conv1.fc_src.weight) - conv2.lin_dst.weight.copy_(conv1.fc_dst.weight) - if residual: - conv2.lin_res.weight.copy_(conv1.res_fc.weight) - - out1 = conv1(g, nfeat) - if sparse_format is not None: - out2 = conv2(sg, nfeat, max_in_degree=max_in_degree) - else: - out2 = conv2(g, nfeat, max_in_degree=max_in_degree) - - assert torch.allclose(out1, out2, atol=ATOL) - - grad_out1 = torch.randn_like(out1) - grad_out2 = grad_out1.detach().clone() - out1.backward(grad_out1) - out2.backward(grad_out2) - - assert torch.allclose( - conv1.fc_src.weight.grad, conv2.lin_src.weight.grad, atol=ATOL - ) - assert torch.allclose( - conv1.fc_dst.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL - ) - - if residual: - assert torch.allclose( - conv1.res_fc.weight.grad, conv2.lin_res.weight.grad, atol=ATOL - ) - - assert torch.allclose( - conv1.attn.grad, - conv2.attn_weights.grad.view(1, num_heads, out_feats), - atol=ATOL, - ) - - -@pytest.mark.parametrize("bias", [False, True]) -@pytest.mark.parametrize("bipartite", [False, True]) -@pytest.mark.parametrize("concat", [False, True]) -@pytest.mark.parametrize("max_in_degree", [None, 8]) -@pytest.mark.parametrize("num_heads", [1, 2, 7]) -@pytest.mark.parametrize("to_block", [False, True]) -@pytest.mark.parametrize("use_edge_feats", [False, True]) -def test_gatv2conv_edge_feats( - dgl_graph_1, - bias, - bipartite, - concat, - max_in_degree, - num_heads, - 
to_block, - use_edge_feats, -): - torch.manual_seed(12345) - device = torch.device("cuda") - g = dgl_graph_1.to(device) - - if to_block: - g = dgl.to_block(g) - - if bipartite: - in_feats = (10, 3) - nfeat = ( - torch.rand(g.num_src_nodes(), in_feats[0]).to(device), - torch.rand(g.num_dst_nodes(), in_feats[1]).to(device), - ) - else: - in_feats = 10 - nfeat = torch.rand(g.num_src_nodes(), in_feats).to(device) - out_feats = 2 - - if use_edge_feats: - edge_feats = 3 - efeat = torch.rand(g.num_edges(), edge_feats).to(device) - else: - edge_feats = None - efeat = None - - conv = CuGraphGATv2Conv( - in_feats, - out_feats, - num_heads, - concat=concat, - edge_feats=edge_feats, - bias=bias, - allow_zero_in_degree=True, - ).to(device) - out = conv(g, nfeat, efeat=efeat, max_in_degree=max_in_degree) - - grad_out = torch.randn_like(out) - out.backward(grad_out) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_relgraphconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_relgraphconv.py deleted file mode 100644 index b5d3686..0000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_relgraphconv.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -from cugraph_dgl.nn.conv.base import SparseGraph -from cugraph_dgl.nn import RelGraphConv as CuGraphRelGraphConv - -dgl = pytest.importorskip("dgl", reason="DGL not available") -torch = pytest.importorskip("torch", reason="PyTorch not available") - -ATOL = 1e-6 - - -@pytest.mark.parametrize("idx_type", [torch.int32, torch.int64]) -@pytest.mark.parametrize("max_in_degree", [None, 8]) -@pytest.mark.parametrize("num_bases", [1, 2, 5]) -@pytest.mark.parametrize("regularizer", [None, "basis"]) -@pytest.mark.parametrize("self_loop", [False, True]) -@pytest.mark.parametrize("to_block", [False, True]) -@pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) -def test_relgraphconv_equality( - dgl_graph_1, - idx_type, - max_in_degree, - num_bases, - regularizer, - self_loop, - to_block, - sparse_format, -): - from dgl.nn.pytorch import RelGraphConv - - torch.manual_seed(12345) - device = torch.device("cuda") - g = dgl_graph_1.to(device).astype(idx_type) - - if to_block: - g = dgl.to_block(g) - - in_feat, out_feat, num_rels = 10, 2, 3 - args = (in_feat, out_feat, num_rels) - kwargs = { - "num_bases": num_bases, - "regularizer": regularizer, - "bias": False, - "self_loop": self_loop, - } - - g.edata[dgl.ETYPE] = torch.randint(num_rels, (g.num_edges(),)).to(device) - size = (g.num_src_nodes(), g.num_dst_nodes()) - feat = torch.rand(g.num_src_nodes(), in_feat).to(device) - - if sparse_format == "coo": - sg = SparseGraph( - size=size, - src_ids=g.edges()[0], - dst_ids=g.edges()[1], - values=g.edata[dgl.ETYPE], - formats="csc", - ) - elif sparse_format == "csc": - offsets, indices, perm = g.adj_tensors("csc") - etypes = g.edata[dgl.ETYPE][perm] - sg = SparseGraph( - size=size, src_ids=indices, cdst_ids=offsets, values=etypes, formats="csc" - ) - - conv1 = 
RelGraphConv(*args, **kwargs).to(device) - conv2 = CuGraphRelGraphConv(*args, **kwargs, apply_norm=False).to(device) - - with torch.no_grad(): - if self_loop: - conv2.W[:-1].copy_(conv1.linear_r.W) - conv2.W[-1].copy_(conv1.loop_weight) - else: - conv2.W.copy_(conv1.linear_r.W) - - if regularizer is not None: - conv2.coeff.copy_(conv1.linear_r.coeff) - - out1 = conv1(g, feat, g.edata[dgl.ETYPE]) - - if sparse_format is not None: - out2 = conv2(sg, feat, sg.values(), max_in_degree=max_in_degree) - else: - out2 = conv2(g, feat, g.edata[dgl.ETYPE], max_in_degree=max_in_degree) - - assert torch.allclose(out1, out2, atol=ATOL) - - grad_out = torch.randn_like(out1) - out1.backward(grad_out) - out2.backward(grad_out) - - end = -1 if self_loop else None - assert torch.allclose(conv1.linear_r.W.grad, conv2.W.grad[:end], atol=ATOL) - - if self_loop: - assert torch.allclose(conv1.loop_weight.grad, conv2.W.grad[-1], atol=ATOL) - - if regularizer is not None: - assert torch.allclose(conv1.linear_r.coeff.grad, conv2.coeff.grad, atol=ATOL) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_sageconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_sageconv.py deleted file mode 100644 index 3f1c2b1..0000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_sageconv.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
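Note: the weight copies and gradient checks in test_relgraphconv above depend on the deleted RelGraphConv's W layout: one in_feats-by-out_feats block per relation (or per basis), with the self-loop weight in the trailing block when self_loop=True, hence the end = -1 if self_loop else None slicing. Schematically (shapes follow the deleted __init__; names are illustrative):

    import torch

    num_rels, in_feats, out_feats = 3, 10, 2
    W = torch.empty(num_rels + 1, in_feats, out_feats)  # trailing self-loop slot
    per_relation = W[:-1]  # matched against conv1.linear_r.W in the test
    self_loop_w = W[-1]    # matched against conv1.loop_weight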
- -import pytest - -from cugraph_dgl.nn.conv.base import SparseGraph -from cugraph_dgl.nn import SAGEConv as CuGraphSAGEConv - -dgl = pytest.importorskip("dgl", reason="DGL not available") -torch = pytest.importorskip("torch", reason="PyTorch not available") - -ATOL = 1e-6 - - -@pytest.mark.parametrize("aggr", ["mean", "pool"]) -@pytest.mark.parametrize("bias", [False, True]) -@pytest.mark.parametrize("bipartite", [False, True]) -@pytest.mark.parametrize("idx_type", [torch.int32, torch.int64]) -@pytest.mark.parametrize("max_in_degree", [None, 8]) -@pytest.mark.parametrize("to_block", [False, True]) -@pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) -def test_sageconv_equality( - dgl_graph_1, aggr, bias, bipartite, idx_type, max_in_degree, to_block, sparse_format -): - from dgl.nn.pytorch import SAGEConv - - torch.manual_seed(12345) - device = torch.device("cuda") - g = dgl_graph_1.to(device).astype(idx_type) - - if to_block: - g = dgl.to_block(g) - - size = (g.num_src_nodes(), g.num_dst_nodes()) - - if bipartite: - in_feats = (5, 3) - feat = ( - torch.rand(size[0], in_feats[0], requires_grad=True).to(device), - torch.rand(size[1], in_feats[1], requires_grad=True).to(device), - ) - else: - in_feats = 5 - feat = torch.rand(size[0], in_feats).to(device) - out_feats = 2 - - if sparse_format == "coo": - sg = SparseGraph( - size=size, src_ids=g.edges()[0], dst_ids=g.edges()[1], formats="csc" - ) - elif sparse_format == "csc": - offsets, indices, _ = g.adj_tensors("csc") - sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets, formats="csc") - - kwargs = {"aggregator_type": aggr, "bias": bias} - conv1 = SAGEConv(in_feats, out_feats, **kwargs).to(device) - conv2 = CuGraphSAGEConv(in_feats, out_feats, **kwargs).to(device) - - in_feats_src = conv2.in_feats_src - with torch.no_grad(): - conv2.lin.weight[:, :in_feats_src].copy_(conv1.fc_neigh.weight) - conv2.lin.weight[:, in_feats_src:].copy_(conv1.fc_self.weight) - if bias: - conv2.lin.bias.copy_(conv1.fc_self.bias) - if aggr == "pool": - conv2.pre_lin.weight.copy_(conv1.fc_pool.weight) - conv2.pre_lin.bias.copy_(conv1.fc_pool.bias) - - out1 = conv1(g, feat) - if sparse_format is not None: - out2 = conv2(sg, feat, max_in_degree=max_in_degree) - else: - out2 = conv2(g, feat, max_in_degree=max_in_degree) - assert torch.allclose(out1, out2, atol=ATOL) - - grad_out = torch.randn_like(out1) - out1.backward(grad_out) - out2.backward(grad_out) - assert torch.allclose( - conv1.fc_neigh.weight.grad, - conv2.lin.weight.grad[:, :in_feats_src], - atol=ATOL, - ) - assert torch.allclose( - conv1.fc_self.weight.grad, - conv2.lin.weight.grad[:, in_feats_src:], - atol=ATOL, - ) - if bias: - assert torch.allclose(conv1.fc_self.bias.grad, conv2.lin.bias.grad, atol=ATOL) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_transformerconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_transformerconv.py deleted file mode 100644 index 28d13de..0000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_transformerconv.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
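Note: the deleted SAGEConv test above folds DGL's two weight matrices into a single fused linear layer; the column split mirrors the deleted forward(), where cugraph-ops returns the aggregated neighbor features concatenated with the destination features. Schematically (hypothetical shapes):

    import torch

    in_feats_src, in_feats_dst, out_feats = 5, 5, 2
    lin_weight = torch.empty(out_feats, in_feats_src + in_feats_dst)
    neigh_cols = lin_weight[:, :in_feats_src]  # corresponds to DGL's fc_neigh
    self_cols = lin_weight[:, in_feats_src:]   # corresponds to DGL's fc_self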
-# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -from cugraph_dgl.nn.conv.base import SparseGraph -from cugraph_dgl.nn import TransformerConv - -dgl = pytest.importorskip("dgl", reason="DGL not available") -torch = pytest.importorskip("torch", reason="PyTorch not available") - -ATOL = 1e-6 - - -@pytest.mark.parametrize("beta", [False, True]) -@pytest.mark.parametrize("bipartite_node_feats", [False, True]) -@pytest.mark.parametrize("concat", [False, True]) -@pytest.mark.parametrize("idx_type", [torch.int32, torch.int64]) -@pytest.mark.parametrize("num_heads", [1, 3, 4]) -@pytest.mark.parametrize("to_block", [False, True]) -@pytest.mark.parametrize("use_edge_feats", [False, True]) -@pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) -def test_transformerconv( - dgl_graph_1, - beta, - bipartite_node_feats, - concat, - idx_type, - num_heads, - to_block, - use_edge_feats, - sparse_format, -): - torch.manual_seed(12345) - device = torch.device("cuda") - g = dgl_graph_1.to(device).astype(idx_type) - - if to_block: - g = dgl.to_block(g) - - size = (g.num_src_nodes(), g.num_dst_nodes()) - if sparse_format == "coo": - sg = SparseGraph( - size=size, src_ids=g.edges()[0], dst_ids=g.edges()[1], formats="csc" - ) - elif sparse_format == "csc": - offsets, indices, _ = g.adj_tensors("csc") - sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets, formats="csc") - - if bipartite_node_feats: - in_node_feats = (5, 3) - nfeat = ( - torch.rand(g.num_src_nodes(), in_node_feats[0], device=device), - torch.rand(g.num_dst_nodes(), in_node_feats[1], device=device), - ) - else: - in_node_feats = 3 - nfeat = torch.rand(g.num_src_nodes(), in_node_feats, device=device) - out_node_feats = 2 - - if use_edge_feats: - edge_feats = 3 - efeat = torch.rand(g.num_edges(), edge_feats, device=device) - else: - edge_feats = None - efeat = None - - conv = TransformerConv( - in_node_feats, - out_node_feats, - num_heads=num_heads, - concat=concat, - beta=beta, - edge_feats=edge_feats, - ).to(device) - - if sparse_format is not None: - out = conv(sg, nfeat, efeat) - else: - out = conv(g, nfeat, efeat) - - grad_out = torch.randn_like(out) - out.backward(grad_out) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_utils.py b/python/cugraph-dgl/cugraph_dgl/tests/test_utils.py index df60304..4ac4346 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/test_utils.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/test_utils.py @@ -180,7 +180,6 @@ def test_get_source_destination_range(): assert output_d == expected_output -@pytest.mark.skip(reason="Skipping due to missing cugraph-ops backend.") def test__create_homogeneous_cugraph_dgl_nn_sparse_graph(): tensor_d = { "sources_range": 1, @@ -198,7 +197,6 @@ def test__create_homogeneous_cugraph_dgl_nn_sparse_graph(): assert isinstance(sparse_graph, cugraph_dgl.nn.SparseGraph) -@pytest.mark.skip(reason="Skipping due to missing cugraph-ops backend.") def test_create_homogeneous_sampled_graphs_from_dataframe_csc(): df = get_dummy_sampled_df_csc() batches = create_homogeneous_sampled_graphs_from_dataframe_csc(df) diff --git a/python/cugraph-pyg/cugraph_pyg/nn/__init__.py b/python/cugraph-pyg/cugraph_pyg/nn/__init__.py deleted file mode 100644 index 65136a8..0000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .conv import * diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py deleted file mode 100644 index a0cda91..0000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2023-2025, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings - -HAVE_CUGRAPH_OPS = False -try: - import pylibcugraphops - - HAVE_CUGRAPH_OPS = True -except ImportError: - pass -except Exception as e: - warnings.warn(f"Unexpected error while importing pylibcugraphops: {e}") - -if HAVE_CUGRAPH_OPS: - from .gat_conv import GATConv - from .gatv2_conv import GATv2Conv - from .hetero_gat_conv import HeteroGATConv - from .rgcn_conv import RGCNConv - from .sage_conv import SAGEConv - from .transformer_conv import TransformerConv - - __all__ = [ - "GATConv", - "GATv2Conv", - "HeteroGATConv", - "RGCNConv", - "SAGEConv", - "TransformerConv", - ] diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py deleted file mode 100644 index 713448a..0000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
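[Editor's note — illustrative aside, not part of the patch: the deleted conv/__init__.py above gates every layer import on pylibcugraphops being importable. A minimal sketch of the same availability probe, using importlib.util.find_spec instead of the try/except guard in the original:]

import importlib.util

# Equivalent availability check to the HAVE_CUGRAPH_OPS guard above.
HAVE_CUGRAPH_OPS = importlib.util.find_spec("pylibcugraphops") is not None

if HAVE_CUGRAPH_OPS:
    pass  # safe to import the cugraph-ops-backed conv layers here
else:
    pass  # fall back to upstream layers, or raise a clear error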
- -import warnings -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional -import pylibcugraphops.pytorch - - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - -# A tuple of (row, colptr, num_src_nodes) -CSC = Tuple[torch.Tensor, torch.Tensor, int] - - -class BaseConv(torch.nn.Module): # pragma: no cover - r"""An abstract base class for implementing cugraph-ops message passing layers.""" - - def reset_parameters(self): - r"""Resets all learnable parameters of the module.""" - pass - - @staticmethod - def to_csc( - edge_index: torch.Tensor, - size: Optional[Tuple[int, int]] = None, - edge_attr: Optional[torch.Tensor] = None, - ) -> Union[CSC, Tuple[CSC, torch.Tensor],]: - r"""Returns a CSC representation of an :obj:`edge_index` tensor to be - used as input to cugraph-ops conv layers. - - Args: - edge_index (torch.Tensor): The edge indices. - size ((int, int), optional). The shape of :obj:`edge_index` in each - dimension. (default: :obj:`None`) - edge_attr (torch.Tensor, optional): The edge features. - (default: :obj:`None`) - """ - if size is None: - warnings.warn( - f"Inferring the graph size from 'edge_index' causes " - f"a decline in performance and does not work for " - f"bipartite graphs. To suppress this warning, pass " - f"the 'size' explicitly in '{__name__}.to_csc()'." - ) - num_src_nodes = num_dst_nodes = int(edge_index.max()) + 1 - else: - num_src_nodes, num_dst_nodes = size - - row, col = edge_index - col, perm = torch_geometric.utils.index_sort(col, max_value=num_dst_nodes) - row = row[perm] - - colptr = torch_geometric.utils.sparse.index2ptr(col, num_dst_nodes) - - if edge_attr is not None: - return (row, colptr, num_src_nodes), edge_attr[perm] - - return row, colptr, num_src_nodes - - def get_cugraph( - self, - edge_index: Union[torch_geometric.EdgeIndex, CSC], - bipartite: bool = False, - max_num_neighbors: Optional[int] = None, - ) -> Tuple[pylibcugraphops.pytorch.CSC, Optional[torch.Tensor]]: - r"""Constructs a :obj:`cugraph-ops` graph object from CSC representation. - Supports both bipartite and non-bipartite graphs. - - Args: - edge_index (EdgeIndex, (torch.Tensor, torch.Tensor, int)): The edge - indices, or a tuple of :obj:`(row, colptr, num_src_nodes)` for - CSC representation. - bipartite (bool): If set to :obj:`True`, will create the bipartite - structure in cugraph-ops. (default: :obj:`False`) - max_num_neighbors (int, optional): The maximum number of neighbors - of a destination node. When enabled, it allows models to use - the message-flow-graph primitives in cugraph-ops. - (default: :obj:`None`) - """ - perm = None - if isinstance(edge_index, torch_geometric.EdgeIndex): - edge_index, perm = edge_index.sort_by("col") - num_src_nodes = edge_index.get_sparse_size(0) - (colptr, row), _ = edge_index.get_csc() - else: - row, colptr, num_src_nodes = edge_index - - if not row.is_cuda: - raise RuntimeError( - f"'{self.__class__.__name__}' requires GPU-based processing " - f"but got CPU tensor." 
- ) - - if max_num_neighbors is None: - max_num_neighbors = -1 - - return ( - pylibcugraphops.pytorch.CSC( - offsets=colptr, - indices=row, - num_src_nodes=num_src_nodes, - dst_max_in_degree=max_num_neighbors, - is_bipartite=bipartite, - ), - perm, - ) - - def get_typed_cugraph( - self, - edge_index: Union[torch_geometric.EdgeIndex, CSC], - edge_type: torch.Tensor, - num_edge_types: Optional[int] = None, - bipartite: bool = False, - max_num_neighbors: Optional[int] = None, - ) -> Tuple[pylibcugraphops.pytorch.HeteroCSC, Optional[torch.Tensor]]: - r"""Constructs a typed :obj:`cugraph` graph object from a CSC - representation where each edge corresponds to a given edge type. - Supports both bipartite and non-bipartite graphs. - - Args: - edge_index (EdgeIndex, (torch.Tensor, torch.Tensor, int)): The edge - indices, or a tuple of :obj:`(row, colptr, num_src_nodes)` for - CSC representation. - edge_type (torch.Tensor): The edge type. - num_edge_types (int, optional): The maximum number of edge types. - When not given, will be computed on-the-fly, leading to - slightly worse performance. (default: :obj:`None`) - bipartite (bool): If set to :obj:`True`, will create the bipartite - structure in cugraph-ops. (default: :obj:`False`) - max_num_neighbors (int, optional): The maximum number of neighbors - of a destination node. When enabled, it allows models to use - the message-flow-graph primitives in cugraph-ops. - (default: :obj:`None`) - """ - if num_edge_types is None: - num_edge_types = int(edge_type.max()) + 1 - - if max_num_neighbors is None: - max_num_neighbors = -1 - - perm = None - if isinstance(edge_index, torch_geometric.EdgeIndex): - edge_index, perm = edge_index.sort_by("col") - edge_type = edge_type[perm] - num_src_nodes = edge_index.get_sparse_size(0) - (colptr, row), _ = edge_index.get_csc() - else: - row, colptr, num_src_nodes = edge_index - edge_type = edge_type.int() - - return ( - pylibcugraphops.pytorch.HeteroCSC( - offsets=colptr, - indices=row, - edge_types=edge_type, - num_src_nodes=num_src_nodes, - num_edge_types=num_edge_types, - dst_max_in_degree=max_num_neighbors, - is_bipartite=bipartite, - ), - perm, - ) - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - edge_index: Union[torch_geometric.EdgeIndex, CSC], - ) -> torch.Tensor: - r"""Runs the forward pass of the module. - - Args: - x (torch.Tensor): The node features. - edge_index (EdgeIndex, (torch.Tensor, torch.Tensor, int)): The edge - indices, or a tuple of :obj:`(row, colptr, num_src_nodes)` for - CSC representation. - """ - raise NotImplementedError diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py deleted file mode 100644 index 981b1c5..0000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py +++ /dev/null @@ -1,259 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
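[Editor's note — illustrative aside, not part of the patch: BaseConv.to_csc and get_cugraph above revolve around the CSC triple (row, colptr, num_src_nodes). The sketch below builds that triple with plain torch ops rather than the torch_geometric index_sort/index2ptr helpers the original uses; the 4-edge graph is an assumed toy input.]

import torch

edge_index = torch.tensor([[0, 1, 1, 2],   # source nodes
                           [1, 0, 2, 1]])  # destination nodes
num_src_nodes = num_dst_nodes = 3

row, col = edge_index
col, perm = torch.sort(col)                # order edges by destination
row = row[perm]

# colptr[d] .. colptr[d+1] spans the rows whose destination is d.
colptr = torch.zeros(num_dst_nodes + 1, dtype=torch.long)
colptr[1:] = torch.cumsum(torch.bincount(col, minlength=num_dst_nodes), 0)

csc = (row, colptr, num_src_nodes)         # the tuple get_cugraph consumes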
- -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import mha_gat_n2n - -from .base import BaseConv, CSC - -torch = import_optional("torch") -nn = import_optional("torch.nn") -torch_geometric = import_optional("torch_geometric") - - -class GATConv(BaseConv): - r"""The graph attentional operator from the `"Graph Attention Networks" - `_ paper. - - .. math:: - \mathbf{x}^{\prime}_i = \alpha_{i,i}\mathbf{\Theta}\mathbf{x}_{i} + - \sum_{j \in \mathcal{N}(i)} \alpha_{i,j}\mathbf{\Theta}\mathbf{x}_{j}, - - where the attention coefficients :math:`\alpha_{i,j}` are computed as - - .. math:: - \alpha_{i,j} = - \frac{ - \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} - [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_j] - \right)\right)} - {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} - \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} - [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_k] - \right)\right)}. - - If the graph has multi-dimensional edge features :math:`\mathbf{e}_{i,j}`, - the attention coefficients :math:`\alpha_{i,j}` are computed as - - .. math:: - \alpha_{i,j} = - \frac{ - \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} - [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_j - \, \Vert \, \mathbf{\Theta}_{e} \mathbf{e}_{i,j}]\right)\right)} - {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} - \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} - [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_k - \, \Vert \, \mathbf{\Theta}_{e} \mathbf{e}_{i,k}]\right)\right)}. - - Args: - in_channels (int or tuple): Size of each input sample, or :obj:`-1` to - derive the size from the first input(s) to the forward method. - A tuple corresponds to the sizes of source and target - dimensionalities. - out_channels (int): Size of each output sample. - heads (int, optional): Number of multi-head-attentions. - (default: :obj:`1`) - concat (bool, optional): If set to :obj:`False`, the multi-head - attentions are averaged instead of concatenated. - (default: :obj:`True`) - negative_slope (float, optional): LeakyReLU angle of the negative - slope. (default: :obj:`0.2`) - edge_dim (int, optional): Edge feature dimensionality (in case - there are any). (default: :obj:`None`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. 
(default: :obj:`True`) - """ - - def __init__( - self, - in_channels: Union[int, Tuple[int, int]], - out_channels: int, - heads: int = 1, - concat: bool = True, - negative_slope: float = 0.2, - edge_dim: Optional[int] = None, - bias: bool = True, - ): - super().__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.heads = heads - self.concat = concat - self.negative_slope = negative_slope - self.edge_dim = edge_dim - - Linear = torch_geometric.nn.Linear - - if isinstance(in_channels, int): - self.lin = Linear( - in_channels, - heads * out_channels, - bias=False, - weight_initializer="glorot", - ) - else: - self.lin_src = Linear( - in_channels[0], - heads * out_channels, - bias=False, - weight_initializer="glorot", - ) - self.lin_dst = Linear( - in_channels[1], - heads * out_channels, - bias=False, - weight_initializer="glorot", - ) - - if edge_dim is not None: - self.lin_edge = Linear( - edge_dim, - heads * out_channels, - bias=False, - weight_initializer="glorot", - ) - self.att = nn.Parameter(torch.Tensor(3 * heads * out_channels)) - else: - self.register_parameter("lin_edge", None) - self.att = nn.Parameter(torch.Tensor(2 * heads * out_channels)) - - if bias and concat: - self.bias = nn.Parameter(torch.Tensor(heads * out_channels)) - elif bias and not concat: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.register_parameter("bias", None) - - self.reset_parameters() - - def reset_parameters(self): - if isinstance(self.in_channels, int): - self.lin.reset_parameters() - else: - self.lin_src.reset_parameters() - self.lin_dst.reset_parameters() - - torch_geometric.nn.inits.glorot( - self.att.view(-1, self.heads, self.out_channels) - ) - - if self.lin_edge is not None: - self.lin_edge.reset_parameters() - - torch_geometric.nn.inits.zeros(self.bias) - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - edge_index: Union[torch_geometric.EdgeIndex, CSC], - edge_attr: Optional[torch.Tensor] = None, - max_num_neighbors: Optional[int] = None, - deterministic_dgrad: bool = False, - deterministic_wgrad: bool = False, - high_precision_dgrad: bool = False, - high_precision_wgrad: bool = False, - ) -> torch.Tensor: - r"""Runs the forward pass of the module. - - Args: - x (torch.Tensor or tuple): The node features. Can be a tuple of - tensors denoting source and destination node features. - edge_index (EdgeIndex or CSC): The edge indices. - edge_attr: (torch.Tensor, optional) The edge features. - max_num_neighbors (int, optional): The maximum number of neighbors - of a destination node. When enabled, it allows models to use - the message-flow-graph primitives in cugraph-ops. - (default: :obj:`None`) - deterministic_dgrad : bool, default=False - Optional flag indicating whether the feature gradients - are computed deterministically using a dedicated workspace buffer. - deterministic_wgrad: bool, default=False - Optional flag indicating whether the weight gradients - are computed deterministically using a dedicated workspace buffer. - high_precision_dgrad: bool, default=False - Optional flag indicating whether gradients for inputs in half precision - are kept in single precision as long as possible and only casted to - the corresponding input type at the very end. - high_precision_wgrad: bool, default=False - Optional flag indicating whether gradients for weights in half precision - are kept in single precision as long as possible and only casted to - the corresponding input type at the very end. 
- """ - bipartite = not isinstance(x, torch.Tensor) - graph, perm = self.get_cugraph( - edge_index=edge_index, - bipartite=bipartite, - max_num_neighbors=max_num_neighbors, - ) - - if deterministic_dgrad: - graph.add_reverse_graph() - - if edge_attr is not None: - if self.lin_edge is None: - raise RuntimeError( - f"{self.__class__.__name__}.edge_dim must be set to accept " - f"edge features." - ) - if edge_attr.dim() == 1: - edge_attr = edge_attr.view(-1, 1) - if perm is not None: - edge_attr = edge_attr[perm] - edge_attr = self.lin_edge(edge_attr) - - if bipartite: - if not hasattr(self, "lin_src"): - raise RuntimeError( - f"{self.__class__.__name__}.in_channels must be a pair of " - f"integers to allow bipartite node features, but got " - f"{self.in_channels}." - ) - x_src = self.lin_src(x[0]) - x_dst = self.lin_dst(x[1]) - else: - if not hasattr(self, "lin"): - raise RuntimeError( - f"{self.__class__.__name__}.in_channels is expected to be an " - f"integer, but got {self.in_channels}." - ) - x = self.lin(x) - - out = mha_gat_n2n( - (x_src, x_dst) if bipartite else x, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - deterministic_dgrad=deterministic_dgrad, - deterministic_wgrad=deterministic_wgrad, - high_precision_dgrad=high_precision_dgrad, - high_precision_wgrad=high_precision_wgrad, - ) - - if self.bias is not None: - out = out + self.bias - - return out - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}({self.in_channels}, " - f"{self.out_channels}, heads={self.heads})" - ) diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py deleted file mode 100644 index ebb30de..0000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py +++ /dev/null @@ -1,241 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import mha_gat_v2_n2n - -from .base import BaseConv, CSC - -torch = import_optional("torch") -nn = import_optional("torch.nn") -torch_geometric = import_optional("torch_geometric") - - -class GATv2Conv(BaseConv): - r"""The GATv2 operator from the `"How Attentive are Graph Attention - Networks?" `_ paper, which fixes the - static attention problem of the standard - :class:`~torch_geometric.conv.GATConv` layer. - Since the linear layers in the standard GAT are applied right after each - other, the ranking of attended nodes is unconditioned on the query node. - In contrast, in :class:`GATv2`, every node can attend to any other node. - - .. math:: - \mathbf{x}^{\prime}_i = \alpha_{i,i}\mathbf{\Theta}\mathbf{x}_{i} + - \sum_{j \in \mathcal{N}(i)} \alpha_{i,j}\mathbf{\Theta}\mathbf{x}_{j}, - - where the attention coefficients :math:`\alpha_{i,j}` are computed as - - .. 
math:: - \alpha_{i,j} = - \frac{ - \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} - [\mathbf{x}_i \, \Vert \, \mathbf{x}_j] - \right)\right)} - {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} - \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} - [\mathbf{x}_i \, \Vert \, \mathbf{x}_k] - \right)\right)}. - - If the graph has multi-dimensional edge features :math:`\mathbf{e}_{i,j}`, - the attention coefficients :math:`\alpha_{i,j}` are computed as - - .. math:: - \alpha_{i,j} = - \frac{ - \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} - [\mathbf{x}_i \, \Vert \, \mathbf{x}_j \, \Vert \, \mathbf{e}_{i,j}] - \right)\right)} - {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} - \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} - [\mathbf{x}_i \, \Vert \, \mathbf{x}_k \, \Vert \, \mathbf{e}_{i,k}] - \right)\right)}. - - Args: - in_channels (int or tuple): Size of each input sample, or :obj:`-1` to - derive the size from the first input(s) to the forward method. - A tuple corresponds to the sizes of source and target - dimensionalities. - out_channels (int): Size of each output sample. - heads (int, optional): Number of multi-head-attentions. - (default: :obj:`1`) - concat (bool, optional): If set to :obj:`False`, the multi-head - attentions are averaged instead of concatenated. - (default: :obj:`True`) - negative_slope (float, optional): LeakyReLU angle of the negative - slope. (default: :obj:`0.2`) - edge_dim (int, optional): Edge feature dimensionality (in case - there are any). (default: :obj:`None`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) - share_weights (bool, optional): If set to :obj:`True`, the same matrix - will be applied to the source and the target node of every edge. 
- (default: :obj:`False`) - """ - - def __init__( - self, - in_channels: Union[int, Tuple[int, int]], - out_channels: int, - heads: int = 1, - concat: bool = True, - negative_slope: float = 0.2, - edge_dim: Optional[int] = None, - bias: bool = True, - share_weights: bool = False, - ): - super().__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.heads = heads - self.concat = concat - self.negative_slope = negative_slope - self.edge_dim = edge_dim - self.share_weights = share_weights - - Linear = torch_geometric.nn.Linear - - if isinstance(in_channels, int): - self.lin_src = Linear( - in_channels, - heads * out_channels, - bias=bias, - weight_initializer="glorot", - ) - - if share_weights: - self.lin_dst = self.lin_src - else: - self.lin_dst = Linear( - in_channels, - heads * out_channels, - bias=bias, - weight_initializer="glorot", - ) - else: - self.lin_src = Linear( - in_channels[0], - heads * out_channels, - bias=bias, - weight_initializer="glorot", - ) - self.lin_dst = Linear( - in_channels[1], - heads * out_channels, - bias=bias, - weight_initializer="glorot", - ) - - self.att = nn.Parameter(torch.Tensor(heads * out_channels)) - - if edge_dim is not None: - self.lin_edge = Linear( - edge_dim, heads * out_channels, bias=False, weight_initializer="glorot" - ) - else: - self.register_parameter("lin_edge", None) - - if bias and concat: - self.bias = nn.Parameter(torch.Tensor(heads * out_channels)) - elif bias and not concat: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.register_parameter("bias", None) - - self.reset_parameters() - - def reset_parameters(self): - self.lin_src.reset_parameters() - self.lin_dst.reset_parameters() - if self.lin_edge is not None: - self.lin_edge.reset_parameters() - - torch_geometric.nn.inits.glorot( - self.att.view(-1, self.heads, self.out_channels) - ) - - torch_geometric.nn.inits.zeros(self.bias) - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - edge_index: Union[torch_geometric.EdgeIndex, CSC], - edge_attr: Optional[torch.Tensor] = None, - deterministic_dgrad: bool = False, - deterministic_wgrad: bool = False, - ) -> torch.Tensor: - r"""Runs the forward pass of the module. - - Args: - x (torch.Tensor or tuple): The node features. Can be a tuple of - tensors denoting source and destination node features. - edge_index (EdgeIndex or CSC): The edge indices. - edge_attr: (torch.Tensor, optional) The edge features. - deterministic_dgrad : bool, default=False - Optional flag indicating whether the feature gradients - are computed deterministically using a dedicated workspace buffer. - deterministic_wgrad: bool, default=False - Optional flag indicating whether the weight gradients - are computed deterministically using a dedicated workspace buffer. - """ - bipartite = not isinstance(x, torch.Tensor) or not self.share_weights - graph, perm = self.get_cugraph(edge_index, bipartite=bipartite) - if deterministic_dgrad: - graph.add_reverse_graph() - - if edge_attr is not None: - if self.lin_edge is None: - raise RuntimeError( - f"{self.__class__.__name__}.edge_dim must be set to accept " - f"edge features." 
- ) - if edge_attr.dim() == 1: - edge_attr = edge_attr.view(-1, 1) - if perm is not None: - edge_attr = edge_attr[perm] - edge_attr = self.lin_edge(edge_attr) - - if bipartite: - if isinstance(x, torch.Tensor): - x = (x, x) - x_src = self.lin_src(x[0]) - x_dst = self.lin_dst(x[1]) - else: - x = self.lin_src(x) - - out = mha_gat_v2_n2n( - (x_src, x_dst) if bipartite else x, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - deterministic_dgrad=deterministic_dgrad, - deterministic_wgrad=deterministic_wgrad, - ) - - if self.bias is not None: - out = out + self.bias - - return out - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}({self.in_channels}, " - f"{self.out_channels}, heads={self.heads})" - ) diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py deleted file mode 100644 index a73dd8e..0000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py +++ /dev/null @@ -1,266 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Union -from collections import defaultdict - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import mha_gat_n2n - -from .base import BaseConv -from cugraph_pyg.utils.imports import package_available - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -class HeteroGATConv(BaseConv): - r"""The graph attentional operator on heterogeneous graphs, where a separate - `GATConv` is applied on the homogeneous graph for each edge type. Compared - with directly wrapping `GATConv`s with `HeteroConv`, `HeteroGATConv` fuses - all the linear transformations associated with each node type into one - GEMM call, to improve the performance on GPUs. - - Parameters - ---------- - in_channels : int or Dict[str, int] - Size of each input sample of every node type. - - out_channels : int - Size of each output sample. - - node_types : List[str] - List of node types. - - edge_types : List[Tuple[str, str, str]] - List of edge types. - - heads : int, optional (default=1) - Number of multi-head-attentions. - - concat : bool, optional (default=True) - If set to :obj:`False`, the multi-head attentions are averaged instead - of concatenated. - - negative_slope : float, optional (default=0.2) - LeakyReLU angle of the negative slope. - - bias : bool, optional (default=True) - If set to :obj:`False`, the layer will not learn an additive bias. - - aggr : str, optional (default="sum") - The aggregation scheme to use for grouping node embeddings generated by - different relations. Choose from "sum", "mean", "min", "max". 
- """ - - def __init__( - self, - in_channels: Union[int, dict[str, int]], - out_channels: int, - node_types: list[str], - edge_types: list[tuple[str, str, str]], - heads: int = 1, - concat: bool = True, - negative_slope: float = 0.2, - bias: bool = True, - aggr: str = "sum", - ): - if not package_available("torch_geometric>=2.4.0"): - raise RuntimeError( - f"{self.__class__.__name__} requires torch_geometric>=2.4.0." - ) - - super().__init__() - - if isinstance(in_channels, int): - in_channels = dict.fromkeys(node_types, in_channels) - self.in_channels = in_channels - self.out_channels = out_channels - - self.node_types = node_types - self.edge_types = edge_types - self.num_heads = heads - self.concat_heads = concat - - self.negative_slope = negative_slope - self.aggr = aggr - - self.relations_per_ntype = defaultdict(lambda: ([], [])) - - lin_weights = dict.fromkeys(self.node_types) - attn_weights = dict.fromkeys(self.edge_types) - biases = dict.fromkeys(self.edge_types) - - ParameterDict = torch_geometric.nn.parameter_dict.ParameterDict - - for edge_type in self.edge_types: - src_type, _, dst_type = edge_type - self.relations_per_ntype[src_type][0].append(edge_type) - if src_type != dst_type: - self.relations_per_ntype[dst_type][1].append(edge_type) - - attn_weights[edge_type] = torch.empty( - 2 * self.num_heads * self.out_channels - ) - - if bias and concat: - biases[edge_type] = torch.empty(self.num_heads * out_channels) - elif bias: - biases[edge_type] = torch.empty(out_channels) - else: - biases[edge_type] = None - - for ntype in self.node_types: - n_src_rel = len(self.relations_per_ntype[ntype][0]) - n_dst_rel = len(self.relations_per_ntype[ntype][1]) - n_rel = n_src_rel + n_dst_rel - - lin_weights[ntype] = torch.empty( - (n_rel * self.num_heads * self.out_channels, self.in_channels[ntype]) - ) - - self.lin_weights = ParameterDict(lin_weights) - self.attn_weights = ParameterDict(attn_weights) - - if bias: - self.bias = ParameterDict(biases) - else: - self.register_parameter("bias", None) - - self.reset_parameters() - - def split_tensors( - self, x_fused_dict: dict[str, torch.Tensor], dim: int - ) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]: - """Split fused tensors into chunks based on edge types. - - Parameters - ---------- - x_fused_dict : dict[str, torch.Tensor] - A dictionary to hold node feature for each node type. The key is - node type; the value is a fused tensor that account for all - relations for that node type. - - dim : int - Dimension along which to split the fused tensor. - - Returns - ------- - x_src_dict : dict[str, torch.Tensor] - A dictionary to hold source node feature for each relation graph. - - x_dst_dict : dict[str, torch.Tensor] - A dictionary to hold destination node feature for each relation graph. 
- """ - x_src_dict = dict.fromkeys(self.edge_types) - x_dst_dict = dict.fromkeys(self.edge_types) - - for ntype, t in x_fused_dict.items(): - n_src_rel = len(self.relations_per_ntype[ntype][0]) - n_dst_rel = len(self.relations_per_ntype[ntype][1]) - n_rel = n_src_rel + n_dst_rel - t_list = torch.chunk(t, chunks=n_rel, dim=dim) - - for i, src_rel in enumerate(self.relations_per_ntype[ntype][0]): - x_src_dict[src_rel] = t_list[i] - - for i, dst_rel in enumerate(self.relations_per_ntype[ntype][1]): - x_dst_dict[dst_rel] = t_list[i + n_src_rel] - - return x_src_dict, x_dst_dict - - def reset_parameters(self, seed: Optional[int] = None): - if seed is not None: - torch.manual_seed(seed) - - w_src, w_dst = self.split_tensors(self.lin_weights, dim=0) - - for edge_type in self.edge_types: - src_type, _, dst_type = edge_type - - # lin_src - torch_geometric.nn.inits.glorot(w_src[edge_type]) - - # lin_dst - if src_type != dst_type: - torch_geometric.nn.inits.glorot(w_dst[edge_type]) - - # attn_weights - torch_geometric.nn.inits.glorot( - self.attn_weights[edge_type].view(-1, self.num_heads, self.out_channels) - ) - - # bias - if self.bias is not None: - torch_geometric.nn.inits.zeros(self.bias[edge_type]) - - def forward( - self, - x_dict: dict[str, torch.Tensor], - edge_index_dict: dict[tuple[str, str, str], torch.Tensor], - ) -> dict[str, torch.Tensor]: - feat_dict = dict.fromkeys(x_dict.keys()) - - for ntype, x in x_dict.items(): - feat_dict[ntype] = x @ self.lin_weights[ntype].T - - x_src_dict, x_dst_dict = self.split_tensors(feat_dict, dim=1) - - out_dict = defaultdict(list) - - for edge_type, edge_index in edge_index_dict.items(): - src_type, _, dst_type = edge_type - - csc = BaseConv.to_csc( - edge_index, (x_dict[src_type].size(0), x_dict[dst_type].size(0)) - ) - - if src_type == dst_type: - graph, _ = self.get_cugraph( - csc, - bipartite=False, - ) - out = mha_gat_n2n( - x_src_dict[edge_type], - self.attn_weights[edge_type], - graph, - num_heads=self.num_heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat_heads, - ) - - else: - graph, _ = self.get_cugraph( - csc, - bipartite=True, - ) - out = mha_gat_n2n( - (x_src_dict[edge_type], x_dst_dict[edge_type]), - self.attn_weights[edge_type], - graph, - num_heads=self.num_heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat_heads, - ) - - if self.bias is not None: - out = out + self.bias[edge_type] - - out_dict[dst_type].append(out) - - for key, value in out_dict.items(): - out_dict[key] = torch_geometric.nn.conv.hetero_conv.group(value, self.aggr) - - return out_dict diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py deleted file mode 100644 index 13fa08d..0000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Optional, Union - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import agg_hg_basis_n2n_post - -from .base import BaseConv, CSC - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -class RGCNConv(BaseConv): # pragma: no cover - r"""The relational graph convolutional operator from the `"Modeling - Relational Data with Graph Convolutional Networks" - `_ paper. - - .. math:: - \mathbf{x}^{\prime}_i = \mathbf{\Theta}_{\textrm{root}} \cdot - \mathbf{x}_i + \sum_{r \in \mathcal{R}} \sum_{j \in \mathcal{N}_r(i)} - \frac{1}{|\mathcal{N}_r(i)|} \mathbf{\Theta}_r \cdot \mathbf{x}_j, - - where :math:`\mathcal{R}` denotes the set of relations, *i.e.* edge types. - Edge type needs to be a one-dimensional :obj:`torch.long` tensor which - stores a relation identifier - :math:`\in \{ 0, \ldots, |\mathcal{R}| - 1\}` for each edge. - - Args: - in_channels (int): Size of each input sample. - out_channels (int): Size of each output sample. - num_relations (int): Number of relations. - num_bases (int, optional): If set, this layer will use the - basis-decomposition regularization scheme where :obj:`num_bases` - denotes the number of bases to use. (default: :obj:`None`) - aggr (str, optional): The aggregation scheme to use - (:obj:`"add"`, :obj:`"mean"`, :obj:`"sum"`). - (default: :obj:`"mean"`) - root_weight (bool, optional): If set to :obj:`False`, the layer will - not add transformed root node features to the output. - (default: :obj:`True`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) - """ - - def __init__( - self, - in_channels: int, - out_channels: int, - num_relations: int, - num_bases: Optional[int] = None, - aggr: str = "mean", - root_weight: bool = True, - bias: bool = True, - ): - super().__init__() - - if aggr not in ["mean", "sum", "add"]: - raise ValueError( - f"Aggregation function must be chosen from 'mean', 'sum' or " - f"'add', but got '{aggr}'." 
- ) - - self.in_channels = in_channels - self.out_channels = out_channels - self.num_relations = num_relations - self.num_bases = num_bases - self.aggr = aggr - self.root_weight = root_weight - - dim_root_weight = 1 if root_weight else 0 - - if num_bases is not None: - self.weight = torch.nn.Parameter( - torch.empty(num_bases + dim_root_weight, in_channels, out_channels) - ) - self.comp = torch.nn.Parameter(torch.empty(num_relations, num_bases)) - else: - self.weight = torch.nn.Parameter( - torch.empty(num_relations + dim_root_weight, in_channels, out_channels) - ) - self.register_parameter("comp", None) - - if bias: - self.bias = torch.nn.Parameter(torch.empty(out_channels)) - else: - self.register_parameter("bias", None) - - self.reset_parameters() - - def reset_parameters(self): - end = -1 if self.root_weight else None - torch_geometric.nn.inits.glorot(self.weight[:end]) - torch_geometric.nn.inits.glorot(self.comp) - if self.root_weight: - torch_geometric.nn.inits.glorot(self.weight[-1]) - torch_geometric.nn.inits.zeros(self.bias) - - def forward( - self, - x: torch.Tensor, - edge_index: Union[torch_geometric.EdgeIndex, CSC], - edge_type: torch.Tensor, - max_num_neighbors: Optional[int] = None, - ) -> torch.Tensor: - - graph, _ = self.get_typed_cugraph( - edge_index, - edge_type, - self.num_relations, - max_num_neighbors=max_num_neighbors, - ) - - out = agg_hg_basis_n2n_post( - x, - self.comp, - graph, - concat_own=self.root_weight, - norm_by_out_degree=bool(self.aggr == "mean"), - ) - - out = out @ self.weight.view(-1, self.out_channels) - - if self.bias is not None: - out = out + self.bias - - return out - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}({self.in_channels}, " - f"{self.out_channels}, num_relations={self.num_relations})" - ) diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py deleted file mode 100644 index 65dc99d..0000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import agg_concat_n2n - -from .base import BaseConv, CSC - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -class SAGEConv(BaseConv): - r"""The GraphSAGE operator from the `"Inductive Representation Learning on - Large Graphs" `_ paper. - - .. math:: - \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W}_2 \cdot - \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j - - If :obj:`project = True`, then :math:`\mathbf{x}_j` will first get - projected via - - .. math:: - \mathbf{x}_j \leftarrow \sigma ( \mathbf{W}_3 \mathbf{x}_j + - \mathbf{b}) - - as described in Eq. (3) of the paper. - - Args: - in_channels (int or tuple): Size of each input sample. 
A tuple - corresponds to the sizes of source and target dimensionalities. - out_channels (int): Size of each output sample. - aggr (str or Aggregation, optional): The aggregation scheme to use. - Choose from :obj:`"mean"`, :obj:`"sum"`, :obj:`"min"` or - :obj:`"max"`. (default: :obj:`"mean"`) - normalize (bool, optional): If set to :obj:`True`, output features - will be :math:`\ell_2`-normalized, *i.e.*, - :math:`\frac{\mathbf{h}_i^{k+1}} - {\| \mathbf{h}_i^{k+1} \|_2}`. - (default: :obj:`False`) - root_weight (bool, optional): If set to :obj:`False`, the layer will - not add transformed root node features to the output. - (default: :obj:`True`) - project (bool, optional): If set to :obj:`True`, the layer will apply a - linear transformation followed by an activation function before - aggregation (as described in Eq. (3) of the paper). - (default: :obj:`False`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) - """ - - def __init__( - self, - in_channels: Union[int, Tuple[int, int]], - out_channels: int, - aggr: str = "mean", - normalize: bool = False, - root_weight: bool = True, - project: bool = False, - bias: bool = True, - ): - super().__init__() - - if aggr not in ["mean", "sum", "min", "max"]: - raise ValueError( - f"Aggregation function must be chosen from 'mean'," - f" 'sum', 'min' or 'max', but got '{aggr}'." - ) - - self.in_channels = in_channels - self.out_channels = out_channels - self.aggr = aggr - self.normalize = normalize - self.root_weight = root_weight - self.project = project - - if isinstance(in_channels, int): - self.in_channels_src = self.in_channels_dst = in_channels - else: - self.in_channels_src, self.in_channels_dst = in_channels - - if self.project: - self.pre_lin = torch_geometric.nn.Linear( - self.in_channels_src, self.in_channels_src, bias=True - ) - - if self.root_weight: - self.lin = torch_geometric.nn.Linear( - self.in_channels_src + self.in_channels_dst, out_channels, bias=bias - ) - else: - self.lin = torch_geometric.nn.Linear( - self.in_channels_src, out_channels, bias=bias - ) - - self.reset_parameters() - - def reset_parameters(self): - if self.project: - self.pre_lin.reset_parameters() - self.lin.reset_parameters() - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - edge_index: Union[torch_geometric.EdgeIndex, CSC], - max_num_neighbors: Optional[int] = None, - ) -> torch.Tensor: - bipartite = isinstance(x, Tuple) - graph, _ = self.get_cugraph( - edge_index=edge_index, - bipartite=bipartite, - max_num_neighbors=max_num_neighbors, - ) - - if self.project: - if bipartite: - x = (self.pre_lin(x[0]).relu(), x[1]) - else: - x = self.pre_lin(x).relu() - - out = agg_concat_n2n(x, graph, self.aggr) - - if self.root_weight: - out = self.lin(out) - else: - out = self.lin(out[:, : self.in_channels_src]) - - if self.normalize: - out = torch.nn.functional.normalize(out, p=2.0, dim=-1) - - return out - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}({self.in_channels}, " - f"{self.out_channels}, aggr={self.aggr})" - ) diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py deleted file mode 100644 index e184ee0..0000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py +++ /dev/null @@ -1,214 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import mha_simple_n2n - -from .base import BaseConv, CSC - -torch = import_optional("torch") -nn = import_optional("torch.nn") -torch_geometric = import_optional("torch_geometric") - - -class TransformerConv(BaseConv): - r"""The graph transformer operator from the `"Masked Label Prediction: - Unified Message Passing Model for Semi-Supervised Classification" - `_ paper. - - .. math:: - \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + - \sum_{j \in \mathcal{N}(i)} \alpha_{i,j} \mathbf{W}_2 \mathbf{x}_{j}, - - where the attention coefficients :math:`\alpha_{i,j}` are computed via - multi-head dot product attention: - - .. math:: - \alpha_{i,j} = \textrm{softmax} \left( - \frac{(\mathbf{W}_3\mathbf{x}_i)^{\top} (\mathbf{W}_4\mathbf{x}_j)} - {\sqrt{d}} \right) - - Args: - in_channels (int or tuple): Size of each input sample, or :obj:`-1` to - derive the size from the first input(s) to the forward method. - A tuple corresponds to the sizes of source and target - dimensionalities. - out_channels (int): Size of each output sample. - heads (int, optional): Number of multi-head-attentions. - (default: :obj:`1`) - concat (bool, optional): If set to :obj:`False`, the multi-head - attentions are averaged instead of concatenated. - (default: :obj:`True`) - beta (bool, optional): If set, will combine aggregation and - skip information via - - .. math:: - \mathbf{x}^{\prime}_i = \beta_i \mathbf{W}_1 \mathbf{x}_i + - (1 - \beta_i) \underbrace{\left(\sum_{j \in \mathcal{N}(i)} - \alpha_{i,j} \mathbf{W}_2 \vec{x}_j \right)}_{=\mathbf{m}_i} - - with :math:`\beta_i = \textrm{sigmoid}(\mathbf{w}_5^{\top} - [ \mathbf{W}_1 \mathbf{x}_i, \mathbf{m}_i, \mathbf{W}_1 - \mathbf{x}_i - \mathbf{m}_i ])` (default: :obj:`False`) - edge_dim (int, optional): Edge feature dimensionality (in case - there are any). Edge features are added to the keys after - linear transformation, that is, prior to computing the - attention dot product. They are also added to final values - after the same linear transformation. The model is: - - .. math:: - \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + - \sum_{j \in \mathcal{N}(i)} \alpha_{i,j} \left( - \mathbf{W}_2 \mathbf{x}_{j} + \mathbf{W}_6 \mathbf{e}_{ij} - \right), - - where the attention coefficients :math:`\alpha_{i,j}` are now - computed via: - - .. math:: - \alpha_{i,j} = \textrm{softmax} \left( - \frac{(\mathbf{W}_3\mathbf{x}_i)^{\top} - (\mathbf{W}_4\mathbf{x}_j + \mathbf{W}_6 \mathbf{e}_{ij})} - {\sqrt{d}} \right) - - (default :obj:`None`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) - root_weight (bool, optional): If set to :obj:`False`, the layer will - not add the transformed root node features to the output and the - option :attr:`beta` is set to :obj:`False`. 
(default: :obj:`True`) - """ - - def __init__( - self, - in_channels: Union[int, Tuple[int, int]], - out_channels: int, - heads: int = 1, - concat: bool = True, - beta: bool = False, - edge_dim: Optional[int] = None, - bias: bool = True, - root_weight: bool = True, - ): - super().__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.heads = heads - self.beta = beta and root_weight - self.root_weight = root_weight - self.concat = concat - self.edge_dim = edge_dim - - if isinstance(in_channels, int): - in_channels = (in_channels, in_channels) - - Linear = torch_geometric.nn.Linear - - self.lin_key = Linear(in_channels[0], heads * out_channels) - self.lin_query = Linear(in_channels[1], heads * out_channels) - self.lin_value = Linear(in_channels[0], heads * out_channels) - if edge_dim is not None: - self.lin_edge = Linear(edge_dim, heads * out_channels, bias=False) - else: - self.lin_edge = self.register_parameter("lin_edge", None) - - if concat: - self.lin_skip = Linear(in_channels[1], heads * out_channels, bias=bias) - if self.beta: - self.lin_beta = Linear(3 * heads * out_channels, 1, bias=False) - else: - self.lin_beta = self.register_parameter("lin_beta", None) - else: - self.lin_skip = Linear(in_channels[1], out_channels, bias=bias) - if self.beta: - self.lin_beta = Linear(3 * out_channels, 1, bias=False) - else: - self.lin_beta = self.register_parameter("lin_beta", None) - - self.reset_parameters() - - def reset_parameters(self): - self.lin_key.reset_parameters() - self.lin_query.reset_parameters() - self.lin_value.reset_parameters() - if self.lin_edge is not None: - self.lin_edge.reset_parameters() - self.lin_skip.reset_parameters() - if self.lin_beta is not None: - self.lin_beta.reset_parameters() - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - edge_index: Union[torch_geometric.EdgeIndex, CSC], - edge_attr: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - r"""Runs the forward pass of the module. - - Args: - x (torch.Tensor or tuple): The node features. Can be a tuple of - tensors denoting source and destination node features. - edge_index (EdgeIndex or CSC): The edge indices. - edge_attr: (torch.Tensor, optional) The edge features. - """ - bipartite = True - graph, perm = self.get_cugraph(edge_index=edge_index, bipartite=bipartite) - - if isinstance(x, torch.Tensor): - x = (x, x) - - query = self.lin_query(x[1]) - key = self.lin_key(x[0]) - value = self.lin_value(x[0]) - - if edge_attr is not None: - if self.lin_edge is None: - raise RuntimeError( - f"{self.__class__.__name__}.edge_dim must be set to accept " - f"edge features." 
- ) - if perm is not None: - edge_attr = edge_attr[perm] - edge_attr = self.lin_edge(edge_attr) - - out = mha_simple_n2n( - key, - query, - value, - graph, - self.heads, - self.concat, - edge_emb=edge_attr, - norm_by_dim=True, - score_bias=None, - ) - - if self.root_weight: - x_r = self.lin_skip(x[1]) - if self.lin_beta is not None: - beta = self.lin_beta(torch.cat([out, x_r, out - x_r], dim=-1)) - beta = beta.sigmoid() - out = beta * x_r + (1 - beta) * out - else: - out = out + x_r - - return out - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}({self.in_channels}, " - f"{self.out_channels}, heads={self.heads})" - ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py index 7b2f306..b42fb48 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2025, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -43,12 +43,6 @@ gpubenchmark = pytest_benchmark.plugin.benchmark -def pytest_ignore_collect(collection_path, config): - """Return True to prevent considering this path for collection.""" - if "nn" in collection_path.name: - return True - - @pytest.fixture(scope="module") def dask_client(): dask_scheduler_file = os.environ.get("SCHEDULER_FILE") diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py deleted file mode 100644 index 92d216f..0000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
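[Editor's note — illustrative aside, not part of the patch: the beta-gated skip connection from TransformerConv.forward above, in isolation. Here out stands for the aggregated attention output, x_r for the transformed root features, and lin_beta mirrors the 3*d -> 1 gate of the deleted layer; the sizes are assumptions.]

import torch
import torch.nn as nn

d = 8
out = torch.randn(5, d)                    # aggregated messages
x_r = torch.randn(5, d)                    # skip (root) features
lin_beta = nn.Linear(3 * d, 1, bias=False)

beta = torch.sigmoid(lin_beta(torch.cat([out, x_r, out - x_r], dim=-1)))
gated = beta * x_r + (1 - beta) * out      # per-node blend of skip and messages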
- -import pytest - -from cugraph_pyg.nn import GATConv as CuGraphGATConv -from cugraph_pyg.utils.imports import package_available - -ATOL = 1e-6 - - -@pytest.mark.skipif( - package_available("torch_geometric<2.5"), reason="Test requires pyg>=2.5" -) -@pytest.mark.parametrize("use_edge_index", [True, False]) -@pytest.mark.parametrize("bias", [True, False]) -@pytest.mark.parametrize("bipartite", [True, False]) -@pytest.mark.parametrize("concat", [True, False]) -@pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16]) -@pytest.mark.parametrize("max_num_neighbors", [8, None]) -@pytest.mark.parametrize("use_edge_attr", [True, False]) -@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) -@pytest.mark.sg -def test_gat_conv_equality( - use_edge_index, - bias, - bipartite, - concat, - heads, - max_num_neighbors, - use_edge_attr, - graph, - request, -): - import torch - from torch_geometric import EdgeIndex - from torch_geometric.nn import GATConv - - torch.manual_seed(12345) - edge_index, size = request.getfixturevalue(graph) - edge_index = edge_index.cuda() - - if bipartite: - in_channels = (5, 3) - x = ( - torch.rand(size[0], in_channels[0]).cuda(), - torch.rand(size[1], in_channels[1]).cuda(), - ) - else: - in_channels = 5 - x = torch.rand(size[0], in_channels).cuda() - out_channels = 2 - - if use_edge_attr: - edge_dim = 3 - edge_attr = torch.rand(edge_index.size(1), edge_dim).cuda() - else: - edge_dim = edge_attr = None - - if use_edge_index: - csc = EdgeIndex(edge_index, sparse_size=size) - else: - if use_edge_attr: - csc, edge_attr_perm = CuGraphGATConv.to_csc( - edge_index, size, edge_attr=edge_attr - ) - else: - csc = CuGraphGATConv.to_csc(edge_index, size) - edge_attr_perm = None - - kwargs = dict(bias=bias, concat=concat, edge_dim=edge_dim) - - conv1 = GATConv( - in_channels, out_channels, heads, add_self_loops=False, **kwargs - ).cuda() - conv2 = CuGraphGATConv(in_channels, out_channels, heads, **kwargs).cuda() - - out_dim = heads * out_channels - with torch.no_grad(): - if bipartite: - conv2.lin_src.weight.copy_(conv1.lin_src.weight) - conv2.lin_dst.weight.copy_(conv1.lin_dst.weight) - else: - conv2.lin.weight.copy_(conv1.lin.weight) - - conv2.att[:out_dim].copy_(conv1.att_src.flatten()) - conv2.att[out_dim : 2 * out_dim].copy_(conv1.att_dst.flatten()) - if use_edge_attr: - conv2.att[2 * out_dim :].copy_(conv1.att_edge.flatten()) - conv2.lin_edge.weight.copy_(conv1.lin_edge.weight) - - out1 = conv1(x, edge_index, edge_attr=edge_attr) - if use_edge_index: - out2 = conv2(x, csc, edge_attr=edge_attr, max_num_neighbors=max_num_neighbors) - else: - out2 = conv2( - x, csc, edge_attr=edge_attr_perm, max_num_neighbors=max_num_neighbors - ) - assert torch.allclose(out1, out2, atol=ATOL) - - grad_output = torch.rand_like(out1) - out1.backward(grad_output) - out2.backward(grad_output) - - if bipartite: - assert torch.allclose( - conv1.lin_src.weight.grad, conv2.lin_src.weight.grad, atol=ATOL - ) - assert torch.allclose( - conv1.lin_dst.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL - ) - else: - assert torch.allclose(conv1.lin.weight.grad, conv2.lin.weight.grad, atol=ATOL) - - assert torch.allclose( - conv1.att_src.grad.flatten(), conv2.att.grad[:out_dim], atol=ATOL - ) - assert torch.allclose( - conv1.att_dst.grad.flatten(), conv2.att.grad[out_dim : 2 * out_dim], atol=ATOL - ) - - if use_edge_attr: - assert torch.allclose( - conv1.att_edge.grad.flatten(), conv2.att.grad[2 * out_dim :], atol=ATOL - ) - assert torch.allclose( - conv1.lin_edge.weight.grad, 
diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py
deleted file mode 100644
index 2e22192..0000000
--- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pytest
-
-from cugraph_pyg.nn import GATv2Conv as CuGraphGATv2Conv
-
-ATOL = 1e-6
-
-
-@pytest.mark.parametrize("use_edge_index", [True, False])
-@pytest.mark.parametrize("bipartite", [True, False])
-@pytest.mark.parametrize("concat", [True, False])
-@pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16])
-@pytest.mark.parametrize("use_edge_attr", [True, False])
-@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"])
-@pytest.mark.sg
-def test_gatv2_conv_equality(
-    use_edge_index, bipartite, concat, heads, use_edge_attr, graph, request
-):
-    pytest.importorskip("torch_geometric", reason="PyG not available")
-    import torch
-    from torch_geometric import EdgeIndex
-    from torch_geometric.nn import GATv2Conv
-
-    torch.manual_seed(12345)
-    edge_index, size = request.getfixturevalue(graph)
-    edge_index = edge_index.cuda()
-
-    if bipartite:
-        in_channels = (5, 3)
-        x = (
-            torch.rand(size[0], in_channels[0]).cuda(),
-            torch.rand(size[1], in_channels[1]).cuda(),
-        )
-    else:
-        in_channels = 5
-        x = torch.rand(size[0], in_channels).cuda()
-    out_channels = 2
-
-    if use_edge_attr:
-        edge_dim = 3
-        edge_attr = torch.rand(edge_index.size(1), edge_dim).cuda()
-    else:
-        edge_dim = edge_attr = None
-
-    if use_edge_index:
-        csc = EdgeIndex(edge_index, sparse_size=size)
-    else:
-        if use_edge_attr:
-            csc, edge_attr_perm = CuGraphGATv2Conv.to_csc(
-                edge_index, size, edge_attr=edge_attr
-            )
-        else:
-            csc = CuGraphGATv2Conv.to_csc(edge_index, size)
-            edge_attr_perm = None
-
-    kwargs = dict(bias=False, concat=concat, edge_dim=edge_dim)
-
-    conv1 = GATv2Conv(
-        in_channels, out_channels, heads, add_self_loops=False, **kwargs
-    ).cuda()
-    conv2 = CuGraphGATv2Conv(in_channels, out_channels, heads, **kwargs).cuda()
-
-    with torch.no_grad():
-        conv2.lin_src.weight.copy_(conv1.lin_l.weight)
-        conv2.lin_dst.weight.copy_(conv1.lin_r.weight)
-        conv2.att.copy_(conv1.att.flatten())
-        if use_edge_attr:
-            conv2.lin_edge.weight.copy_(conv1.lin_edge.weight)
-
-    out1 = conv1(x, edge_index, edge_attr=edge_attr)
-    if use_edge_index:
-        out2 = conv2(x, csc, edge_attr=edge_attr)
-    else:
-        out2 = conv2(x, csc, edge_attr=edge_attr_perm)
-    assert torch.allclose(out1, out2, atol=ATOL)
-
-    grad_output = torch.rand_like(out1)
-    out1.backward(grad_output)
-    out2.backward(grad_output)
-
-    assert torch.allclose(conv1.lin_l.weight.grad, conv2.lin_src.weight.grad, atol=ATOL)
-    assert torch.allclose(conv1.lin_r.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL)
-
-    assert torch.allclose(conv1.att.grad.flatten(), conv2.att.grad, atol=ATOL)
-
-    if use_edge_attr:
-        assert torch.allclose(
-            conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, atol=ATOL
-        )
diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py
deleted file mode 100644
index f182869..0000000
--- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pytest
-
-from cugraph_pyg.nn import HeteroGATConv as CuGraphHeteroGATConv
-from cugraph_pyg.utils.imports import package_available
-
-ATOL = 1e-6
-
-
-@pytest.mark.cugraph_ops
-@pytest.mark.skipif(
-    package_available("torch_geometric<2.4"), reason="Test requires pyg>=2.4"
-)
-@pytest.mark.parametrize("heads", [1, 3, 10])
-@pytest.mark.parametrize("aggr", ["sum", "mean"])
-@pytest.mark.sg
-def test_hetero_gat_conv_equality(sample_pyg_hetero_data, aggr, heads):
-    import torch
-    from torch_geometric.data import HeteroData
-    from torch_geometric.nn import HeteroConv, GATConv
-
-    device = torch.device("cuda")
-    data = HeteroData(sample_pyg_hetero_data).to(device)
-
-    in_channels_dict = {k: v.size(1) for k, v in data.x_dict.items()}
-    out_channels = 2
-
-    convs_dict = {}
-    kwargs1 = dict(heads=heads, add_self_loops=False, bias=False)
-    for edge_type in data.edge_types:
-        src_t, _, dst_t = edge_type
-        in_channels_src, in_channels_dst = data.x_dict[src_t].size(-1), data.x_dict[
-            dst_t
-        ].size(-1)
-        if src_t == dst_t:
-            convs_dict[edge_type] = GATConv(in_channels_src, out_channels, **kwargs1)
-        else:
-            convs_dict[edge_type] = GATConv(
-                (in_channels_src, in_channels_dst), out_channels, **kwargs1
-            )
-
-    conv1 = HeteroConv(convs_dict, aggr=aggr).to(device)
-    kwargs2 = dict(
-        heads=heads,
-        aggr=aggr,
-        node_types=data.node_types,
-        edge_types=data.edge_types,
-        bias=False,
-    )
-    conv2 = CuGraphHeteroGATConv(in_channels_dict, out_channels, **kwargs2).to(device)
-
-    # copy over linear and attention weights
-    w_src, w_dst = conv2.split_tensors(conv2.lin_weights, dim=0)
-    with torch.no_grad():
-        for edge_type in conv2.edge_types:
-            src_t, _, dst_t = edge_type
-            if src_t == dst_t:
-                w_src[edge_type].copy_(conv1.convs[edge_type].lin.weight)
-            else:
-                w_src[edge_type].copy_(conv1.convs[edge_type].lin_src.weight)
-                if w_dst[edge_type] is not None:
-                    w_dst[edge_type].copy_(conv1.convs[edge_type].lin_dst.weight)
-
-            conv2.attn_weights[edge_type][: heads * out_channels].copy_(
-                conv1.convs[edge_type].att_src.flatten()
-            )
-            conv2.attn_weights[edge_type][heads * out_channels :].copy_(
-                conv1.convs[edge_type].att_dst.flatten()
-            )
-
-    out1 = conv1(data.x_dict, data.edge_index_dict)
-    out2 = conv2(data.x_dict, data.edge_index_dict)
-
-    for node_type in data.node_types:
-        assert torch.allclose(out1[node_type], out2[node_type], atol=ATOL)
-
-    loss1 = 0
-    loss2 = 0
-    for node_type in data.node_types:
-        loss1 += out1[node_type].mean()
-        loss2 += out2[node_type].mean()
-
-    loss1.backward()
-    loss2.backward()
-
-    # check gradient w.r.t attention weights
-    out_dim = heads * out_channels
-    for edge_type in conv2.edge_types:
-        assert torch.allclose(
-            conv1.convs[edge_type].att_src.grad.flatten(),
-            conv2.attn_weights[edge_type].grad[:out_dim],
-            atol=ATOL,
-        )
-        assert torch.allclose(
-            conv1.convs[edge_type].att_dst.grad.flatten(),
-            conv2.attn_weights[edge_type].grad[out_dim:],
-            atol=ATOL,
-        )
-
-    # check gradient w.r.t linear weights
-    grad_lin_weights_ref = dict.fromkeys(out1.keys())
-    for node_t, (rels_as_src, rels_as_dst) in conv2.relations_per_ntype.items():
-        grad_list = []
-        for rel_t in rels_as_src:
-            src_type, _, dst_type = rel_t
-            if src_type == dst_type:
-                grad_list.append(conv1.convs[rel_t].lin.weight.grad.clone())
-            else:
-                grad_list.append(conv1.convs[rel_t].lin_src.weight.grad.clone())
-        for rel_t in rels_as_dst:
-            grad_list.append(conv1.convs[rel_t].lin_dst.weight.grad.clone())
-        assert len(grad_list) > 0
-        grad_lin_weights_ref[node_t] = torch.vstack(grad_list)
-
-    for node_type in conv2.lin_weights:
-        assert torch.allclose(
-            grad_lin_weights_ref[node_type],
-            conv2.lin_weights[node_type].grad,
-            atol=ATOL,
-        )
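The hetero test above rebuilds its reference gradients by row-stacking the per-relation weight gradients for each node type. A sketch of that stacking with hypothetical shapes; the (out_dim, in_channels) layout follows the upstream GATConv convention, and only the torch.vstack comparison comes from the test:

    import torch

    heads, out_channels, in_channels = 2, 2, 4
    out_dim = heads * out_channels

    # One upstream GATConv weight per relation the node type participates in
    # (hypothetical: one relation as source, one as destination).
    w_as_src = torch.rand(out_dim, in_channels)
    w_as_dst = torch.rand(out_dim, in_channels)

    # The fused layer keeps a single row-stacked weight per node type, which
    # is what the deleted test compares against via torch.vstack(grad_list).
    w_fused = torch.vstack([w_as_src, w_as_dst])
    assert w_fused.shape == (2 * out_dim, in_channels)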
diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py
deleted file mode 100644
index 8b06cb2..0000000
--- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pytest
-
-from cugraph_pyg.nn import RGCNConv as CuGraphRGCNConv
-
-ATOL = 1e-6
-
-
-@pytest.mark.parametrize("use_edge_index", [True, False])
-@pytest.mark.parametrize("aggr", ["add", "sum", "mean"])
-@pytest.mark.parametrize("bias", [True, False])
-@pytest.mark.parametrize("max_num_neighbors", [8, None])
-@pytest.mark.parametrize("num_bases", [1, 2, None])
-@pytest.mark.parametrize("root_weight", [True, False])
-@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"])
-@pytest.mark.sg
-def test_rgcn_conv_equality(
-    use_edge_index,
-    aggr,
-    bias,
-    max_num_neighbors,
-    num_bases,
-    root_weight,
-    graph,
-    request,
-):
-    pytest.importorskip("torch_geometric", reason="PyG not available")
-    import torch
-    from torch_geometric import EdgeIndex
-    from torch_geometric.nn import FastRGCNConv as RGCNConv
-
-    torch.manual_seed(12345)
-    in_channels, out_channels, num_relations = (4, 2, 3)
-    kwargs = dict(aggr=aggr, bias=bias, num_bases=num_bases, root_weight=root_weight)
-
-    edge_index, size = request.getfixturevalue(graph)
-    edge_index = edge_index.cuda()
-    edge_type = torch.randint(num_relations, (edge_index.size(1),)).cuda()
-
-    if use_edge_index:
-        csc = EdgeIndex(edge_index, sparse_size=size)
-    else:
-        csc, edge_type_perm = CuGraphRGCNConv.to_csc(edge_index, size, edge_type)
-
-    x = torch.rand(size[0], in_channels, device="cuda")
-
-    conv1 = RGCNConv(in_channels, out_channels, num_relations, **kwargs).cuda()
-    conv2 = CuGraphRGCNConv(in_channels, out_channels, num_relations, **kwargs).cuda()
-
-    with torch.no_grad():
-        if root_weight:
-            conv2.weight[:-1].copy_(conv1.weight)
-            conv2.weight[-1].copy_(conv1.root)
-        else:
-            conv2.weight.copy_(conv1.weight)
-        if num_bases is not None:
-            conv2.comp.copy_(conv1.comp)
-
-    out1 = conv1(x, edge_index, edge_type)
-    if use_edge_index:
-        out2 = conv2(x, csc, edge_type)
-    else:
-        out2 = conv2(x, csc, edge_type_perm, max_num_neighbors=max_num_neighbors)
-    assert torch.allclose(out1, out2, atol=ATOL)
-
-    grad_out = torch.rand_like(out1)
-    out1.backward(grad_out)
-    out2.backward(grad_out)
-
-    if root_weight:
-        assert torch.allclose(conv1.weight.grad, conv2.weight.grad[:-1], atol=ATOL)
-        assert torch.allclose(conv1.root.grad, conv2.weight.grad[-1], atol=ATOL)
-    else:
-        assert torch.allclose(conv1.weight.grad, conv2.weight.grad, atol=ATOL)
-
-    if num_bases is not None:
-        assert torch.allclose(conv1.comp.grad, conv2.comp.grad, atol=ATOL)
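The copies into weight[:-1] and weight[-1] in the RGCN test above imply the fused layout: the cugraph-ops layer appended the root transform as one extra trailing relation. A small pure-PyTorch sketch under that inferred layout (shapes follow the upstream FastRGCNConv convention):

    import torch

    num_relations, in_channels, out_channels = 3, 4, 2

    # Upstream FastRGCNConv: per-relation weights plus a separate root weight.
    weight = torch.rand(num_relations, in_channels, out_channels)
    root = torch.rand(in_channels, out_channels)

    # Inferred cugraph-ops layout: root appended as one extra trailing
    # relation, hence conv2.weight[:-1] and conv2.weight[-1] in the test.
    fused = torch.cat([weight, root.unsqueeze(0)], dim=0)

    assert torch.equal(fused[:-1], weight)
    assert torch.equal(fused[-1], root)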
diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py
deleted file mode 100644
index 878ceff..0000000
--- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pytest
-
-from cugraph_pyg.nn import SAGEConv as CuGraphSAGEConv
-
-ATOL = 1e-6
-
-
-@pytest.mark.parametrize("use_edge_index", [True, False])
-@pytest.mark.parametrize("aggr", ["sum", "mean", "min", "max"])
-@pytest.mark.parametrize("bias", [True, False])
-@pytest.mark.parametrize("bipartite", [True, False])
-@pytest.mark.parametrize("max_num_neighbors", [8, None])
-@pytest.mark.parametrize("normalize", [True, False])
-@pytest.mark.parametrize("root_weight", [True, False])
-@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"])
-@pytest.mark.sg
-def test_sage_conv_equality(
-    use_edge_index,
-    aggr,
-    bias,
-    bipartite,
-    max_num_neighbors,
-    normalize,
-    root_weight,
-    graph,
-    request,
-):
-    pytest.importorskip("torch_geometric", reason="PyG not available")
-    import torch
-    from torch_geometric import EdgeIndex
-    from torch_geometric.nn import SAGEConv
-
-    torch.manual_seed(12345)
-    edge_index, size = request.getfixturevalue(graph)
-    edge_index = edge_index.cuda()
-
-    if use_edge_index:
-        csc = EdgeIndex(edge_index, sparse_size=size)
-    else:
-        csc = CuGraphSAGEConv.to_csc(edge_index, size)
-
-    if bipartite:
-        in_channels = (7, 3)
-        x = (
-            torch.rand(size[0], in_channels[0]).cuda(),
-            torch.rand(size[1], in_channels[1]).cuda(),
-        )
-    else:
-        in_channels = 5
-        x = torch.rand(size[0], in_channels).cuda()
-    out_channels = 4
-
-    kwargs = dict(aggr=aggr, bias=bias, normalize=normalize, root_weight=root_weight)
-
-    conv1 = SAGEConv(in_channels, out_channels, **kwargs).cuda()
-    conv2 = CuGraphSAGEConv(in_channels, out_channels, **kwargs).cuda()
-
-    in_channels_src = conv2.in_channels_src
-    with torch.no_grad():
-        conv2.lin.weight[:, :in_channels_src].copy_(conv1.lin_l.weight)
-        if root_weight:
-            conv2.lin.weight[:, in_channels_src:].copy_(conv1.lin_r.weight)
-        if bias:
-            conv2.lin.bias.copy_(conv1.lin_l.bias)
-
-    out1 = conv1(x, edge_index)
-    out2 = conv2(x, csc, max_num_neighbors=max_num_neighbors)
-    assert torch.allclose(out1, out2, atol=ATOL)
-
-    grad_out = torch.rand_like(out1)
-    out1.backward(grad_out)
-    out2.backward(grad_out)
-
-    assert torch.allclose(
-        conv1.lin_l.weight.grad,
-        conv2.lin.weight.grad[:, :in_channels_src],
-        atol=ATOL,
-    )
-
-    if root_weight:
-        assert torch.allclose(
-            conv1.lin_r.weight.grad,
-            conv2.lin.weight.grad[:, in_channels_src:],
-            atol=ATOL,
-        )
-
-    if bias:
-        assert torch.allclose(
-            conv1.lin_l.bias.grad,
-            conv2.lin.bias.grad,
-            atol=ATOL,
-        )
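The SAGE test above likewise implies a fused linear layer whose weight columns concatenate the neighbor and root transforms. A sketch of that column layout; the slice boundaries come from the test, the shapes are illustrative:

    import torch

    in_channels, out_channels = 5, 4

    # Upstream SAGEConv: lin_l acts on aggregated neighbor features, lin_r on
    # the root (self) features; torch.nn.Linear stores weights as (out, in).
    lin_l_weight = torch.rand(out_channels, in_channels)
    lin_r_weight = torch.rand(out_channels, in_channels)

    # Fused layout implied by the test: columns are [neighbor | root].
    fused = torch.cat([lin_l_weight, lin_r_weight], dim=1)

    assert torch.equal(fused[:, :in_channels], lin_l_weight)
    assert torch.equal(fused[:, in_channels:], lin_r_weight)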
diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py
deleted file mode 100644
index d207a4d..0000000
--- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py
+++ /dev/null
@@ -1,115 +0,0 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pytest
-
-from cugraph_pyg.nn import TransformerConv as CuGraphTransformerConv
-
-ATOL = 1e-6
-
-
-@pytest.mark.parametrize("use_edge_index", [True, False])
-@pytest.mark.parametrize("use_edge_attr", [True, False])
-@pytest.mark.parametrize("bipartite", [True, False])
-@pytest.mark.parametrize("concat", [True, False])
-@pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16])
-@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"])
-@pytest.mark.sg
-def test_transformer_conv_equality(
-    use_edge_index, use_edge_attr, bipartite, concat, heads, graph, request
-):
-    pytest.importorskip("torch_geometric", reason="PyG not available")
-    import torch
-    from torch_geometric import EdgeIndex
-    from torch_geometric.nn import TransformerConv
-
-    torch.manual_seed(12345)
-    edge_index, size = request.getfixturevalue(graph)
-    edge_index = edge_index.cuda()
-
-    if bipartite:
-        in_channels = (5, 3)
-        x = (
-            torch.rand(size[0], in_channels[0], device="cuda"),
-            torch.rand(size[1], in_channels[1], device="cuda"),
-        )
-    else:
-        in_channels = 5
-        x = torch.rand(size[0], in_channels, device="cuda")
-    out_channels = 2
-
-    if use_edge_attr:
-        edge_dim = 3
-        edge_attr = torch.rand(edge_index.size(1), edge_dim).cuda()
-    else:
-        edge_dim = edge_attr = None
-
-    if use_edge_index:
-        csc = EdgeIndex(edge_index, sparse_size=size)
-    else:
-        if use_edge_attr:
-            csc, edge_attr_perm = CuGraphTransformerConv.to_csc(
-                edge_index, size, edge_attr=edge_attr
-            )
-        else:
-            csc = CuGraphTransformerConv.to_csc(edge_index, size)
-            edge_attr_perm = None
-
-    kwargs = dict(concat=concat, bias=False, edge_dim=edge_dim, root_weight=False)
-
-    conv1 = TransformerConv(in_channels, out_channels, heads, **kwargs).cuda()
-    conv2 = CuGraphTransformerConv(in_channels, out_channels, heads, **kwargs).cuda()
-
-    with torch.no_grad():
-        conv2.lin_query.weight.copy_(conv1.lin_query.weight)
-        conv2.lin_key.weight.copy_(conv1.lin_key.weight)
-        conv2.lin_value.weight.copy_(conv1.lin_value.weight)
-        conv2.lin_query.bias.copy_(conv1.lin_query.bias)
-        conv2.lin_key.bias.copy_(conv1.lin_key.bias)
-        conv2.lin_value.bias.copy_(conv1.lin_value.bias)
-        if use_edge_attr:
-            conv2.lin_edge.weight.copy_(conv1.lin_edge.weight)
-
-    out1 = conv1(x, edge_index, edge_attr=edge_attr)
-    if use_edge_index:
-        out2 = conv2(x, csc, edge_attr=edge_attr)
-    else:
-        out2 = conv2(x, csc, edge_attr=edge_attr_perm)
-
-    assert torch.allclose(out1, out2, atol=ATOL)
-
-    grad_output = torch.rand_like(out1)
-    out1.backward(grad_output)
-    out2.backward(grad_output)
-
-    assert torch.allclose(
-        conv1.lin_query.weight.grad, conv2.lin_query.weight.grad, atol=ATOL
-    )
-    assert torch.allclose(
-        conv1.lin_key.weight.grad, conv2.lin_key.weight.grad, atol=ATOL
-    )
-    assert torch.allclose(
-        conv1.lin_value.weight.grad, conv2.lin_value.weight.grad, atol=ATOL
-    )
-    assert torch.allclose(
-        conv1.lin_query.bias.grad, conv2.lin_query.bias.grad, atol=ATOL
-    )
-    assert torch.allclose(conv1.lin_key.bias.grad, conv2.lin_key.bias.grad, atol=ATOL)
-    assert torch.allclose(
-        conv1.lin_value.bias.grad, conv2.lin_value.bias.grad, atol=ATOL
-    )
-
-    if use_edge_attr:
-        assert torch.allclose(
-            conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, atol=ATOL
-        )
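With the cugraph-ops-backed layers and their tests removed, the upstream torch_geometric layers that served as the references in the equality tests above remain available. A minimal usage sketch on a hypothetical toy graph (requires a CUDA-enabled torch and torch_geometric; the basic_pyg_graph_* fixtures are not reproduced here):

    import torch
    from torch_geometric.nn import GATConv

    torch.manual_seed(12345)

    # Hypothetical toy graph standing in for the test fixtures:
    # 4 nodes, 4 directed edges.
    edge_index = torch.tensor([[0, 1, 2, 3], [1, 2, 3, 0]]).cuda()
    x = torch.rand(4, 5).cuda()

    conv = GATConv(5, 2, heads=2, add_self_loops=False).cuda()
    out = conv(x, edge_index)            # (num_nodes, heads * out_channels) == (4, 4)
    out.backward(torch.rand_like(out))   # gradients flow as in the deleted tests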