Skip to content

Commit

Permalink
add multi column tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jnke2016 committed Jan 14, 2025
1 parent b94bad2 commit af68c3f
Showing 1 changed file with 49 additions and 33 deletions.
82 changes: 49 additions & 33 deletions python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import cudf
import cugraph
from cudf.testing import assert_series_equal
from cudf.testing.testing import assert_frame_equal
from cugraph.utilities import ensure_cugraph_obj_for_nx
from cugraph.testing import SMALL_DATASETS, DEFAULT_DATASETS, utils

Expand Down Expand Up @@ -104,25 +105,48 @@ def check_uniform_random_walks(G, path_data, seeds, max_depth):
total_depth = (max_depth) * len(seeds)

for i in range(total_depth):
vertex_1, vertex_2 = v_paths.iloc[i], v_paths.iloc[i + 1]
if isinstance(seeds, cudf.DataFrame):
vertex_1 = v_paths.iloc[[i]].reset_index(drop=True)
vertex_2 = v_paths.iloc[[i + 1]].reset_index(drop=True)
else:
vertex_1, vertex_2 = v_paths.iloc[i], v_paths.iloc[i + 1]

# Every max_depth'th vertex in 'v_paths' is a seed instead of
# 'seeds[i // (max_depth + 1)]', could have just pop the first element
# of the seeds array once there is a match and compare it to 'vertex_1'
if i % (max_depth + 1) == 0 and vertex_1 != seeds[i // (max_depth + 1)]:
invalid_seeds += 1
print(
"[ERR] Invalid seed: "
" src {} != src {}".format(vertex_1, seeds[i // (max_depth + 1)])
)

if i % (max_depth + 1) == 0:
if isinstance(seeds, cudf.DataFrame):
assert_frame_equal(
vertex_1.rename(columns={x:y for x,y in zip(vertex_1.columns,range(0,len(vertex_1.columns)))}),
seeds.iloc[[i // (max_depth + 1)]].reset_index(drop=True).rename(columns={x:y for x,y in zip(seeds.columns,range(0,len(seeds.columns)))}),
check_dtype=False, check_like=True)
else:
if i % (max_depth + 1) == 0 and vertex_1 != seeds[i // (max_depth + 1)]:
invalid_seeds += 1
print(
"[ERR] Invalid seed: "
" src {} != src {}".format(vertex_1, seeds[i // (max_depth + 1)])
)

if (i % (max_depth + 1)) != (max_depth):
# These are the edges
src = vertex_1
dst = vertex_2

if src != -1 and dst != -1:
# check for valid edge.

# check for valid edge.
if isinstance(seeds, cudf.DataFrame):
if (-1 not in src.iloc[0].reset_index(drop=True)) and (-1 not in dst.iloc[0].reset_index(drop=True)):
edge = cudf.DataFrame()
edge["src"] = vertex_1["0_vertex_paths"]
edge["src_0"] = vertex_1["1_vertex_paths"]
edge["dst"] = vertex_2["0_vertex_paths"]
edge["dst_0"] = vertex_2["1_vertex_paths"]

join1 = cudf.merge(df_G, edge, on=[*edge.columns])

assert len(cudf.merge(df_G, edge, on=[*edge.columns])) > 0
else:
edge = df_G.loc[
(df_G["src"] == (src)) & (df_G["dst"] == (dst))
].reset_index(drop=True)
Expand All @@ -148,11 +172,11 @@ def check_uniform_random_walks(G, path_data, seeds, max_depth):
)
)
invalid_edge_wgt += 1
e_wgt_idx += 1
e_wgt_idx += 1

if src != -1 and dst == -1:
# ensure there is no outgoing edges from 'src'
assert G.out_degree([src])["degree"].iloc[0] == 0
if src != -1 and dst == -1:
# ensure there is no outgoing edges from 'src'
assert G.out_degree([src])["degree"].iloc[0] == 0

assert invalid_seeds == 0
assert invalid_edge == 0
Expand Down Expand Up @@ -189,9 +213,6 @@ def test_uniform_random_walks(graph_file, directed):
input_graph, max_depth=max_depth
)

print("path_data = \n", path_data)
print("seeds = \n", seeds)

check_uniform_random_walks(input_graph, path_data, seeds, max_depth)


Expand Down Expand Up @@ -219,11 +240,10 @@ def test_uniform_random_walks_nx(graph_file):
check_uniform_random_walks(Gnx, path_data, seeds, max_depth)


#"""@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
@pytest.mark.sg
@pytest.mark.parametrize("graph_file", [SMALL_DATASETS[0]])
@pytest.mark.parametrize("directed", [DIRECTED_GRAPH_OPTIONS[0]])
def test_random_walks(
@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
def test_random_walks_multi_column_seeds(
graph_file,
directed
):
Expand All @@ -243,16 +263,12 @@ def test_random_walks(
edge_attr="weight")

k = random.randint(1, 10)
#start_vertices = random.sample(G.nodes().to_numpy().tolist(), k)

start_vertices = G.select_random_vertices(num_vertices=k)

seeds = cudf.DataFrame()
seeds['v'] = start_vertices
print("seeds = \n", seeds)
seeds['v_0'] = seeds['v'] + 1000

df, offsets = cugraph.uniform-random_walks(G, seeds, max_depth)

check_uniform_random_walks(df, offsets, seeds, df_G)
#"""
seeds = G.select_random_vertices(num_vertices=k)
vertex_paths, edge_weights, vertex_path_sizes = cugraph.uniform_random_walks(
G, seeds, max_depth)

path_data = (vertex_paths, edge_weights, vertex_path_sizes)

check_uniform_random_walks(G, path_data, seeds, max_depth)

0 comments on commit af68c3f

Please sign in to comment.