From af68c3f22357e5ec97b7a9df137c10c0a4008576 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 14 Jan 2025 09:22:47 -0800 Subject: [PATCH] add multi column tests --- .../sampling/test_uniform_random_walks.py | 82 +++++++++++-------- 1 file changed, 49 insertions(+), 33 deletions(-) diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py index 9d5fef1695..7fca738575 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py @@ -20,6 +20,7 @@ import cudf import cugraph from cudf.testing import assert_series_equal +from cudf.testing.testing import assert_frame_equal from cugraph.utilities import ensure_cugraph_obj_for_nx from cugraph.testing import SMALL_DATASETS, DEFAULT_DATASETS, utils @@ -104,25 +105,48 @@ def check_uniform_random_walks(G, path_data, seeds, max_depth): total_depth = (max_depth) * len(seeds) for i in range(total_depth): - vertex_1, vertex_2 = v_paths.iloc[i], v_paths.iloc[i + 1] + if isinstance(seeds, cudf.DataFrame): + vertex_1 = v_paths.iloc[[i]].reset_index(drop=True) + vertex_2 = v_paths.iloc[[i + 1]].reset_index(drop=True) + else: + vertex_1, vertex_2 = v_paths.iloc[i], v_paths.iloc[i + 1] # Every max_depth'th vertex in 'v_paths' is a seed instead of # 'seeds[i // (max_depth + 1)]', could have just pop the first element # of the seeds array once there is a match and compare it to 'vertex_1' - if i % (max_depth + 1) == 0 and vertex_1 != seeds[i // (max_depth + 1)]: - invalid_seeds += 1 - print( - "[ERR] Invalid seed: " - " src {} != src {}".format(vertex_1, seeds[i // (max_depth + 1)]) - ) + + if i % (max_depth + 1) == 0: + if isinstance(seeds, cudf.DataFrame): + assert_frame_equal( + vertex_1.rename(columns={x:y for x,y in zip(vertex_1.columns,range(0,len(vertex_1.columns)))}), + seeds.iloc[[i // (max_depth + 1)]].reset_index(drop=True).rename(columns={x:y for x,y in zip(seeds.columns,range(0,len(seeds.columns)))}), + check_dtype=False, check_like=True) + else: + if i % (max_depth + 1) == 0 and vertex_1 != seeds[i // (max_depth + 1)]: + invalid_seeds += 1 + print( + "[ERR] Invalid seed: " + " src {} != src {}".format(vertex_1, seeds[i // (max_depth + 1)]) + ) if (i % (max_depth + 1)) != (max_depth): # These are the edges src = vertex_1 dst = vertex_2 - - if src != -1 and dst != -1: - # check for valid edge. + + # check for valid edge. + if isinstance(seeds, cudf.DataFrame): + if (-1 not in src.iloc[0].reset_index(drop=True)) and (-1 not in dst.iloc[0].reset_index(drop=True)): + edge = cudf.DataFrame() + edge["src"] = vertex_1["0_vertex_paths"] + edge["src_0"] = vertex_1["1_vertex_paths"] + edge["dst"] = vertex_2["0_vertex_paths"] + edge["dst_0"] = vertex_2["1_vertex_paths"] + + join1 = cudf.merge(df_G, edge, on=[*edge.columns]) + + assert len(cudf.merge(df_G, edge, on=[*edge.columns])) > 0 + else: edge = df_G.loc[ (df_G["src"] == (src)) & (df_G["dst"] == (dst)) ].reset_index(drop=True) @@ -148,11 +172,11 @@ def check_uniform_random_walks(G, path_data, seeds, max_depth): ) ) invalid_edge_wgt += 1 - e_wgt_idx += 1 + e_wgt_idx += 1 - if src != -1 and dst == -1: - # ensure there is no outgoing edges from 'src' - assert G.out_degree([src])["degree"].iloc[0] == 0 + if src != -1 and dst == -1: + # ensure there is no outgoing edges from 'src' + assert G.out_degree([src])["degree"].iloc[0] == 0 assert invalid_seeds == 0 assert invalid_edge == 0 @@ -189,9 +213,6 @@ def test_uniform_random_walks(graph_file, directed): input_graph, max_depth=max_depth ) - print("path_data = \n", path_data) - print("seeds = \n", seeds) - check_uniform_random_walks(input_graph, path_data, seeds, max_depth) @@ -219,11 +240,10 @@ def test_uniform_random_walks_nx(graph_file): check_uniform_random_walks(Gnx, path_data, seeds, max_depth) -#"""@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.sg -@pytest.mark.parametrize("graph_file", [SMALL_DATASETS[0]]) -@pytest.mark.parametrize("directed", [DIRECTED_GRAPH_OPTIONS[0]]) -def test_random_walks( +@pytest.mark.parametrize("graph_file", SMALL_DATASETS) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +def test_random_walks_multi_column_seeds( graph_file, directed ): @@ -243,16 +263,12 @@ def test_random_walks( edge_attr="weight") k = random.randint(1, 10) - #start_vertices = random.sample(G.nodes().to_numpy().tolist(), k) - start_vertices = G.select_random_vertices(num_vertices=k) - - seeds = cudf.DataFrame() - seeds['v'] = start_vertices - print("seeds = \n", seeds) - seeds['v_0'] = seeds['v'] + 1000 - - df, offsets = cugraph.uniform-random_walks(G, seeds, max_depth) - - check_uniform_random_walks(df, offsets, seeds, df_G) -#""" + seeds = G.select_random_vertices(num_vertices=k) + vertex_paths, edge_weights, vertex_path_sizes = cugraph.uniform_random_walks( + G, seeds, max_depth) + + path_data = (vertex_paths, edge_weights, vertex_path_sizes) + + check_uniform_random_walks(G, path_data, seeds, max_depth) + \ No newline at end of file