-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRW_test_graphSAINT.py
110 lines (88 loc) · 3 KB
/
RW_test_graphSAINT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""
Random Walk test on GraphSAINT Datasets
"""
import numpy as np
import pandas as pd
# Import the modules
import cugraph
import cudf
# system and other
import gc
import os
import time
import random
# MTX file reader
from scipy.io import mmread
import scipy.sparse
import networkx as nx
import dgl
from dgl.sampling import random_walk, pack_traces
import torch as th
def read_and_create(datafile):
    """Build a cuGraph graph from a dataset's ``adj_full.npz`` sparse matrix.

    Args:
        datafile: Name of the dataset directory under ``./data/``.

    Returns:
        A ``cugraph.Graph`` populated from the matrix's ``indptr``,
        ``indices`` and ``data`` arrays (offsets, neighbours, edge weights).
    """
    adjacency = scipy.sparse.load_npz(f'./data/{datafile}/adj_full.npz')

    graph = cugraph.Graph()
    graph.from_cudf_adjlist(
        cudf.Series(adjacency.indptr),
        cudf.Series(adjacency.indices),
        cudf.Series(adjacency.data),
    )
    # The result is deliberately discarded, exactly as before.
    # NOTE(review): presumably this forces the edge list to be built here so
    # that the cost is not paid inside the timed random walk -- confirm.
    graph.edges()
    return graph
def run_rw(_G, _seeds, _depth):
    """Time a single ``cugraph.random_walks`` call.

    Args:
        _G: Graph handed to ``cugraph.random_walks``.
        _seeds: Start vertices for the walks.
        _depth: Requested walk length; ``_depth + 1`` is what is passed to
            cuGraph.

    Returns:
        Elapsed time of the call in seconds, as a float.
    """
    # perf_counter() is monotonic and high resolution, so the measurement is
    # not disturbed by system clock adjustments the way time.time() can be.
    start = time.perf_counter()
    # NOTE(review): the +1 presumably converts a hop count into cuGraph's
    # depth convention so the walks match DGL's ``length`` -- confirm.
    # The result stays bound to a local so that releasing it happens on
    # function exit, outside the timed interval.
    _rw = cugraph.random_walks(_G, _seeds, _depth + 1)
    elapsed = time.perf_counter() - start
    return elapsed
def read_dgl(datafile):
    """Load a dataset's ``adj_full.npz`` sparse matrix as a DGL graph.

    Args:
        datafile: Name of the dataset directory under ``./data/``.

    Returns:
        The graph produced by ``dgl.from_scipy`` for that matrix.
    """
    adjacency = scipy.sparse.load_npz(f'./data/{datafile}/adj_full.npz')
    return dgl.from_scipy(adjacency)
def run_dgl_rw(_G, _seeds, _depth):
    """Time a single DGL ``random_walk`` call.

    Args:
        _G: Graph handed to ``random_walk``.
        _seeds: Start nodes, passed as ``nodes``.
        _depth: Walk length, passed as ``length``.

    Returns:
        Elapsed time of the call in seconds, as a float.
    """
    # perf_counter() is monotonic and high resolution, so the measurement is
    # not disturbed by system clock adjustments the way time.time() can be.
    start = time.perf_counter()
    # The results stay bound to locals so that releasing them happens on
    # function exit, outside the timed interval.
    traces, types = random_walk(_G, nodes=_seeds, length=_depth)
    elapsed = time.perf_counter() - start
    return elapsed
# Benchmark grid, identical for every dataset, so it is built only once.
num_seeds_ = [1000, 3000, 5000, 10000, 20000, 40000, 75000, 100000]
max_depth_ = np.arange(2, 2**7 + 1, 2)  # even walk lengths 2, 4, ..., 128
# Number of timed calls per (dataset, walk length, seed count) combination.
_NUM_RUNS = 11


def _append_timings(library, dataset, num_seeds, timings):
    """Append ``timings`` as one header-less row to the library's result CSV."""
    # mode='a': rows accumulate across walk lengths (and across script runs).
    pd.DataFrame([timings]).to_csv(
        f'./RW_{library}_{dataset}_{num_seeds}_.csv',
        mode='a', index=False, header=None)


data = ['ppi', 'flickr', 'reddit', 'yelp', 'amazon']
for file in data:
    # Load each graph once per dataset and reuse it for the whole grid.
    G_dgl = read_dgl(file)
    G_cu = read_and_create(file)
    # Host-side list of vertex ids from which cuGraph seeds are drawn.
    nodes = G_cu.nodes().to_array().tolist()

    for max_depth in max_depth_:
        for num_seeds in num_seeds_:
            print('number of seeds:', num_seeds)
            print('RW length:', max_depth)

            # DGL: fresh random seeds (drawn with replacement) for every run.
            t_dgl = []
            for _ in range(_NUM_RUNS):
                seeds = th.randint(0, G_dgl.num_nodes(), (num_seeds,),
                                   dtype=th.int64)
                t_dgl.append(run_dgl_rw(G_dgl, seeds, max_depth))
            _append_timings('dgl', file, num_seeds, t_dgl)
            print(' ')

            # cuGraph: seeds are likewise sampled with replacement.
            t_cugraph = []
            for _ in range(_NUM_RUNS):
                seeds = random.choices(nodes, k=num_seeds)
                t_cugraph.append(run_rw(G_cu, seeds, max_depth))
            _append_timings('cugraph', file, num_seeds, t_cugraph)

    # Drop this dataset's graphs before the next (possibly larger) one is
    # loaded, instead of keeping both alive until the names are rebound.
    del G_dgl, G_cu, nodes
    gc.collect()