forked from mtodat/ms-bfs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
query4.hpp
313 lines (248 loc) · 9.6 KB
/
query4.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
//Copyright (C) 2014 by Manuel Then, Moritz Kaufmann, Fernando Chirigati, Tuan-Anh Hoang-Vu, Kien Pham, Alfons Kemper, Huy T. Vo
//
//Code must not be used, distributed, without written consent by the authors
#pragma once
// #define STATISTICS
// #define FULL_STATISTICS
#include "include/idschedulers.hpp"
#include "include/topklist.hpp"
#include "include/log.hpp"
#include "include/bfs/base.hpp"
#include "include/bfs/batchhuge.hpp"
#include "include/scheduler.hpp"
#include "include/worker.hpp"
#include "include/bfs/naive.hpp"
#include "include/bfs/batch64.hpp"
#include "include/bfs/batch128.hpp"
#include "include/bfs/batch256.hpp"
#include "include/bfs/statistics.hpp"
#include <mutex>
#include <cmath>
#include <algorithm>
#include <random>
// #define OUTPUT_PROGRESS
using namespace std;
namespace Query4 {
static const uint32_t maxMorselTasks = 256000;
static const uint32_t minMorselSize = 1;
struct CentralityResult {
PersonId person;
uint64_t distances;
uint32_t numReachable;
double centrality;
CentralityResult(PersonId person, uint64_t distances, uint32_t numReachable, double centrality)
: person(person), distances(distances), numReachable(numReachable), centrality(centrality) {
}
bool operator==(const CentralityResult& other) const {
return person==other.person&¢rality==other.centrality;
}
friend std::ostream& operator<<(std::ostream &out, const CentralityResult& v) {
out<<v.person<<"::"<<v.distances;
return out;
}
};
typedef std::pair<PersonId, CentralityResult> CentralityEntry;
}
namespace awfy {
static const double EPSILON = 0.000000000001;
template<>
class TopKComparer<Query4::CentralityEntry> {
public:
// Returns true if first param is larger or equal
static bool compare(const Query4::CentralityEntry& a, const Query4::CentralityEntry& b)
{
auto delta = a.second.centrality - b.second.centrality;
return ((delta >0) || (fabs(delta)< EPSILON && a.second.person < b.second.person));
}
};
}
namespace Query4 {
typedef awfy::TopKComparer<CentralityEntry> CentralityCmp;
class QueryState {
public:
const uint32_t k;
const PersonSubgraph& subgraph;
const uint64_t startTime;
vector<uint8_t> personChecked;
mutex topResultsMutex;
awfy::TopKList<PersonId, CentralityResult> topResults;
QueryState(const uint32_t k, const PersonSubgraph& subgraph)
: k(k), subgraph(move(subgraph)), startTime(tschrono::now()), personChecked(subgraph.size()), topResultsMutex(),
topResults(make_pair(std::numeric_limits<PersonId>::max(),CentralityResult(std::numeric_limits<PersonId>::max(), 0, 0, 0.0))) {
topResults.init(k);
}
};
double getCloseness(uint32_t totalPersons,uint64_t totalDistances,uint32_t totalReachable);
struct ResultConcatenator {
QueryState* state;
const char*& resultOut;
#ifdef STATISTICS
BatchStatistics& statistics;
#endif
ResultConcatenator(QueryState* state, const char*& resultOut
#ifdef STATISTICS
, BatchStatistics& statistics
#endif
);
void operator()();
// ResultConcatenator(ResultConcatenator&&) = default;
// ResultConcatenator& operator=(ResultConcatenator&&) = default;
};
size_t getMaxMorselBatchSize();
template<typename BFSRunnerT>
struct MorselTask {
private:
const uint32_t rangeStart;
const uint32_t rangeEnd;
size_t batchSize;
QueryState& state;
const PersonSubgraph& subgraph;
vector<PersonId>& ids;
const uint64_t startTime;
#ifdef STATISTICS
BatchStatistics& statistics;
#endif
public:
MorselTask(QueryState& state, PersonId rangeStart, PersonId rangeEnd, const PersonSubgraph& subgraph, vector<PersonId>& ids, uint64_t startTime
#ifdef STATISTICS
, BatchStatistics& statistics
#endif
)
: rangeStart(rangeStart), rangeEnd(rangeEnd), state(state), subgraph(subgraph), ids(ids), startTime(startTime)
#ifdef STATISTICS
, statistics(statistics)
#endif
{
batchSize = BFSRunnerT::batchSize();
if(batchSize>getMaxMorselBatchSize()) {
batchSize=getMaxMorselBatchSize();
}
}
//Returns pair of processed persons and whether the bound was updated
pair<uint32_t,bool> processPersonBatch(PersonId begin, PersonId end) {
// Build batch with the desired size
vector<BatchBFSdata> batchData;
batchData.reserve(batchSize);
PersonId index=begin;
for(; batchData.size()<batchSize && index<end; index++) {
PersonId subgraphPersonId = ids[index];
assert(!state.personChecked[subgraphPersonId]);
const uint32_t componentSize = subgraph.componentSizes[subgraph.personComponents[subgraphPersonId]];
BatchBFSdata personData(subgraphPersonId, componentSize);
state.personChecked[subgraphPersonId] = true;
batchData.push_back(move(personData));
}
const uint32_t last = index-1;
// #endif
bool boundUpdated=false;
if(batchData.size()>0) {
//Run BFS
BFSRunnerT::runBatch(batchData, state.subgraph
#ifdef STATISTICS
, statistics
#endif
);
for(auto bIter=batchData.begin(); bIter!=batchData.end(); bIter++) {
const auto closeness = getCloseness(bIter->componentSize, bIter->totalDistances, bIter->totalReachable);
const PersonId externalPersonId = bIter->person;
CentralityResult resultCentrality(externalPersonId, bIter->totalDistances, bIter->totalReachable, closeness);
// Check if person qualifies as new top k value
if(CentralityCmp::compare(make_pair(resultCentrality.person, resultCentrality), state.topResults.getBound())) {
// Add improved value to top k list
lock_guard<mutex> lock(state.topResultsMutex);
state.topResults.insert(resultCentrality.person, resultCentrality);
}
}
}
return make_pair(last-begin+1, boundUpdated);
}
void operator()() {
if(rangeEnd<rangeStart) {
FATAL_ERROR("[MorselTask] Fail! Invalid task range: "<<rangeStart<<"-"<<rangeEnd);
}
#ifdef OUTPUT_PROGRESS
static std::mutex m;
{
std::lock_guard<std::mutex> lock(m);
std::cout<<"#"<<rangeStart<<" @ "<<tschrono::now() - startTime<<std::endl;
}
#endif
PersonId person=rangeStart;
while(person<rangeEnd) {
const auto batchResult = processPersonBatch(person, rangeEnd);
person += batchResult.first;
}
}
};
}
std::vector<pair<Query4::PersonId,Query4::PersonId>> generateTasks(const uint64_t maxBfs, const Query4::PersonId graphSize, const size_t batchSize);
template<typename BFSRunnerT>
std::string runBFS(const uint32_t k, const Query4::PersonSubgraph& subgraph, Workers& workers, const uint64_t maxBfs, uint64_t& runtimeOut
#ifdef STATISTICS
, Query4::BatchStatistics& statistics
#endif
) {
// #ifdef STATISTICS
// numTouchedPersonInDistance.clear();
// numTouchedPersonInDistance.resize(subgraph.size());
// for(uint32_t a=1; a<subgraph.size(); a++) {
// numTouchedPersonInDistance[a].resize(Query4::maxStatisticsDistance);
// }
// #endif
// Initialize query state
Query4::QueryState* queryState = new Query4::QueryState(k, subgraph);
// Determine bfs order
std::vector<Query4::PersonId> ids(subgraph.size());
for (unsigned i = 0; i < subgraph.size(); ++i) {
ids[i] = i;
}
// Deterministic random shuffeling
RandomNodeOrdering::order(ids, BFSRunnerT::batchSize(), subgraph);
// Do final ordering on specified subset
LOG_PRINT("[Query4] Starting sort "<<tschrono::now());
// ComponentOrdering::order(ids, maxBfs, BFSRunnerT::batchSize(), subgraph);
DegreeOrdering::order(ids, maxBfs, BFSRunnerT::batchSize(), subgraph);
// TwoHopDegreeOrdering::order(ids, maxBfs, BFSRunnerT::batchSize(), subgraph);
//AdvancedNeighborOrdering::order(ids, BFSRunnerT::batchSize(), subgraph);
//NeighourDegreeOrdering::order(ids, BFSRunnerT::batchSize(), subgraph);
LOG_PRINT("[Query4] Finished sort "<<tschrono::now());
const auto start = tschrono::now();
// Create bfs tasks from specified subset
TaskGroup tasks;
uint64_t numTraversedEdges = 0;
auto ranges = generateTasks(maxBfs, subgraph.size(), BFSRunnerT::batchSize());
for(auto& range : ranges) {
Query4::MorselTask<BFSRunnerT> bfsTask(*queryState, range.first, range.second, subgraph, ids, start
#ifdef STATISTICS
, statistics
#endif
);
tasks.schedule(LambdaRunner::createLambdaTask(bfsTask));
for(Query4::PersonId i=range.first; i<range.second; i++) {
numTraversedEdges += subgraph.componentEdgeCount[subgraph.personComponents[ids[i]]];
}
}
TraceStats<BFSRunnerT::batchSize()>& stats = TraceStats<BFSRunnerT::batchSize()>::getStats();
stats.setNumTraversedEdges(numTraversedEdges);
//std::cout << "# TaskStats "<<maxBfs<<", "<<ranges.size()<< std::endl;
const char* resultChar;
tasks.join(LambdaRunner::createLambdaTask(move(Query4::ResultConcatenator(queryState, resultChar
#ifdef STATISTICS
, statistics
#endif
))));
LOG_PRINT("[Query4] Scheduling "<< ranges.size() << " tasks.");
Scheduler scheduler;
scheduler.schedule(tasks.close());
scheduler.setCloseOnEmpty();
workers.assist(scheduler);
// Always run one executor on the main thread
Executor executor(scheduler,0, false);
executor.run();
runtimeOut = tschrono::now() - start;
scheduler.waitAllFinished();
LOG_PRINT("[Query4] All tasks finished");
std::string resultStr = std::string(resultChar);
delete[] resultChar;
return std::string(resultStr);
}