-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpcap_rtt_analysis.py
344 lines (270 loc) · 12.6 KB
/
pcap_rtt_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
# SPDX-License-Identifier: GPL-2.0-or-later
import pandas as pd
import scapy.all as scapy
import util
import sar_data_loading as sdl
# Size of the unsigned 32-bit number space (2**32). Despite the name this is
# one more than the largest u32 value; it is used as the wraparound modulus.
U32_MAX = 1 << 32
# Half of the 32-bit number space (2**31). The wraparound comparisons below
# treat a modular difference smaller than this as "a is at or ahead of b".
U32_HALF = 1 << 31
def uint32_wraparound(a):
    """Fold *a* into the unsigned 32-bit range [0, 2**32).

    The argument is converted with int() first; negative values wrap
    around to the top of the range.
    """
    # U32_MAX is a power of two, so masking with (U32_MAX - 1) is the same
    # as reducing modulo U32_MAX, also for negative Python ints.
    mask = U32_MAX - 1
    return int(a) & mask
def uint32_geq(a, b):
    """Return True when a >= b in wraparound-aware u32 arithmetic.

    *a* counts as greater than or equal to *b* when the modular distance
    from *b* up to *a* is smaller than half the 32-bit number space.
    """
    distance = uint32_wraparound(int(a) - int(b))
    if distance < 0:
        return False
    return distance < U32_HALF
def uint32_grt(a, b):
    """Return True when a > b in wraparound-aware u32 arithmetic.

    *a* counts as strictly greater than *b* when the modular distance from
    *b* up to *a* is non-zero and smaller than half the 32-bit number space.
    """
    distance = uint32_wraparound(int(a) - int(b))
    if distance <= 0:
        return False
    return distance < U32_HALF
def scapy_get_flow_label(packet):
    """Build a "src:sport+dst:dport" label for a packet.

    The label is derived from the packet's "IP" and "TCP" layers. An empty
    string is returned when either of those layers is missing.
    """
    ip_layer = packet.getlayer("IP")
    tcp_layer = packet.getlayer("TCP")
    if ip_layer is None or tcp_layer is None:
        return ""

    endpoints = (ip_layer.src, tcp_layer.sport, ip_layer.dst, tcp_layer.dport)
    return "{}:{}+{}:{}".format(*endpoints)
def scapy_get_tcp_timestamps(tcp_layer):
    """Return the value of the TCP "Timestamp" option of *tcp_layer*.

    The options are scanned in order and the value of the first option
    named "Timestamp" is returned unchanged (callers unpack it as
    ``tsval, tsecr``). When no such option exists, ``(-1, -1)`` is returned
    as a "no timestamps" sentinel.
    """
    timestamp_values = (
        option[1] for option in tcp_layer.options if option[0] == "Timestamp"
    )
    return next(timestamp_values, (-1, -1))
def scapy_get_tcp_payload_length(packet):
    """Return the TCP payload length of *packet* as used for seq analysis.

    The length is the IP total length minus the IP and TCP header lengths
    (``ihl`` and ``dataofs`` are counted in 32-bit words). A segment with
    the SYN or FIN flag set is counted as one byte longer, because those
    flags consume a sequence number.
    """
    ip_layer = packet.getlayer("IP")
    tcp_layer = packet.getlayer("TCP")

    header_bytes = 4 * ip_layer.ihl + 4 * tcp_layer.dataofs
    length = ip_layer.len - header_bytes

    # For sequence number analysis, SYN and FIN count as 1 byte of payload
    flag_text = str(tcp_layer.flags)
    if any(flag in flag_text for flag in ("S", "F")):
        length += 1
    return length
def get_reverse_flow(flow_label):
    """Return the label of the opposite direction of *flow_label*.

    The label must consist of exactly two endpoints joined by "+"; the
    endpoints are swapped. Any other shape raises ValueError from the
    unpacking.
    """
    first_endpoint, second_endpoint = flow_label.split("+")
    return "+".join((second_endpoint, first_endpoint))
def _find_unsync_tsval_update(pcap_file, max_packets=None, verbose=True):
    """Find ACKs whose TSecr lags behind the data they acknowledge.

    Every TCP packet carrying a Timestamp option is tracked per direction
    ("flow"). For each flow the first payload-carrying segment of every new
    TSval is remembered together with its expected ACK number (``eack``).
    Packets in the reverse direction are then checked:

    * potential error: the packet's ACK number already covers the segment
      that introduced a TSval, but its TSecr is still older than that TSval.
    * actual error: a later packet echoes (TSecr) a TSval that was
      previously flagged as a potential error.

    Args:
        pcap_file: Passed unchanged to ``scapy.PcapReader``.
        max_packets: If not None, at most this many packets are read.
        verbose: Print every finding plus a summary when True.

    Returns:
        A dict with the keys "flowcount" (packets per flow),
        "unique_TSvals" (new TSvals per flow), "potential_errors" and
        "actual_errors" (counts per flow), "errors" (list of dicts
        describing each actual error) and "flowstate" (the internal
        per-flow state at the end of the run).
    """
    flowcount = dict()
    flowstate = dict()
    uniq_tsval = dict()
    perr = dict()
    aerr = dict()
    errors = list()
    # Counted separately from the loop index so that the summary is correct
    # for empty captures and after stopping at max_packets.
    packets_seen = 0

    for i, packet in enumerate(scapy.PcapReader(pcap_file)):
        # i is zero-based: stop once max_packets packets have been read
        if max_packets is not None and i >= max_packets:
            break
        packets_seen += 1

        tcp = packet.getlayer("TCP")
        if tcp is None:
            continue

        tsval, tsecr = scapy_get_tcp_timestamps(tcp)
        if tsval < 0 or tsecr < 0:
            continue

        flow = scapy_get_flow_label(packet)
        if not flow:
            # An empty label means the packet has no "IP" layer; such a
            # packet can neither be reversed nor measured, so skip it.
            continue
        rev_flow = get_reverse_flow(flow)
        flowcount[flow] = flowcount.get(flow, 0) + 1

        # create flowstate
        if flow not in flowstate:
            flowstate[flow] = {"last_TSval": None, "TSval_switches": dict(),
                               "inflated_RTT_tsval": dict(), "seq_0": tcp.seq}
        fs = flowstate[flow]

        p_size = scapy_get_tcp_payload_length(packet)
        eack = uint32_wraparound(tcp.seq + p_size)

        # new TSval
        if p_size > 0 and (fs["last_TSval"] is None or uint32_grt(tsval, fs["last_TSval"])):
            fs["last_TSval"] = tsval
            fs["TSval_switches"][tsval] = {"ack": tcp.ack,
                                           "seq": tcp.seq,
                                           "eack": eack}
            uniq_tsval[flow] = uniq_tsval.get(flow, 0) + 1

        # Check how TSecr match against reverse flow
        if rev_flow not in flowstate:
            continue
        rev_fs = flowstate[rev_flow]

        # Delete state for all TSval that have already been matched
        for r_tsval in list(rev_fs["TSval_switches"].keys()):
            if uint32_geq(tsecr, r_tsval):
                del rev_fs["TSval_switches"][r_tsval]

        # Check if acking old TSval (potential error)
        for r_tsval, r_tsdata in rev_fs["TSval_switches"].items():
            if uint32_grt(r_tsval, tsecr) and uint32_geq(tcp.ack, r_tsdata["eack"]):
                if verbose:
                    print("Potential error: {} - {}: TSecr: {} < {} and ACK {} >= {}".format(
                        i, flow, tsecr, r_tsval, tcp.ack, r_tsdata["eack"]))
                perr[flow] = perr.get(flow, 0) + 1
                rev_fs["inflated_RTT_tsval"][r_tsval] = tcp.ack

        # Check if troublesome TSecr is seen (actual error)
        if tsecr in rev_fs["inflated_RTT_tsval"]:
            if verbose:
                print("ERROR!: {} - {}: TSecr {}".format(i, flow, tsecr))
            aerr[flow] = aerr.get(flow, 0) + 1
            errors.append({"packet_index": i, "flow": flow, "TSecr": tsecr, "ack": tcp.ack})
            del rev_fs["inflated_RTT_tsval"][tsecr]

    if verbose:
        print("{} packets from {} flows processed".format(packets_seen, len(flowcount)))
        print("{} potential and {} actual errors discovered".format(
            sum(perr.values()), len(errors)))

    return {"flowcount": flowcount, "unique_TSvals": uniq_tsval,
            "potential_errors": perr, "actual_errors": aerr,
            "errors": errors, "flowstate": flowstate}
def _find_too_fast_retrans(pcap_file, max_packets=None, verbose=True):
    """Find retransmissions that reuse a TSval which is later echoed.

    Every TCP packet carrying a Timestamp option is tracked per direction
    ("flow"). A payload-carrying segment whose sequence number is not beyond
    the last byte already sent is treated as a retransmission. If such a
    retransmission carries a TSval that has not yet been echoed by the
    reverse direction, it is recorded as a potential error:

    * strong: the retransmitted seq equals the seq of the segment that
      first carried that TSval.
    * weak: the retransmitted seq is a different one.

    When the reverse direction later echoes (TSecr) such a TSval, an actual
    (strong or weak) error is recorded. All counters are keyed by the flow
    label of the acknowledging direction.

    Args:
        pcap_file: Passed unchanged to ``scapy.PcapReader``.
        max_packets: If not None, at most this many packets are read.
        verbose: Print every finding plus a summary when True.

    Returns:
        A dict with the keys "flowcount", "potential_errors",
        "actual_errors", "weak_potential_error", "weak_actual_errors"
        (counts per flow), "errors" and "weak_errors" (lists of dicts
        describing each actual error) and "flowstate" (the internal
        per-flow state at the end of the run).
    """
    flowcount = dict()
    flowstate = dict()
    perr = dict()
    aerr = dict()
    perr_weak = dict()
    aerr_weak = dict()
    errors = list()
    weak_errors = list()
    # Counted separately from the loop index so that the summary is correct
    # for empty captures and after stopping at max_packets.
    packets_seen = 0

    for i, packet in enumerate(scapy.PcapReader(pcap_file)):
        # i is zero-based: stop once max_packets packets have been read
        if max_packets is not None and i >= max_packets:
            break
        packets_seen += 1

        tcp = packet.getlayer("TCP")
        if tcp is None:
            continue

        tsval, tsecr = scapy_get_tcp_timestamps(tcp)
        if tsval < 0 or tsecr < 0:
            continue

        flow = scapy_get_flow_label(packet)
        if not flow:
            # An empty label means the packet has no "IP" layer; such a
            # packet can neither be reversed nor measured, so skip it.
            continue
        rev_flow = get_reverse_flow(flow)
        flowcount[flow] = flowcount.get(flow, 0) + 1

        # create flowstate
        if flow not in flowstate:
            flowstate[flow] = {"last_byte_sent": None, "last_TSval": None, "TSval_switches": dict(),
                               "partial_err_tsval": dict(), "err_tsval": dict(), "seq_0": tcp.seq}
        fs = flowstate[flow]

        p_size = scapy_get_tcp_payload_length(packet)
        eack = uint32_wraparound(tcp.seq + p_size)

        # New seq or retransmission?
        if p_size > 0 and (fs["last_byte_sent"] is None or uint32_grt(tcp.seq, fs["last_byte_sent"])):
            fs["last_byte_sent"] = uint32_wraparound(eack - 1)
        elif p_size > 0:  # Retrans
            # Retrans with same TSval as current outstanding TSval (potential_error)
            if tsval in fs["TSval_switches"]:
                if tcp.seq == fs["TSval_switches"][tsval]["seq"]:
                    fs["err_tsval"][tsval] = tcp.seq
                    perr[rev_flow] = perr.get(rev_flow, 0) + 1
                else:
                    fs["partial_err_tsval"][tsval] = tcp.seq
                    perr_weak[rev_flow] = perr_weak.get(rev_flow, 0) + 1
                if verbose:
                    print("Potential error: {} - {}: Retrans seq: {} - {}, TSval {}".format(
                        i, flow, tcp.seq, eack, tsval))

        # new TSval
        if p_size > 0 and (fs["last_TSval"] is None or uint32_grt(tsval, fs["last_TSval"])):
            fs["last_TSval"] = tsval
            fs["TSval_switches"][tsval] = {"ack": tcp.ack,
                                           "seq": tcp.seq,
                                           "eack": eack}

        # Check how TSecr match against reverse flow
        if rev_flow not in flowstate:
            continue
        rev_fs = flowstate[rev_flow]

        # Delete state for all TSval that have already been matched
        for r_tsval in list(rev_fs["TSval_switches"].keys()):
            if uint32_geq(tsecr, r_tsval):
                del rev_fs["TSval_switches"][r_tsval]

        # Check if acking retransmitted TSval (error)
        if tsecr in rev_fs["err_tsval"]:
            aerr[flow] = aerr.get(flow, 0) + 1
            errors.append({"packet_index": i, "flow": flow, "TSecr": tsecr, "ack": tcp.ack})
            del rev_fs["err_tsval"][tsecr]
            if verbose:
                print("ERROR!: {} - {}: TSecr {}".format(i, flow, tsecr))
        elif tsecr in rev_fs["partial_err_tsval"]:
            aerr_weak[flow] = aerr_weak.get(flow, 0) + 1
            weak_errors.append({"packet_index": i, "flow": flow, "TSecr": tsecr, "ack": tcp.ack})
            del rev_fs["partial_err_tsval"][tsecr]
            if verbose:
                print("ERROR (weak)!: {} - {}: TSecr {}".format(i, flow, tsecr))

    if verbose:
        print("{} packets from {} flows processed".format(packets_seen, len(flowcount)))
        print("{} potential and {} actual errors discovered".format(
            sum(perr.values()), len(errors)))
        print("{} potential and {} actual weak errors discovered".format(
            sum(perr_weak.values()), len(weak_errors)))

    return {"flowcount": flowcount, "potential_errors": perr,
            "actual_errors": aerr, "weak_potential_error": perr_weak,
            "weak_actual_errors": aerr_weak, "errors": errors,
            "weak_errors": weak_errors, "flowstate": flowstate}
def _calculate_rtts_from_pcap(pcap_file, max_packets=None, verbose=False):
    """Compute RTT samples by matching ACKs against earlier segments.

    For each direction ("flow") every segment that occupies sequence space
    (payload, SYN or FIN) is stored as outstanding. When a packet with the
    ACK flag arrives in the reverse direction, all outstanding segments it
    covers are removed and one RTT record is produced from them.

    Args:
        pcap_file: Passed unchanged to ``scapy.PcapReader``.
        max_packets: If not None, at most this many packets are read.
        verbose: Print every stored segment, match and RTT when True.

    Returns:
        None if no RTT sample was found, otherwise a pandas DataFrame with
        one row per ACK that covered at least one outstanding segment and
        the columns:
        "time" (ACK capture time via ``util.parse_unix_timestamp``),
        "flow" (label of the ACKing direction),
        "min_rtt" / "max_rtt" (time since the newest / oldest covered
        segment), "timestamp_rtt" (time since the covered segment that
        first carried the TSval echoed by this ACK, or None),
        "rtt" (copy of "min_rtt"), "ack", "tsecr" (-1 when the ACK has no
        Timestamp option) and "retrans" (True if any covered segment was
        classified as a retransmission).
    """
    flowstate = dict()
    rtts = []

    for i, packet in enumerate(scapy.PcapReader(pcap_file)):
        # i is zero-based: stop once max_packets packets have been read
        if max_packets is not None and i >= max_packets:
            break

        tcp = packet.getlayer("TCP")
        if tcp is None:
            continue

        flow = scapy_get_flow_label(packet)
        if not flow:
            # An empty label means the packet has no "IP" layer; such a
            # packet can neither be reversed nor measured, so skip it.
            continue
        rev_flow = get_reverse_flow(flow)

        # scapy_get_tcp_timestamps signals "no Timestamp option" with -1,
        # so negative values must not be treated as real timestamps.
        tsval, tsecr = scapy_get_tcp_timestamps(tcp)
        has_tsval = tsval is not None and tsval >= 0
        has_tsecr = tsecr is not None and tsecr >= 0

        p_size = scapy_get_tcp_payload_length(packet)
        eack = uint32_wraparound(tcp.seq + p_size)

        # create flowstate
        if flow not in flowstate:
            flowstate[flow] = {"outstanding_packets": [],
                               "last_eack": tcp.seq,
                               "last_tsval": None,
                               "seq_0": tcp.seq}
        fs = flowstate[flow]

        # Add outgoing packets
        if p_size > 0:  # SYN and FIN adds 1 to the payload, so they are also included
            # Detect retrans
            retrans = False
            if uint32_geq(tcp.seq, fs["last_eack"]):
                fs["last_eack"] = eack
            else:
                retrans = True

            # Detect TSval shift
            new_tsval = False
            if has_tsval and (fs["last_tsval"] is None or uint32_grt(tsval, fs["last_tsval"])):
                fs["last_tsval"] = tsval
                new_tsval = True

            fs["outstanding_packets"].append({"seq": tcp.seq,
                                              "eack": eack,
                                              "retrans": retrans,
                                              "tsval": tsval,
                                              "new_tsval": new_tsval,
                                              "time": packet.time})
            if verbose:
                print("{}: Adding - flow: {}, seq: {}, eack: {}, tsval: {}".format(
                    i+1, flow, tcp.seq, eack, tsval))

        # Match ACKs against previous packets in reverse direction
        if rev_flow not in flowstate:
            continue
        rev_fs = flowstate[rev_flow]

        if "A" not in str(tcp.flags):
            continue

        # Find packets that are acked and remove them from outstanding list
        ack_pkts = []
        rem_pkts = []
        for prev_pkt in rev_fs["outstanding_packets"]:
            if uint32_geq(tcp.ack, prev_pkt["eack"]):
                ack_pkts.append(prev_pkt)
                if verbose:
                    print("{}: Match against - ack: {}, seq: {}".format(i+1, tcp.ack, prev_pkt["seq"]))
            else:
                rem_pkts.append(prev_pkt)
        rev_fs["outstanding_packets"] = rem_pkts

        if not ack_pkts:
            continue

        match_times = [pkt["time"] for pkt in ack_pkts]
        min_rtt = float(packet.time - max(match_times))
        max_rtt = float(packet.time - min(match_times))

        # Calculate rtt based on TCP timestamp if available
        timestamp_rtt = None
        if has_tsecr:
            for prev_pkt in ack_pkts:
                if prev_pkt["new_tsval"] and prev_pkt["tsval"] == tsecr:
                    timestamp_rtt = float(packet.time - prev_pkt["time"])
                    break

        rtts.append({"time": util.parse_unix_timestamp(str(packet.time)),
                     "flow": flow,
                     "min_rtt": min_rtt,
                     "max_rtt": max_rtt,
                     "timestamp_rtt": timestamp_rtt,
                     "rtt": min_rtt,  # add min_rtt as "rtt" as well to make default interaction with others easier
                     "ack": tcp.ack,
                     "tsecr": tsecr,
                     "retrans": any(pkt["retrans"] for pkt in ack_pkts)})
        if verbose:
            print("{}: RTT - flow: {}, min_rtt: {}, max_rtt: {}".format(i+1, flow, min_rtt, max_rtt))

    if not rtts:
        return None
    return pd.DataFrame.from_records(rtts)
def find_unsync_tsval_update(pcap_file, **kwargs):
    """Run the unsynchronised-TSval analysis on *pcap_file*.

    The work is delegated to ``sdl._run_on_xz_file``, which receives the
    analysis function, the file and any extra keyword arguments (e.g.
    ``max_packets``, ``verbose``). Its result is returned unchanged.
    """
    analysis = _find_unsync_tsval_update
    return sdl._run_on_xz_file(analysis, pcap_file, **kwargs)
def find_too_fast_retrans(pcap_file, **kwargs):
    """Run the too-fast-retransmission analysis on *pcap_file*.

    The work is delegated to ``sdl._run_on_xz_file``, which receives the
    analysis function, the file and any extra keyword arguments (e.g.
    ``max_packets``, ``verbose``). Its result is returned unchanged.
    """
    analysis = _find_too_fast_retrans
    return sdl._run_on_xz_file(analysis, pcap_file, **kwargs)
def calculate_rtts_from_pcap(pcap_file, **kwargs):
    """Run the RTT calculation on *pcap_file*.

    The work is delegated to ``sdl._run_on_xz_file``, which receives the
    analysis function, the file and any extra keyword arguments (e.g.
    ``max_packets``, ``verbose``). Its result is returned unchanged.
    """
    analysis = _calculate_rtts_from_pcap
    return sdl._run_on_xz_file(analysis, pcap_file, **kwargs)