Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

programs: Add utility to convert Proton fsync log into queue profile. #1914

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 162 additions & 0 deletions programs/perf-script-to-profile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#!/usr/bin/env python3

"""
Copyright 2025 Hans-Kristian Arntzen for Valve Corporation

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
"""

"""
Script which reads perf.data and converts it into the VKD3D_QUEUE_PROFILE format for correlation with D3D12 events.

To use:

Disable paranoid perf (or use sudo when capturing):
# echo -1 > /proc/sys/kernel/perf_event_paranoid

$ VKD3D_QUEUE_PROFILE=/tmp/profile.json VKD3D_QUEUE_PROFILE_ABSOLUTE=1 $game

# While game is running, find the PID and capture it.
# Proton uses MONOTONIC_RAW for its timebase it seems, so need that to correlate with vkd3d-proton profile.
$ perf record -F $rate -k CLOCK_MONOTONIC_RAW --pid $pid

# Add perf info to queue profile
$ python perf-script-to-profile.py --rate $rate perf.data >> /tmp/profile.json
"""

import argparse
import os
import sys
import subprocess
import re
import collections
import math

class ThreadState:
def __init__(self, comm, tid, expected_delta):
self.start_ts = 0.0
self.last_ts = 0.0
self.idle_delta = expected_delta
self.comm = comm
self.current_sym = ''
self.current_dso = ''
self.cycles = 0
self.tid = tid

def _begin_event(self, ts, cycles, sym, dso):
self.start_ts = ts
self.last_ts = ts
self.current_sym = sym
self.current_dso = dso
self.cycles = cycles

def register_event(self, ts, cycles, sym, dso):
if self.start_ts == 0.0:
# First event
self._begin_event(ts, cycles, sym, dso)
elif (ts - self.last_ts) > (1.5 * self.idle_delta):
# Detected some idling
# Assume that thread went idle in the middle of the sampling period
self.last_ts += 0.5 * self.idle_delta
self.flush_event()

#self.start_ts = self.last_ts
#self.last_ts = ts
#self.current_dso = 'idle'
#self.current_sym = 'idle'
#self.flush_event()

self._begin_event(ts, cycles, sym, dso)
elif self.current_sym != sym or self.current_dso != dso:
self.last_ts = ts
self.flush_event()
self._begin_event(ts, cycles, sym, dso)
else:
# Keep going
self.last_ts = ts
self.cycles += cycles

def flush_event(self):
if self.start_ts != self.last_ts:
print('{', '"name": "{} ({}) ({} cycles)", "ph" : "X", "tid": "{} ({})", "pid": "perf", "ts": {}, "dur": {}'.format(
self.current_dso, self.current_sym, self.cycles, self.comm, self.tid, self.start_ts * 1000000.0,
(self.last_ts - self.start_ts) * 1000000.0), '},')

def main():
parser = argparse.ArgumentParser(description = 'Script for parsing perf profiling data.')
parser.add_argument('profile', help = 'The profile binary blob (perf.data).')
parser.add_argument('--rate', type = int, default = 4000, help = 'The sampling rate used (used to detect discontinuity)')
parser.add_argument('--start', type = float, default = 0.0, help = 'Start timestamp to emit (in seconds)')
parser.add_argument('--end', type = float, default = math.inf, help = 'End timestamp to emit (in seconds)')
parser.add_argument('--filter-tids', nargs = '+', action = 'extend', type = int, help = 'Only emit provided tids')

args = parser.parse_args()
if not args.profile:
raise AssertionError('Need profile folder.')

expected_delta = 1.0 / args.rate
thread_state = {}

with subprocess.Popen(['perf', 'script', '-F', 'trace:sym,tid,time', '--ns', '-i', args.profile], stdout = subprocess.PIPE) as proc:
while True:
line = proc.stdout.readline()
if not line:
break
line = line.decode('utf-8')
if line[-1] == '\n':
line = line[0:-1]

if line[0] == '\t':
# Ignore empty first lines, since they are used for backtraces when -g is used.
continue

comm = line[0:16].strip()
line = line[16:]

elems = list(filter(None, line.split(' ')))

if len(elems) < 7:
continue

tid = int(elems[0])

if args.filter_tids and (tid not in args.filter_tids):
continue

ts = float(elems[1][0:-1])

if ts < args.start or ts > args.end:
continue

cycles = int(elems[2])
sym = elems[5]
dso = elems[6]

# Massage the DSO so it's more readable.
dso = os.path.basename(dso)
if dso[-1] == ')':
dso = dso[:-1]

if tid not in thread_state:
thread_state[tid] = ThreadState(comm, tid, expected_delta)

state = thread_state[tid]
state.register_event(ts, cycles, sym, dso)

for i in thread_state.items():
i[1].flush_event()

if __name__ == '__main__':
main()
169 changes: 169 additions & 0 deletions programs/vkd3d-fsync-log-to-profile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
#!/usr/bin/env python3

"""
Copyright 2024 Hans-Kristian Arntzen for Valve Corporation

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
"""

"""
Hacky script that parses PROTON_LOG=+fsync,+microsecs and emits wait-states in VKD3D_QUEUE_PROFILE format with VKD3D_QUEUE_PROFILE_ABSOLUTE=1.
Equivalent in more plain Wine would be WINEDEBUG=+fsync,+timestamp,+pid,+tid,+threadname,+microsecs WINEFSYNC=1.
Can be used to directly append to vkd3d-proton generated JSON.
"""

import re
import sys
import argparse

def print_json_wait(tid, desc, start_ts, end_ts):
print('{', '"name": "{}", "ph" : "X", "tid": "{}", "pid": "fsync wait", "ts": {}, "dur": {}'.format(desc, tid, start_ts, end_ts - start_ts), '},')

def print_json_signal(tid, desc, ts):
print('{', '"name": "{}", "ph" : "i", "tid": "{}", "pid": "fsync signal", "ts": {}'.format(desc, tid, ts), '},')

def print_nt_read_file(tid, desc, ts):
print('{', '"name": "{}", "ph" : "i", "tid": "{}", "pid": "NtReadFile", "ts": {}'.format(desc, tid, ts), '},')

def strip_hex_prefix(x):
if x.startswith('0x'):
return x[2:]
else:
return x

def time_in_bounds(time, start_time, end_time):
if start_time == end_time:
return True
else:
return time >= start_time and time <= end_time

def parse_proton_log(file, start_time, duration_time):
end_time = start_time + duration_time
find_threadname = re.compile(r'.+:([0-9A-Fa-f]+):warn:threadname:.*renamed to L"(.+)"$')
find_wait = re.compile(r'(\d+\.\d+):.+:([0-9A-Fa-f]+):trace:fsync:__fsync_wait_objects Waiting for (all|any) of \d+ handles: ([^,]+),.*$')
find_wake = re.compile(r'(\d+\.\d+):.+:([0-9A-Fa-f]+):trace:fsync:__fsync_wait_objects Woken up by handle 0x([0-9A-Fa-f]+) .*$')
find_wake_timeout = re.compile(r'(\d+\.\d+):.+:([0-9A-Fa-f]+):trace:fsync:__fsync_wait_objects Wait timed out.$')
find_set_event = re.compile(r'(\d+\.\d+):.+:([0-9A-Fa-f]+):trace:fsync:fsync_set_event 0x([0-9A-Fa-f]+)\.$')
find_release_sem = re.compile(r'(\d+\.\d+):.+:([0-9A-Fa-f]+):trace:fsync:fsync_release_semaphore 0x([0-9A-Fa-f]+),.*$')
find_nt_read_file = re.compile(r'(\d+\.\d+):.+:([0-9A-Fa-f]+):trace:file:NtReadFile \((.+)\)$')
thread_id_to_name = {}
sleep_states = {}
signal_cause = {}

for line in file.readlines():
m = find_threadname.match(line)
if m:
thread_id_to_name[m[1]] = m[1] + ' (' + m[2] + ')'

m = find_wait.search(line)
if m:
time = int(float(m[1]) * 1e6)
tid = m[2]
any_or_all = m[3]
handle_list = m[4]
if tid in sleep_states:
raise Exception('{} has a sleep state already. line: "{}"'.format(tid, m[0]))
sleep_states[tid] = (any_or_all, [strip_hex_prefix(x) for x in handle_list.split(' ')], time)
continue

m = find_wake_timeout.search(line)
if m:
time = int(float(m[1]) * 1e6)
tid = m[2]
try:
state = sleep_states.pop(tid)
name = thread_id_to_name.get(tid, tid)
pretty_list = ', '.join(state[1])
desc = '{} {} timeout'.format(state[0], pretty_list)
if time_in_bounds(time, start_time, end_time):
print_json_wait(name, desc, state[2], time)
except KeyError as e:
raise Exception('{} timed out, but there is no wait state? line: "{}"'.format(tid, m[0]))
continue

m = find_wake.search(line)
if m:
time = int(float(m[1]) * 1e6)
tid = m[2]
handle = m[3]
try:
state = sleep_states.pop(tid)
name = thread_id_to_name.get(tid, tid)

pretty_list = ', '.join(state[1])
if len(state[1]) > 1 and state[0] == 'any':
desc = '{} {} woken by {}'.format(state[0], pretty_list, handle)
else:
desc = '{} {}'.format(state[0], pretty_list)

cause_tid = signal_cause.get(handle)
if cause_tid:
cause_tid = signal_cause.pop(handle)
desc += ' [signal by {}]'.format(thread_id_to_name.get(cause_tid, cause_tid))

if time_in_bounds(time, start_time, end_time) or time_in_bounds(state[2], start_time, end_time):
print_json_wait(name, desc, state[2], time)
except KeyError as e:
raise Exception('{} was woken up, but there is no wait state? line: "{}"'.format(tid, m[0]))
continue

m = find_set_event.search(line)
if m:
time = int(float(m[1]) * 1e6)
tid = m[2]
handle = m[3]
name = thread_id_to_name.get(tid, tid)
if time_in_bounds(time, start_time, end_time):
print_json_signal(name, 'event ' + handle, time)
signal_cause[handle] = tid
continue

m = find_release_sem.search(line)
if m:
time = int(float(m[1]) * 1e6)
tid = m[2]
handle = m[3]
name = thread_id_to_name.get(tid, tid)
if time_in_bounds(time, start_time, end_time):
print_json_signal(name, 'semaphore ' + handle, time)
signal_cause[handle] = tid
continue

m = find_nt_read_file.search(line)
if m:
time = int(float(m[1]) * 1e6)
tid = m[2]
name = thread_id_to_name.get(tid, tid)
if 'vkd3d' in name or 'wine_' in name:
continue
if time_in_bounds(time, start_time, end_time):
args = m[3].split(',')
print_nt_read_file(name, 'handle {}, {} bytes'.format(args[0], int(args[6], 16)), time)

def main():
parser = argparse.ArgumentParser(description = 'Script for dumping Proton fsync to trace.')
parser.add_argument('--start', default = 0, type = float, help = 'Filter events starting from time in seconds.')
parser.add_argument('--duration', default = 0, type = float, help = 'Duration for event filtering in seconds.')
parser.add_argument('log', help = 'Path to PROTON_LOG=+fsync,+microsecs output.')
args = parser.parse_args()

if not args.log:
raise AssertionError('Missing log.')

with open(args.log, 'r') as f:
parse_proton_log(f, int(args.start * 1e6), int(args.duration * 1e6))

if __name__ == '__main__':
main()