Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding experimental Tracy API for TLS-less event recording. #19625

Merged
merged 1 commit into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 237 additions & 0 deletions runtime/src/iree/base/tracing/tracy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@
#include "TracyClient.cpp"
#endif // IREE_TRACING_FEATURES

#if defined(TRACY_ENABLE) && IREE_TRACING_EXPERIMENTAL_CONTEXT_API
// HACK: tracy doesn't expose its global event queue but we need it in order to
// create new queue contexts. It's an implementation detail we have to take a
// dependency on because tracy does not have an API for what we're doing (yet).
//
// NOTE(review): this is a namespace-qualified redeclaration, so it only
// compiles when `s_queue` has already been declared inside `namespace tracy`
// before this point. A macOS build with tracing enabled was reported to fail
// here with "no member named 's_queue' in namespace 'tracy'" - confirm which
// tracy configurations actually provide the symbol.
extern tracy::moodycamel::ConcurrentQueue<tracy::QueueItem> tracy::s_queue;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The macOS runtime Python bindings build failed last night with this change: https://github.com/iree-org/iree/actions/runs/12683971137/job/35351915575#step:10:2228

  [189/482] Building CXX object runtime/src/iree/base/tracing/CMakeFiles/iree_base_tracing_provider.objects.dir/tracy.cc.o
  FAILED: runtime/src/iree/base/tracing/CMakeFiles/iree_base_tracing_provider.objects.dir/tracy.cc.o
  /Applications/Xcode_15.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/c++ -DIREE_TRACING_MODE=2 -DIREE_TRACING_PROVIDER_H=\"iree/base/tracing/tracy.h\" -DTRACY_ENABLE -I/Users/runner/work/iree/iree/c -I/Users/runner/work/iree/iree/c/runtime/build/b/t -I/Users/runner/work/iree/iree/c/runtime/src -I/Users/runner/work/iree/iree/c/runtime/build/b/t/runtime/src -isystem /Users/runner/work/iree/iree/c/third_party/tracy/public -O3 -DNDEBUG -std=gnu++17 -arch arm64 -isysroot /Applications/Xcode_15.4.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.5.sdk -mmacosx-version-min=13.0 -fPIC -fvisibility=hidden -fno-rtti -fno-exceptions -Werror -Wall -Wno-error=deprecated-declarations -Wno-ambiguous-member-template -Wno-char-subscripts -Wno-extern-c-compat -Wno-gnu-alignof-expression -Wno-gnu-variable-sized-type-not-at-end -Wno-ignored-optimization-argument -Wno-invalid-offsetof -Wno-invalid-source-encoding -Wno-mismatched-tags -Wno-pointer-sign -Wno-reserved-user-defined-literal -Wno-return-type-c-linkage -Wno-self-assign-overloaded -Wno-sign-compare -Wno-signed-unsigned-wchar -Wno-strict-overflow -Wno-trigraphs -Wno-unknown-pragmas -Wno-unknown-warning-option -Wno-unused-command-line-argument -Wno-unused-const-variable -Wno-unused-function -Wno-unused-local-typedef -Wno-unused-private-field -Wno-user-defined-warnings -Wno-missing-braces -Wctad-maybe-unsupported -Wfloat-overflow-conversion -Wfloat-zero-conversion -Wfor-loop-analysis -Wformat-security -Wgnu-redeclared-enum -Wimplicit-fallthrough -Winfinite-recursion -Wliteral-conversion -Wnon-virtual-dtor -Woverloaded-virtual -Wpointer-arith -Wself-assign -Wstring-conversion -Wtautological-overlap-compare -Wthread-safety -Wthread-safety-beta -Wunused-comparison -Wvla -fno-lax-vector-conversions -fmacro-prefix-map=/Users/runner/work/iree/iree/c=c -flto=full -MD -MT runtime/src/iree/base/tracing/CMakeFiles/iree_base_tracing_provider.objects.dir/tracy.cc.o -MF 
runtime/src/iree/base/tracing/CMakeFiles/iree_base_tracing_provider.objects.dir/tracy.cc.o.d -o runtime/src/iree/base/tracing/CMakeFiles/iree_base_tracing_provider.objects.dir/tracy.cc.o -c /Users/runner/work/iree/iree/c/runtime/src/iree/base/tracing/tracy.cc
  /Users/runner/work/iree/iree/c/runtime/src/iree/base/tracing/tracy.cc:20:68: error: no member named 's_queue' in namespace 'tracy'
  extern tracy::moodycamel::ConcurrentQueue<tracy::QueueItem> tracy::s_queue;
                                                              ~~~~~~~^

Presubmit doesn't enable tracing, but the release Python packages do... hmm...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix in #19653, verified with #19668.

#endif // TRACY_ENABLE && IREE_TRACING_EXPERIMENTAL_CONTEXT_API

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
Expand Down Expand Up @@ -438,6 +445,236 @@ void iree_tracing_gpu_zone_notify(uint8_t context_id, uint16_t query_id,
void* iree_tracing_obscure_ptr(void* ptr) { return ptr; }
#endif // IREE_TRACING_FEATURE_ALLOCATION_TRACKING

//===----------------------------------------------------------------------===//
// Experimental Tracing Interop API
//===----------------------------------------------------------------------===//

#if IREE_TRACING_EXPERIMENTAL_CONTEXT_API

// State backing one tracing context: an explicit producer on tracy's global
// queue (tracy::s_queue) plus the thread id value stamped onto that producer.
// NOTE(review): presumably the stamped id is what makes tracy attribute events
// written through the context to a synthetic "thread" - confirm against tracy.
struct iree_tracing_context_t {
// Process-wide counter from which each new context takes its thread id.
static std::atomic<uint32_t> next_tracing_thread_id;
// Producer token constructed against tracy::s_queue.
tracy::moodycamel::ProducerToken token_detail;
// Wrapper around the explicit producer looked up from |token_detail|; all
// per-context queue writes go through token.ptr.
tracy::ProducerWrapper token;
// Id assigned to this context; also written into serial execution zone events.
uint32_t thread_id = 0;
// Initializers run in member declaration order: |token_detail| must exist
// before |token| can look up its explicit producer.
iree_tracing_context_t()
: token_detail(tracy::s_queue),
token({tracy::s_queue.get_explicit_producer(token_detail)}),
thread_id(iree_tracing_context_t::next_tracing_thread_id++) {
// Tag the producer with the id taken from the counter above.
token.ptr->threadId = thread_id;
}
};

// static
// First id handed out is 0x80000000.
// NOTE(review): presumably the high bit keeps these ids clear of real OS thread
// ids - confirm.
std::atomic<uint32_t> iree_tracing_context_t::next_tracing_thread_id{
0x80000000u};

// Begins writing one event of |queue_type| through the explicit producer owned
// by |context|. Declares a local named `item` (obtained from the producer's
// enqueue_begin) whose header type is already set; the caller fills in the
// remaining fields with tracy::MemWrite and then must invoke
// IREE_TRACING_CONTEXT_END_WRITE in the same scope. Because the macro declares
// locals it can be used at most once per scope.
//
// The helper locals use an iree_tracing_write_ prefix rather than `__magic`
// style names: identifiers containing a double underscore are reserved for the
// C++ implementation.
#define IREE_TRACING_CONTEXT_BEGIN_WRITE(context, queue_type)                 \
  tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t                    \
      iree_tracing_write_magic_;                                              \
  tracy::moodycamel::ConcurrentQueue<tracy::QueueItem>::ExplicitProducer*     \
      iree_tracing_write_producer_ = (context)->token.ptr;                    \
  auto& iree_tracing_write_tail_ =                                            \
      iree_tracing_write_producer_->get_tail_index();                         \
  auto item =                                                                 \
      iree_tracing_write_producer_->enqueue_begin(iree_tracing_write_magic_); \
  tracy::MemWrite(&item->hdr.type, (queue_type));

// Finishes the write started by IREE_TRACING_CONTEXT_BEGIN_WRITE by storing
// the index returned from enqueue_begin plus one into the producer's tail index
// with release ordering. |context| is accepted for symmetry and is unused.
#define IREE_TRACING_CONTEXT_END_WRITE(context) \
  iree_tracing_write_tail_.store(iree_tracing_write_magic_ + 1, \
                                 std::memory_order_release);

// Creates a new heap-allocated tracing context and returns it to the caller,
// who is expected to release it with iree_tracing_context_free.
//
// |name| and |name_length| are accepted but not used yet (see TODO).
iree_tracing_context_t* iree_tracing_context_allocate(
    const char* name, iree_host_size_t name_length) {
  // TODO(benvanik): upstream a tracy::Profiler::SetThreadNameWithHint that
  // only updates the GetThreadNameData() linked list with a new entry. Today
  // there's no way to set the thread name explicitly.
  (void)name;
  (void)name_length;
  return new iree_tracing_context_t();
}

// Destroys |context|. Passing a null pointer is a no-op.
void iree_tracing_context_free(iree_tracing_context_t* context) {
  // `delete` on a null pointer does nothing, so no explicit check is needed.
  delete context;
}

// Writes a tracy GpuCalibration event through |context| that pairs
// |host_timestamp| (cpuTime) with |executor_timestamp| (gpuTime) for the
// executor identified by |executor_id|, along with |cpu_delta|.
void iree_tracing_context_calibrate_executor(
    iree_tracing_context_t* context, iree_tracing_executor_id_t executor_id,
    int64_t cpu_delta, uint64_t host_timestamp, uint64_t executor_timestamp) {
  IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::GpuCalibration);
  // Field writes are independent of each other; grouped as id, then times.
  tracy::MemWrite(&item->gpuCalibration.context, executor_id);
  tracy::MemWrite(&item->gpuCalibration.cpuDelta, cpu_delta);
  tracy::MemWrite(&item->gpuCalibration.cpuTime, host_timestamp);
  tracy::MemWrite(&item->gpuCalibration.gpuTime, executor_timestamp);
  IREE_TRACING_CONTEXT_END_WRITE(context);
}

// Writes a tracy ZoneBegin event through |context| at |timestamp|.
// |src_loc| is stored by address (as a 64-bit integer), not copied.
// NOTE(review): presumably the location must therefore outlive the trace -
// confirm against tracy's handling of srcloc.
void iree_tracing_context_zone_begin(iree_tracing_context_t* context,
                                     uint64_t timestamp,
                                     const iree_tracing_location_t* src_loc) {
  const uint64_t src_loc_bits = reinterpret_cast<uint64_t>(src_loc);
  IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::ZoneBegin);
  tracy::MemWrite(&item->zoneBegin.srcloc, src_loc_bits);
  tracy::MemWrite(&item->zoneBegin.time, timestamp);
  IREE_TRACING_CONTEXT_END_WRITE(context);
}

// Writes a tracy ZoneEnd event through |context| at |timestamp|.
void iree_tracing_context_zone_end(iree_tracing_context_t* context,
                                   uint64_t timestamp) {
  IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::ZoneEnd);
  // The only payload of a zone end is its time.
  tracy::MemWrite(&item->zoneEnd.time, timestamp);
  IREE_TRACING_CONTEXT_END_WRITE(context);
}

// Writes a tracy ZoneValue event carrying |value| through |context|.
// NOTE(review): presumably tracy attaches the value to the zone most recently
// begun on this context - confirm.
void iree_tracing_context_zone_value_i64(iree_tracing_context_t* context,
                                         uint64_t value) {
  IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::ZoneValue);
  tracy::MemWrite(&item->zoneValue.value, value);
  IREE_TRACING_CONTEXT_END_WRITE(context);
}

// Records the NUL-terminated string |value| as zone text on |context|.
// Forwards to iree_tracing_context_zone_value_text_dynamic with the length
// measured by strlen.
void iree_tracing_context_zone_value_text_literal(
    iree_tracing_context_t* context, const char* value) {
  // NOTE: no literal tracing support, have to use the slow path.
  const auto value_length = strlen(value);
  iree_tracing_context_zone_value_text_dynamic(context, value, value_length);
}

// Writes a tracy ZoneText event through |context| carrying a private copy of
// the first |value_length| bytes of |value| (no NUL terminator required).
//
// The queue item stores the length as a uint16_t, so text longer than
// UINT16_MAX bytes is truncated to UINT16_MAX bytes. The copy is allocated
// with tracy::tracy_malloc and its address is handed to tracy in the event.
// NOTE(review): presumably tracy frees the copy once it has been sent -
// confirm.
void iree_tracing_context_zone_value_text_dynamic(
    iree_tracing_context_t* context, const char* value,
    iree_host_size_t value_length) {
  // Clamp rather than letting the narrowing cast wrap: a wrapped length would
  // make the size reported to tracy disagree with the bytes actually copied.
  const uint16_t text_length = static_cast<uint16_t>(
      value_length < UINT16_MAX ? value_length : UINT16_MAX);
  char* text = static_cast<char*>(tracy::tracy_malloc(text_length));
  // Skip the copy for empty text so a null |value| is never passed to memcpy.
  if (text_length > 0) memcpy(text, value, text_length);
  IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::ZoneText);
  tracy::MemWrite(&item->zoneTextFat.text, reinterpret_cast<uint64_t>(text));
  tracy::MemWrite(&item->zoneTextFat.size, text_length);
  IREE_TRACING_CONTEXT_END_WRITE(context);
}

// TODO(benvanik): figure out why serial recording works with GPU zones and
// thread-local recording doesn't (sometimes?). May be timing related.
//
// When nonzero, the execution zone begin/end/notify functions below record
// through tracy::Profiler::QueueSerial() instead of the context's own producer.
#define IREE_TRACING_CONTEXT_SERIAL_FALLBACK 1

// Records the start of an execution (GPU-style) zone for |executor_id| using
// |query_id| to later match the executor-side timestamp. |timestamp| is the
// host time of the begin and |src_loc| is stored by address.
//
// With IREE_TRACING_CONTEXT_SERIAL_FALLBACK set the event goes through tracy's
// serial queue as GpuZoneBeginSerial; otherwise it is written through the
// producer owned by |context| as GpuZoneBegin. Both paths stamp the event with
// the context's thread id.
void iree_tracing_context_execution_zone_begin(
    iree_tracing_context_t* context, uint64_t timestamp,
    const iree_tracing_location_t* src_loc,
    iree_tracing_executor_id_t executor_id, iree_tracing_query_id_t query_id) {
  // Widen the pointer to a 64-bit integer explicitly, matching
  // iree_tracing_context_zone_begin, so the whole srcloc field is written
  // regardless of the host pointer width (MemWrite is typed on its argument).
  const uint64_t src_loc_bits = reinterpret_cast<uint64_t>(src_loc);
#if IREE_TRACING_CONTEXT_SERIAL_FALLBACK
  auto* item = tracy::Profiler::QueueSerial();
  tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuZoneBeginSerial);
  tracy::MemWrite(&item->gpuZoneBegin.cpuTime, timestamp);
  tracy::MemWrite(&item->gpuZoneBegin.srcloc, src_loc_bits);
  tracy::MemWrite(&item->gpuZoneBegin.thread, context->thread_id);
  tracy::MemWrite(&item->gpuZoneBegin.queryId, query_id);
  tracy::MemWrite(&item->gpuZoneBegin.context, executor_id);
  tracy::Profiler::QueueSerialFinish();
#else
  IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::GpuZoneBegin);
  tracy::MemWrite(&item->gpuZoneBegin.cpuTime, timestamp);
  tracy::MemWrite(&item->gpuZoneBegin.thread, context->thread_id);
  tracy::MemWrite(&item->gpuZoneBegin.queryId, query_id);
  tracy::MemWrite(&item->gpuZoneBegin.context, executor_id);
  tracy::MemWrite(&item->gpuZoneBegin.srcloc, src_loc_bits);
  IREE_TRACING_CONTEXT_END_WRITE(context);
#endif  // IREE_TRACING_CONTEXT_SERIAL_FALLBACK
}

// Records the end of an execution zone for |executor_id| / |query_id| at host
// time |timestamp|, stamped with the thread id of |context|.
//
// With IREE_TRACING_CONTEXT_SERIAL_FALLBACK set the event is written to tracy's
// serial queue as GpuZoneEndSerial; otherwise it is written through the
// context's own producer as GpuZoneEnd.
void iree_tracing_context_execution_zone_end(
iree_tracing_context_t* context, uint64_t timestamp,
iree_tracing_executor_id_t executor_id, iree_tracing_query_id_t query_id) {
#if IREE_TRACING_CONTEXT_SERIAL_FALLBACK
// Serial path: the item comes from (and is finished on) tracy's serial queue.
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuZoneEndSerial);
tracy::MemWrite(&item->gpuZoneEnd.cpuTime, timestamp);
tracy::MemWrite(&item->gpuZoneEnd.thread, context->thread_id);
tracy::MemWrite(&item->gpuZoneEnd.queryId, query_id);
tracy::MemWrite(&item->gpuZoneEnd.context, executor_id);
tracy::Profiler::QueueSerialFinish();
#else
// Per-context path: same fields, written through the context's producer.
IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::GpuZoneEnd);
tracy::MemWrite(&item->gpuZoneEnd.cpuTime, timestamp);
tracy::MemWrite(&item->gpuZoneEnd.thread, context->thread_id);
tracy::MemWrite(&item->gpuZoneEnd.queryId, query_id);
tracy::MemWrite(&item->gpuZoneEnd.context, executor_id);
IREE_TRACING_CONTEXT_END_WRITE(context);
#endif // IREE_TRACING_CONTEXT_SERIAL_FALLBACK
}

// Reports the executor-side timestamp |query_timestamp| for the query
// |query_id| on |executor_id| as a tracy GpuTime event.
//
// With IREE_TRACING_CONTEXT_SERIAL_FALLBACK set |context| is not used and the
// event goes to tracy's serial queue; otherwise it is written through the
// context's own producer.
void iree_tracing_context_execution_zone_notify(
iree_tracing_context_t* context, iree_tracing_executor_id_t executor_id,
iree_tracing_query_id_t query_id, uint64_t query_timestamp) {
#if IREE_TRACING_CONTEXT_SERIAL_FALLBACK
// NOTE(review): this path both calls iree_tracing_gpu_zone_notify and then
// enqueues a GpuTime item with the same ids and timestamp. If that helper
// already records a GpuTime event the query would be reported twice - confirm
// whether both are intended.
iree_tracing_gpu_zone_notify(executor_id, query_id, query_timestamp);
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite(&item->hdr.type, tracy::QueueType::GpuTime);
tracy::MemWrite(&item->gpuTime.gpuTime, query_timestamp);
tracy::MemWrite(&item->gpuTime.queryId, query_id);
tracy::MemWrite(&item->gpuTime.context, executor_id);
tracy::Profiler::QueueSerialFinish();
#else
// Per-context path: a single GpuTime item written through the producer.
IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::GpuTime);
tracy::MemWrite(&item->gpuTime.gpuTime, query_timestamp);
tracy::MemWrite(&item->gpuTime.queryId, query_id);
tracy::MemWrite(&item->gpuTime.context, executor_id);
IREE_TRACING_CONTEXT_END_WRITE(context);
#endif // IREE_TRACING_CONTEXT_SERIAL_FALLBACK
}

// Placeholder for recording an allocation of |size| bytes at |ptr| in |pool|.
// Currently does nothing: no event is written (see TODO).
void iree_tracing_context_memory_alloc(iree_tracing_context_t* context,
                                       uint64_t timestamp, const char* pool,
                                       uint64_t ptr, uint64_t size) {
  // TODO(benvanik): add a thread override to MemAllocNamed - it does shady
  // things with m_memNamePayload that we can't easily replicate outside of the
  // tracy implementation.
  (void)context;
  (void)timestamp;
  (void)pool;
  (void)ptr;
  (void)size;
}

// Placeholder for recording the release of the allocation at |ptr| in |pool|.
// Currently does nothing: no event is written (see TODO).
void iree_tracing_context_memory_free(iree_tracing_context_t* context,
                                      uint64_t timestamp, const char* pool,
                                      uint64_t ptr) {
  // TODO(benvanik): add a thread override to MemFreeNamed- it does shady
  // things with m_memNamePayload that we can't easily replicate outside of the
  // tracy implementation.
  (void)context;
  (void)timestamp;
  (void)pool;
  (void)ptr;
}

// Writes a tracy MessageLiteral event through |context| at |timestamp|.
// Only the address of |value| is recorded; the characters are not copied.
// NOTE(review): presumably |value| must stay valid until tracy has consumed
// the event (e.g. a string literal) - confirm.
void iree_tracing_context_message_literal(iree_tracing_context_t* context,
                                          uint64_t timestamp,
                                          const char* value) {
  const uint64_t text_address = reinterpret_cast<uint64_t>(value);
  IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::MessageLiteral);
  tracy::MemWrite(&item->messageLiteral.text, text_address);
  tracy::MemWrite(&item->messageLiteral.time, timestamp);
  IREE_TRACING_CONTEXT_END_WRITE(context);
}

// Writes a tracy Message event through |context| at |timestamp| carrying a
// private copy of the first |value_length| bytes of |value| (no NUL terminator
// required).
//
// The queue item stores the length as a uint16_t, so messages longer than
// UINT16_MAX bytes are truncated to UINT16_MAX bytes. The copy is allocated
// with tracy::tracy_malloc and its address is handed to tracy in the event.
// NOTE(review): presumably tracy frees the copy once it has been sent -
// confirm.
void iree_tracing_context_message_dynamic(iree_tracing_context_t* context,
                                          uint64_t timestamp, const char* value,
                                          iree_host_size_t value_length) {
  // Clamp rather than letting the narrowing cast wrap: a wrapped length would
  // make the size reported to tracy disagree with the bytes actually copied.
  const uint16_t text_length = static_cast<uint16_t>(
      value_length < UINT16_MAX ? value_length : UINT16_MAX);
  char* text = static_cast<char*>(tracy::tracy_malloc(text_length));
  // Skip the copy for empty text so a null |value| is never passed to memcpy.
  if (text_length > 0) memcpy(text, value, text_length);
  IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::Message);
  tracy::MemWrite(&item->messageFat.time, timestamp);
  tracy::MemWrite(&item->messageFat.text, reinterpret_cast<uint64_t>(text));
  tracy::MemWrite(&item->messageFat.size, text_length);
  IREE_TRACING_CONTEXT_END_WRITE(context);
}

// Writes a tracy PlotConfig event through |context| for the plot identified by
// |name_literal|, setting its |type|, |step| and |fill| flags and |color|.
// The name is recorded by address; the booleans are stored as single bytes.
void iree_tracing_context_plot_config(iree_tracing_context_t* context,
                                      const char* name_literal, uint8_t type,
                                      bool step, bool fill, uint32_t color) {
  const uint64_t name_address = reinterpret_cast<uint64_t>(name_literal);
  const uint8_t step_flag = static_cast<uint8_t>(step);
  const uint8_t fill_flag = static_cast<uint8_t>(fill);
  IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::PlotConfig);
  tracy::MemWrite(&item->plotConfig.name, name_address);
  tracy::MemWrite(&item->plotConfig.type, static_cast<uint8_t>(type));
  tracy::MemWrite(&item->plotConfig.step, step_flag);
  tracy::MemWrite(&item->plotConfig.fill, fill_flag);
  tracy::MemWrite(&item->plotConfig.color, color);
  IREE_TRACING_CONTEXT_END_WRITE(context);
}

// Writes a tracy PlotDataInt event through |context| recording |value| for the
// plot |plot_name| at |timestamp|. The plot name is recorded by address.
void iree_tracing_context_plot_value_i64(iree_tracing_context_t* context,
                                         uint64_t timestamp,
                                         const char* plot_name, int64_t value) {
  const uint64_t name_address = reinterpret_cast<uint64_t>(plot_name);
  IREE_TRACING_CONTEXT_BEGIN_WRITE(context, tracy::QueueType::PlotDataInt);
  tracy::MemWrite(&item->plotDataInt.time, timestamp);
  tracy::MemWrite(&item->plotDataInt.name, name_address);
  tracy::MemWrite(&item->plotDataInt.val, value);
  IREE_TRACING_CONTEXT_END_WRITE(context);
}

#endif // IREE_TRACING_EXPERIMENTAL_CONTEXT_API

#endif // IREE_TRACING_FEATURES

#ifdef __cplusplus
Expand Down
Loading
Loading