Skip to content

Commit

Permalink
EDGEML-8777 - [Linux NPU Driver]: Support firmware log buffer
Browse files Browse the repository at this point in the history
[Why]
  Log buffer support is required to enhance debugging support on NPU stack

[How]
  1. Allocate log buffer and share with firmware via 'start_event_trace' message
     Config start_event_trace request param, send via mgmt channel.
     Handle the send buffer resp.
  2. Receive notifications about log buffer fullness, via interrupt from firmware.
     Handle channel interrupt, process further buffer data and prepare new buffer.
  3. Add the stop_event_trace_send API and its handler to stop logging when aie2 shuts down.

Signed-off-by: Vinit Shukla <[email protected]>
  • Loading branch information
VinitAmd committed Dec 23, 2024
1 parent 929e8ab commit add9c42
Show file tree
Hide file tree
Showing 7 changed files with 231 additions and 2 deletions.
1 change: 1 addition & 0 deletions src/driver/amdxdna/Kbuild
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ amdxdna-y := \
aie2_error.o \
aie2_debugfs.o \
aie2_message.o \
aie2_event_trace.o \
aie2_pm.o \
aie2_pci.o \
npu1_regs.o \
Expand Down
87 changes: 87 additions & 0 deletions src/driver/amdxdna/aie2_event_trace.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2025, Advanced Micro Devices, Inc.
*/

#include <linux/kthread.h>
#include <linux/kernel.h>
#include <linux/dma-mapping.h>
#include <drm/drm_cache.h>
#include "aie2_msg_priv.h"
#include "aie2_pci.h"

/*
 * Driver-side record of the firmware event trace buffer.
 *
 * @ndev:      device handle pointer (not assigned by the allocation path
 *             visible in this file)
 * @trace_req: start_event_trace request parameters; dram_buffer_address and
 *             dram_buffer_size hold the DMA address and size of @buf
 * @buf:       CPU address returned by dma_alloc_noncoherent()
 */
struct event_trace_req_buf {
	struct amdxdna_dev_hdl *ndev;
	struct start_event_trace_req trace_req;
	u8 *buf;
};

/*
 * Send the stop-event-trace request to firmware.
 *
 * Emits a DRM warning when xdna->dev_lock is not locked, then forwards to
 * aie2_stop_event_trace() and returns its result unchanged.
 */
int aie2_stop_event_trace_send(struct amdxdna_dev_hdl *ndev)
{
	drm_WARN_ON(&ndev->xdna->ddev, !mutex_is_locked(&ndev->xdna->dev_lock));

	return aie2_stop_event_trace(ndev);
}

int aie2_event_trace_alloc(struct amdxdna_dev_hdl *ndev)
{
int ret;
struct amdxdna_dev *xdna = ndev->xdna;
struct event_trace_req_buf *req_buf;

req_buf = kzalloc(sizeof(struct event_trace_req_buf), GFP_KERNEL);
if (!req_buf)
return -ENOMEM;

req_buf->buf = dma_alloc_noncoherent(xdna->ddev.dev, TRACE_EVENT_BUFFER_SIZE, (dma_addr_t *)&req_buf->trace_req.dram_buffer_address,
DMA_FROM_DEVICE, GFP_KERNEL);
if (!req_buf->buf) {
ret = -ENOMEM;
goto free_event_trace_req_buf;
}
req_buf->trace_req.dram_buffer_size = TRACE_EVENT_BUFFER_SIZE;
ndev->event_trace_req = req_buf;

XDNA_INFO(xdna, "trace event buf size %d, dram_buffer_address 0x%llx",
req_buf->trace_req.dram_buffer_size, req_buf->trace_req.dram_buffer_address);
return 0;

free_event_trace_req_buf:
kfree(req_buf);
return ret;
}

void aie2_event_trace_free(struct amdxdna_dev_hdl *ndev)
{
struct amdxdna_dev *xdna = ndev->xdna;
struct event_trace_req_buf *req_buf = ndev->event_trace_req;

if (!req_buf)
return;

dma_free_noncoherent(xdna->ddev.dev, req_buf->trace_req.dram_buffer_size, req_buf->buf,
(dma_addr_t)req_buf->trace_req.dram_buffer_address, DMA_FROM_DEVICE);
kfree(req_buf);
}

/*
 * Allocate the trace buffer and ask firmware to start event tracing into it.
 *
 * After a successful allocation the buffer's CPU cache lines are flushed and
 * the start request is sent with the buffer's DMA address and size. Emits a
 * DRM warning when xdna->dev_lock is not locked at that point.
 *
 * The buffer is not freed here if sending the request fails; it stays
 * recorded in ndev->event_trace_req.
 *
 * Return: 0 on success, otherwise the error from the allocation or from
 * aie2_start_event_trace().
 */
int aie2_start_event_trace_send(struct amdxdna_dev_hdl *ndev)
{
	struct amdxdna_dev *xdna = ndev->xdna;
	struct event_trace_req_buf *trace;
	int err;

	err = aie2_event_trace_alloc(ndev);
	if (err) {
		XDNA_ERR(xdna, "Failed to allocate event trace buffer");
		return err;
	}

	trace = ndev->event_trace_req;
	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	drm_clflush_virt_range(trace->buf, trace->trace_req.dram_buffer_size);

	return aie2_start_event_trace(ndev, trace->trace_req.dram_buffer_address,
				      trace->trace_req.dram_buffer_size, &trace->trace_req);
}
43 changes: 42 additions & 1 deletion src/driver/amdxdna/aie2_message.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
/*
 * Declare the on-stack message objects for an AIE2 request through
 * DECLARE_XDNA_MSG_COMMON, passing MAX_AIE2_STATUS_CODE as the status
 * argument.
 */
#define DECLARE_AIE2_MSG(name, op) \
DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)

/*
 * Same as above but built on DECLARE_XDNA_STOP_EVENT_TRACE_MSG; used for the
 * stop-event-trace request.
 */
#define DECLARE_AIE2_STOP_EVENT_TRACE_MSG(name, op) \
DECLARE_XDNA_STOP_EVENT_TRACE_MSG(name, op, MAX_AIE2_STATUS_CODE)

/*
 * Send a management message and wait for its response, passing 0 as the
 * offset argument of aie2_send_mgmt_msg_wait_offset().
 */
#define aie2_send_mgmt_msg_wait(ndev, msg) \
aie2_send_mgmt_msg_wait_offset(ndev, msg, 0)

Expand Down Expand Up @@ -64,8 +67,20 @@ aie2_send_mgmt_msg_wait_offset(struct amdxdna_dev_hdl *ndev,

if (!ret && hdl->data[offset] != AIE2_STATUS_SUCCESS) {
XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
msg->opcode, *hdl->data);
msg->opcode, *hdl->data);
ret = -EINVAL;
} else if (ret) {
XDNA_ERR(xdna, "Send message failed, ret %d", ret);
} else {
XDNA_INFO(xdna, "Command opcode 0x%x success", msg->opcode);
if (msg->opcode == MSG_OP_START_EVENT_TRACE) {
struct start_event_trace_resp *resp = (struct start_event_trace_resp *)hdl->data;
XDNA_INFO(xdna, "Event trace started, status %d msi %u ts 0x%lld",
resp->status, resp->msi_idx, resp->current_timestamp);
} else if (msg->opcode == MSG_OP_STOP_EVENT_TRACE) {
struct stop_event_trace_resp *resp = (struct stop_event_trace_resp *)hdl->data;
XDNA_INFO(xdna, "Stop event trace status %d \n", resp->status);
}
}

return ret;
Expand Down Expand Up @@ -255,6 +270,32 @@ int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
return 0;
}

/*
 * Send MSG_OP_START_EVENT_TRACE to firmware over the management channel.
 *
 * @ndev:   device handle
 * @addr:   DMA address of the trace buffer
 * @size:   size of the trace buffer
 * @handle: currently unused by this function
 *
 * The request selects the DRAM destination, sets every bit of the category
 * mask and selects the CPU_CCOUNT timestamp source.
 *
 * Return: result of aie2_send_mgmt_msg_wait().
 */
int aie2_start_event_trace(struct amdxdna_dev_hdl *ndev, dma_addr_t addr,
			   u32 size, void *handle)
{
	DECLARE_AIE2_MSG(start_event_trace, MSG_OP_START_EVENT_TRACE);
	struct amdxdna_dev *xdna = ndev->xdna;
	int ret;

	req.dram_buffer_address = addr;
	req.dram_buffer_size = size;
	req.event_trace_dest = EVENT_TRACE_DEST_DRAM;
	req.event_trace_categories = 0xFFFFFFFF;
	req.event_trace_timestamp = EVENT_TRACE_TIMESTAMP_CPU_CCOUNT;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	/* %pad is the width-safe specifier for dma_addr_t and prints the 0x prefix */
	XDNA_INFO(xdna, "trace buf addr %pad size 0x%x ret: %d", &addr, size, ret);
	return ret;
}

int aie2_stop_event_trace(struct amdxdna_dev_hdl *ndev)
{
DECLARE_AIE2_STOP_EVENT_TRACE_MSG(stop_event_trace, MSG_OP_STOP_EVENT_TRACE);

printk(KERN_INFO "send stop trace msg\n");
return aie2_send_mgmt_msg_wait(ndev, &msg);
}

int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
{
DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT);
Expand Down
53 changes: 53 additions & 0 deletions src/driver/amdxdna/aie2_msg_priv.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ enum aie2_msg_opcode {
MSG_OP_SET_RUNTIME_CONFIG = 0x10A,
MSG_OP_GET_RUNTIME_CONFIG = 0x10B,
MSG_OP_REGISTER_ASYNC_EVENT_MSG = 0x10C,
MSG_OP_START_EVENT_TRACE = 0x10F,
MSG_OP_STOP_EVENT_TRACE = 0x110,
MSG_OP_MAX_DRV_OPCODE,
MSG_OP_GET_PROTOCOL_VERSION = 0x301,
MSG_OP_MAX_OPCODE
Expand Down Expand Up @@ -387,6 +389,57 @@ struct async_event_msg_resp {
enum async_event_type type;
} __packed;

/*************** Start of event tracing data structs ****************/
/* Size in bytes of the DMA buffer allocated for firmware event traces (1 MiB) */
#define TRACE_EVENT_BUFFER_SIZE		(1 << 20)

/* Event trace category bits (presumably OR-ed into event_trace_categories) */
#define EVENT_TRACE_CATEGORY_OVERVIEW	(1u << 0)
#define EVENT_TRACE_CATEGORY_TCT	(1u << 1)
#define EVENT_TRACE_CATEGORY_LATENCY	(1u << 2)
#define EVENT_TRACE_CATEGORY_LATENCY2	(1u << 3)

/*
 * Destination selector carried in start_event_trace_req.event_trace_dest.
 * Values are spelled out because they are sent to firmware.
 */
enum event_trace_destination {
	EVENT_TRACE_DEST_DEBUG_BUS = 0,
	EVENT_TRACE_DEST_DRAM = 1,
	EVENT_TRACE_DEST_COUNT = 2
};

/*
 * Timestamp source selector carried in
 * start_event_trace_req.event_trace_timestamp. Values are spelled out
 * because they are sent to firmware.
 */
enum event_trace_timestamp {
	EVENT_TRACE_TIMESTAMP_FW_CHRONO = 0,
	EVENT_TRACE_TIMESTAMP_CPU_CCOUNT = 1,
	EVENT_TRACE_TIMESTAMP_COUNT = 2
};

/*
 * Request payload for MSG_OP_START_EVENT_TRACE.
 * Packed; field order is the layout sent to firmware, do not reorder.
 */
struct start_event_trace_req {
uint32_t event_trace_categories;
enum event_trace_destination event_trace_dest;
enum event_trace_timestamp event_trace_timestamp;
/* DRAM log buffer address and size */
uint64_t dram_buffer_address;
uint32_t dram_buffer_size;
} __packed;

/*
 * Response payload for MSG_OP_START_EVENT_TRACE: status code, an MSI index
 * and a timestamp reported by firmware.
 */
struct start_event_trace_resp {
enum aie2_msg_status status;
uint32_t msi_idx;
uint64_t current_timestamp;
} __packed;

/*
 * Request payload for MSG_OP_STOP_EVENT_TRACE.
 * NOTE(review): the sender in this change leaves @stop at zero; confirm the
 * value firmware expects.
 */
struct stop_event_trace_req {
uint32_t stop;
} __packed;

/* Response payload for MSG_OP_STOP_EVENT_TRACE: status code only. */
struct stop_event_trace_resp {
enum aie2_msg_status status;
} __packed;

/*
 * Request payload carrying a new event trace category mask.
 * Marked __packed like the other firmware message structs in this header.
 */
struct set_event_trace_categories_req {
	uint32_t event_trace_categories;
} __packed;

/*
 * Response payload for a set-event-trace-categories request: status only.
 * Marked __packed like the other firmware message structs in this header.
 */
struct set_event_trace_categories_resp {
	enum aie2_msg_status status;
} __packed;
/***************End of event tracing data structs ****************/

#define MAX_CHAIN_CMDBUF_SIZE 0x1000
#define slot_cf_has_space(offset, payload_size) \
(MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \
Expand Down
22 changes: 21 additions & 1 deletion src/driver/amdxdna/aie2_pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,15 @@ static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev)
/*
 * Management firmware teardown: suspend the firmware and, if that succeeds,
 * send the stop-event-trace request.
 *
 * A suspend failure is logged and nothing else is attempted; the
 * "npu firmware suspended" message is only emitted on success.
 *
 * NOTE(review): the stop request is sent after the firmware has been
 * suspended; confirm firmware still services management messages then.
 */
static void aie2_mgmt_fw_fini(struct amdxdna_dev_hdl *ndev)
{
	int ret;

	if (aie2_suspend_fw(ndev)) {
		XDNA_ERR(ndev->xdna, "suspend_fw failed");
		return;
	}
	XDNA_DBG(ndev->xdna, "npu firmware suspended");

	ret = aie2_stop_event_trace_send(ndev);
	if (ret)
		XDNA_ERR(ndev->xdna, "send stop event trace failed, ret %d", ret);
}

static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level)
Expand Down Expand Up @@ -615,6 +622,17 @@ static int aie2_init(struct amdxdna_dev *xdna)
goto async_event_free;
}

ret = aie2_start_event_trace_send(ndev);
if(ret) {
XDNA_ERR(xdna, "Send start event trace failed, ret %d", ret);
goto event_trace_free;
}

ret = aie2_stop_event_trace(ndev);
if(ret) {
XDNA_ERR(xdna, "Send stop event trace failed, ret %d", ret);
}

/* Just to make sure firmware handled async events */
ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
if (ret) {
Expand All @@ -627,6 +645,8 @@ static int aie2_init(struct amdxdna_dev *xdna)

async_event_free:
aie2_error_async_events_free(ndev);
event_trace_free:
aie2_event_trace_free(ndev);
stop_hw:
aie2_hw_stop(xdna);
disable_sva:
Expand Down
10 changes: 10 additions & 0 deletions src/driver/amdxdna/aie2_pci.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ struct amdxdna_dev_hdl {
struct mailbox *mbox;
struct mailbox_channel *mgmt_chann;
struct async_events *async_events;
struct event_trace_req_buf *event_trace_req;

u32 dev_status;
u32 hwctx_num;
Expand Down Expand Up @@ -340,6 +341,15 @@ void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev);
int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev);
int aie2_error_async_msg_thread(void *data);

/* aie2_event_trace.c */
int aie2_event_trace_alloc(struct amdxdna_dev_hdl *ndev);
void aie2_event_trace_free(struct amdxdna_dev_hdl *ndev);
int aie2_stop_event_trace_send(struct amdxdna_dev_hdl *ndev);
int aie2_start_event_trace_send(struct amdxdna_dev_hdl *ndev);
/* The two message senders below are defined in aie2_message.c */
int aie2_start_event_trace(struct amdxdna_dev_hdl *ndev, dma_addr_t addr,
u32 size, void *handle);
int aie2_stop_event_trace(struct amdxdna_dev_hdl *ndev);

/* aie2_message.c */
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
Expand Down
17 changes: 17 additions & 0 deletions src/driver/amdxdna/amdxdna_mailbox_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,23 @@ struct xdna_notify {
.notify_cb = xdna_msg_cb, \
}

/*
 * Declare the on-stack objects needed to send one mailbox message:
 *   req  - zero-initialised struct <name>_req used as the send payload
 *   resp - struct <name>_resp whose first member is initialised to @status
 *   hdl  - struct xdna_notify pointing at resp, with an on-stack completion
 *   msg  - struct xdna_mailbox_msg carrying req, opcode @op and xdna_msg_cb
 *
 * NOTE(review): looks like this mirrors DECLARE_XDNA_MSG_COMMON; confirm
 * whether a separate macro is needed.
 */
#define DECLARE_XDNA_STOP_EVENT_TRACE_MSG(name, op, status) \
struct name##_req req = {}; \
struct name##_resp resp = { status }; \
struct xdna_notify hdl = { \
.error = 0, \
.data = (u32 *)&resp, \
.size = sizeof(resp), \
.comp = COMPLETION_INITIALIZER_ONSTACK(hdl.comp), \
}; \
struct xdna_mailbox_msg msg = { \
.send_data = (u8 *)&req, \
.send_size = sizeof(req), \
.handle = &hdl, \
.opcode = op, \
.notify_cb = xdna_msg_cb, \
}

#define XDNA_STATUS_OFFSET(name) (offsetof(struct name##_resp, status) / sizeof(u32))

int xdna_msg_cb(void *handle, const u32 *data, size_t size);
Expand Down

0 comments on commit add9c42

Please sign in to comment.