diff --git a/docs/source/rb/index.rst b/docs/source/rb/index.rst index 12ccffbb8..2e9dd9ce8 100644 --- a/docs/source/rb/index.rst +++ b/docs/source/rb/index.rst @@ -75,7 +75,7 @@ The NIC register space is constructed from a linked list of register blocks. Ea 0x0000C020 0x00000400 :ref:`rb_cqm` 0x0000C030 0x00000400 :ref:`rb_qm_tx` 0x0000C031 0x00000400 :ref:`rb_qm_rx` - 0x0000C040 0x00000100 :ref:`rb_sched_rr` + 0x0000C040 0x00000200 :ref:`rb_sched_rr` 0x0000C050 0x00000100 :ref:`rb_sched_ctrl_tdma` 0x0000C060 0x00000200 :ref:`rb_tdma_sch` 0x0000C080 0x00000200 :ref:`rb_phc` diff --git a/docs/source/rb/sched_rr.rst b/docs/source/rb/sched_rr.rst index e496e047a..2fc28bb6f 100644 --- a/docs/source/rb/sched_rr.rst +++ b/docs/source/rb/sched_rr.rst @@ -4,35 +4,43 @@ Round-robin scheduler register block ==================================== -The round-robin scheduler register block has a header with type 0x0000C040, version 0x00000100, and indicates the location of the scheduler in the register space, as well as containing some control, status, and informational registers. +The round-robin scheduler register block has a header with type 0x0000C040, version 0x00000200, and indicates the location of the scheduler in the register space, as well as containing some control, status, and informational registers. .. 
table:: - ======== ============= ====== ====== ====== ====== ============= - Address Field 31..24 23..16 15..8 7..0 Reset value - ======== ============= ====== ====== ====== ====== ============= - RBB+0x00 Type Vendor ID Type RO 0x0000C040 - -------- ------------- -------------- -------------- ------------- - RBB+0x04 Version Major Minor Patch Meta RO 0x00000100 - -------- ------------- ------ ------ ------ ------ ------------- - RBB+0x08 Next pointer Pointer to next register block RO - - -------- ------------- ------------------------------ ------------- - RBB+0x0C Offset Offset to scheduler RO - - -------- ------------- ------------------------------ ------------- - RBB+0x10 CH count Channel count RO - - -------- ------------- ------------------------------ ------------- - RBB+0x14 CH stride Channel stride RO 0x00000004 - -------- ------------- ------------------------------ ------------- - RBB+0x18 Control Control RW 0x00000000 - -------- ------------- ------------------------------ ------------- - RBB+0x1C Dest Dest RW - - ======== ============= ============================== ============= + ============ ============= ====== ====== ====== ====== ============= + Address Field 31..24 23..16 15..8 7..0 Reset value + ============ ============= ====== ====== ====== ====== ============= + RBB+0x00 Type Vendor ID Type RO 0x0000C040 + ------------ ------------- -------------- -------------- ------------- + RBB+0x04 Version Major Minor Patch Meta RO 0x00000200 + ------------ ------------- ------ ------ ------ ------ ------------- + RBB+0x08 Next pointer Pointer to next register block RO - + ------------ ------------- ------------------------------ ------------- + RBB+0x0C Offset Offset to scheduler RO - + ------------ ------------- ------------------------------ ------------- + RBB+0x10 Queue count Queue count RO - + ------------ ------------- ------------------------------ ------------- + RBB+0x14 Queue stride Queue stride RO - + ------------ ------------- 
------------------------------ ------------- + RBB+0x18 Control Scheduler Control RW 0x00000000 + ------------ ------------- ------------------------------ ------------- + RBB+0x1C Config FC scl Ports TCs RO - + ------------ ------------- ------ ------ ------ ------ ------------- + RBB+0x20+16n CH N ctrl Channel control RW 0x00000000 + ------------ ------------- ------------------------------ ------------- + RBB+0x24+16n CH N FC 1 Packet budget Dest RW - + ------------ ------------- -------------- -------------- ------------- + RBB+0x28+16n CH N FC 2 Packet limit Data budget RW - + ------------ ------------- -------------- -------------- ------------- + RBB+0x2C+16n CH N FC 3 Data limit RW - + ============ ============= ============================== ============= See :ref:`rb_overview` for definitions of the standard register block header fields. .. object:: Offset - The offset field contains the offset to the start of the scheduler, relative to the start of the current region. + The offset field contains the offset to the start of the scheduler region, relative to the start of the current region. .. table:: @@ -42,31 +50,31 @@ See :ref:`rb_overview` for definitions of the standard register block header fie RBB+0x0C Offset to scheduler RO - ======== ============================== ============= -.. object:: Channel count +.. object:: Queue count - The channel count field contains the number of channels. + The queue count field contains the number of queues. .. table:: ======== ====== ====== ====== ====== ============= Address 31..24 23..16 15..8 7..0 Reset value ======== ====== ====== ====== ====== ============= - RBB+0x10 Channel count RO - + RBB+0x10 Queue count RO - ======== ============================== ============= -.. object:: Channel stride +.. object:: Queue stride - The channel stride field contains the size of the region for each channel. + The queue stride field contains the size of the region for each queue. .. 
table:: ======== ====== ====== ====== ====== ============= Address 31..24 23..16 15..8 7..0 Reset value ======== ====== ====== ====== ====== ============= - RBB+0x14 Channel stride RO 0x00000004 + RBB+0x14 Queue stride RO 0x00000004 ======== ============================== ============= -.. object:: Control +.. object:: Control/status The control field contains scheduler-related control bits. @@ -84,53 +92,187 @@ See :ref:`rb_overview` for definitions of the standard register block header fie Bit Function === ======== 0 Enable + 16 Active === ======== -.. object:: Dest +.. object:: Config - The dest field controls the destination port and traffic class of the scheduler. It is initialized with the scheduler's index with traffic class 0. + The config register contains the number of ports and traffic classes that the scheduler is configured for, as well as the flow control scale value. The scheduler implements a hierarchical schedule, round-robin across X ports, strict priority across Y traffic classes on each port, and round-robin on all queues enabled on each TC. Queues can be enabled on one TC on any number of ports. .. table:: ======== ====== ====== ====== ====== ============= Address 31..24 23..16 15..8 7..0 Reset value ======== ====== ====== ====== ====== ============= - RBB+0x1C Dest RW - - ======== ============================== ============= + RBB+0x1C FC Scl Ports TCs RO - + ======== ====== ====== ====== ====== ============= + +.. object:: Channel control + + The control field contains scheduler-related control bits. + + .. table:: + + ============ ====== ====== ====== ====== ============= + Address 31..24 23..16 15..8 7..0 Reset value + ============ ====== ====== ====== ====== ============= + RBB+0x20+16n Status Control RW 0x00000000 + ============ ============== ============== ============= + + .. table:: + + === ======== + Bit Function + === ======== + 0 Enable + 16 Active + 17 Fetch active + 18 FC available + 19 Scheduler primed + === ======== + +.. 
object:: Channel flow control registers + + The channel flow control registers contain aggregate limit settings for outstanding operations as well as budgets for starting new operations. The data limits are specified in flow control credits, with the FC scale value determining the number of bytes per credit. The packet budget and data budget control the number of packets and aggregate packet data that can be fetched for each scheduling decision on the scheduler channel. The packet limit and data limit determine the maximum number of outstanding packets and aggregate packet data in transmission on the scheduler channel at any time. The dest field is used to control the routing and traffic class for the scheduler channel. + + .. table:: -Round-robin scheduler CSRs -========================== + ============ ====== ====== ====== ====== ============= + Address 31..24 23..16 15..8 7..0 Reset value + ============ ====== ====== ====== ====== ============= + RBB+0x24+16n Packet budget dest RW - + ------------ -------------- -------------- ------------- + RBB+0x28+16n Packet limit Data budget RW - + ------------ -------------- -------------- ------------- + RBB+0x2C+16n Data limit RW - + ============ ============================== ============= -Each scheduler channel has several associated control registers, detailed in this table: +Round-robin scheduler queue CSRs +================================ + +Each queue has several associated control registers, detailed in this table: .. table:: ========= ============== ====== ====== ====== ====== ============= Address Field 31..24 23..16 15..8 7..0 Reset value ========= ============== ====== ====== ====== ====== ============= - Base+0x00 Control Control RW 0x00000000 - ========= ============================== ============= + Base+0x00 Control P n+3 P n+2 P n+1 P n RW 0x00000000 + ========= ============== ====== ====== ====== ====== ============= .. 
object:: Control - The control field contains scheduler-related control bits. + The control field contains scheduler-related control bits. Each port has a dedicated byte; the stride size will be set based on the number of ports. Queue-level bits are located in the MSBs of each byte. All fields are read-only; use commands to control the enable and pause bits as well as set the TCs on each of the ports. .. table:: ========= ====== ====== ====== ====== ============= Address 31..24 23..16 15..8 7..0 Reset value ========= ====== ====== ====== ====== ============= - Base+0x00 Control RW 0x00000000 - ========= ============================== ============= + Base+0x00 P n+3 P n+2 P n+1 P n RW 0x00000000 + ========= ====== ====== ====== ====== ============= .. table:: - === ============= - Bit Function - === ============= - 0 Enable - 1 Global enable - 2 Control enable - 16 Active - 24 Scheduled - === ============= + ===== ============= + Bit Function + ===== ============= + 2:0 Port n TC + 3 Port n enable + 4 Port n pause + 5 Port n scheduled + 6 Queue enable + 7 Queue pause + 10:8 Port n+1 TC + 11 Port n+1 enable + 12 Port n+1 pause + 13 Port n+1 scheduled + 14 Queue active + 18:16 Port n+2 TC + 19 Port n+2 enable + 20 Port n+2 pause + 21 Port n+2 scheduled + 26:24 Port n+3 TC + 27 Port n+3 enable + 28 Port n+3 pause + 29 Port n+3 scheduled + ===== ============= + +Round-robin scheduler queue commands +==================================== + +.. 
table:: + + ======================== ====== ====== ====== ====== + Command 31..24 23..16 15..8 7..0 + ======================== ====== ====== ====== ====== + Set port TC 0x8001 Port TC + ------------------------ -------------- ------ ------ + Set port enable 0x8002 Port Enable + ------------------------ -------------- ------ ------ + Set port pause 0x8003 Port Pause + ------------------------ -------------- ------ ------ + Set queue enable 0x400001 Enable + ------------------------ ---------------------- ------ + Set queue pause 0x400002 Pause + ======================== ====================== ====== + +.. object:: Set port TC + + The set port TC command is used to set the traffic class for the specified port for the queue. Allowed at any time, but the change only takes effect when the queue is rescheduled. + + .. table:: + + ====== ====== ====== ====== + 31..24 23..16 15..8 7..0 + ====== ====== ====== ====== + 0x8001 Port TC + ============== ====== ====== + +.. object:: Set port enable + + The set port enable command is used to enable or disable the specified port for the queue. Allowed at any time. + + .. table:: + + ====== ====== ====== ====== + 31..24 23..16 15..8 7..0 + ====== ====== ====== ====== + 0x8002 Port Enable + ============== ====== ====== + +.. object:: Set port pause + + The set port pause command is used to pause or un-pause the specified port for the queue. Allowed at any time. + + .. table:: + + ====== ====== ====== ====== + 31..24 23..16 15..8 7..0 + ====== ====== ====== ====== + 0x8003 Port Pause + ============== ====== ====== + +.. object:: Set queue enable + + The set queue enable command is used to enable or disable the queue. Allowed at any time. + + .. table:: + + ====== ====== ====== ====== + 31..24 23..16 15..8 7..0 + ====== ====== ====== ====== + 0x400001 Enable + ====================== ====== + +.. object:: Set queue pause + + The set queue pause command is used to pause or un-pause the queue. Allowed at any time. + + .. 
table:: + + ====== ====== ====== ====== + 31..24 23..16 15..8 7..0 + ====== ====== ====== ====== + 0x400002 Pause + ====================== ====== diff --git a/fpga/common/rtl/tx_scheduler_rr.v b/fpga/common/rtl/tx_scheduler_rr.v index 92051fdcb..0d827e5ce 100644 --- a/fpga/common/rtl/tx_scheduler_rr.v +++ b/fpga/common/rtl/tx_scheduler_rr.v @@ -23,6 +23,8 @@ module tx_scheduler_rr # parameter PIPELINE = 2, parameter SCHED_CTRL_ENABLE = 0, parameter REQ_DEST_DEFAULT = 0, + parameter MAX_TX_SIZE = 9216, + parameter FC_SCALE = 64, // AXI lite interface configuration parameter AXIL_BASE_ADDR = 0, @@ -31,7 +33,7 @@ module tx_scheduler_rr # parameter AXIL_STRB_WIDTH = (AXIL_DATA_WIDTH/8), // Register interface configuration - parameter REG_ADDR_WIDTH = $clog2(32), + parameter REG_ADDR_WIDTH = $clog2(64), parameter REG_DATA_WIDTH = AXIL_DATA_WIDTH, parameter REG_STRB_WIDTH = (REG_DATA_WIDTH/8), parameter RB_BLOCK_TYPE = 32'h0000C040, @@ -127,6 +129,12 @@ module tx_scheduler_rr # output wire active ); +localparam CL_FC_SCALE = $clog2(FC_SCALE); +localparam PKT_FC_W = 8; +localparam BUDGET_FC_W = LEN_WIDTH-CL_FC_SCALE; +localparam DATA_FC_W = BUDGET_FC_W+PKT_FC_W; +localparam TX_FC_W = 4; + localparam QUEUE_COUNT = 2**QUEUE_INDEX_WIDTH; localparam CL_OP_TABLE_SIZE = $clog2(OP_TABLE_SIZE); @@ -173,12 +181,12 @@ initial begin $finish; end - if (REG_ADDR_WIDTH < $clog2(32)) begin + if (REG_ADDR_WIDTH < $clog2(64)) begin $error("Error: Register address width too narrow (instance %m)"); $finish; end - if (RB_NEXT_PTR && RB_NEXT_PTR >= RB_BASE_ADDR && RB_NEXT_PTR < RB_BASE_ADDR + 32) begin + if (RB_NEXT_PTR && RB_NEXT_PTR >= RB_BASE_ADDR && RB_NEXT_PTR < RB_BASE_ADDR + 64) begin $error("Error: RB_NEXT_PTR overlaps block (instance %m)"); $finish; end @@ -230,15 +238,13 @@ reg [RAM_WIDTH-1:0] queue_ram_rd_data; // Scheduler RAM entry: // bit len field // 0 1 enable -// 1 1 global_enable -// 2 1 sched_enable +// 1 1 pause // 6 1 active // 7 1 scheduled // 15:8 8 tail index wire 
queue_ram_rd_data_enabled = queue_ram_rd_data[0]; -wire queue_ram_rd_data_global_enable = queue_ram_rd_data[1]; -wire queue_ram_rd_data_sched_enable = queue_ram_rd_data[2]; +wire queue_ram_rd_data_paused = queue_ram_rd_data[1]; wire queue_ram_rd_data_active = queue_ram_rd_data[6]; wire queue_ram_rd_data_scheduled = queue_ram_rd_data[7]; wire [CL_OP_TABLE_SIZE-1:0] queue_ram_rd_data_op_tail_index = queue_ram_rd_data[15:8]; @@ -476,6 +482,154 @@ initial begin end end +// flow control +reg ch_fetch_fc_cons_en = 1'b0; +reg ch_fetch_fc_rel_sched_fail_en = 1'b0; +reg ch_fetch_fc_rel_dequeue_fail_en = 1'b0; +reg ch_fetch_fc_rel_fetch_fail_en = 1'b0; +reg [DATA_FC_W-1:0] ch_tx_data_fc_cons = 0; +reg ch_tx_fc_cons_en = 1'b0; +reg [DATA_FC_W-1:0] ch_tx_data_fc_rel = 0; +reg ch_tx_fc_rel_en = 1'b0; + +reg ch_enable_reg = 1'b0; +reg ch_active_reg = 1'b0; +reg ch_fetch_active_reg = 1'b0; +reg [TX_FC_W-1:0] ch_fetch_fc_cnt_reg = 0; +reg [TX_FC_W-1:0] ch_fetch_fc_lim_reg = 0; +reg ch_fetch_fc_av_reg = 0; +reg [PKT_FC_W-1:0] ch_fetch_pkt_fc_cons_reg = 0; +reg [PKT_FC_W-1:0] ch_fetch_pkt_fc_rel_sched_fail_reg = 0; +reg [PKT_FC_W-1:0] ch_fetch_pkt_fc_rel_dequeue_fail_reg = 0; +reg [PKT_FC_W-1:0] ch_fetch_pkt_fc_rel_fetch_fail_reg = 0; +reg [PKT_FC_W-1:0] ch_pkt_fc_lim_reg = {PKT_FC_W{1'b1}}; +reg [BUDGET_FC_W-1:0] ch_data_fc_budget_reg = (MAX_TX_SIZE + 2**CL_FC_SCALE - 1) >> CL_FC_SCALE; +reg [PKT_FC_W-1:0] ch_tx_pkt_fc_cons_reg = 0; +reg [DATA_FC_W-1:0] ch_tx_data_fc_cons_reg = 0; +reg [PKT_FC_W-1:0] ch_tx_pkt_fc_rel_reg = 0; +reg [DATA_FC_W-1:0] ch_tx_data_fc_rel_reg = 0; +reg [DATA_FC_W-1:0] ch_data_fc_lim_reg = (MAX_TX_SIZE + 2**CL_FC_SCALE - 1) >> CL_FC_SCALE; + +reg [TX_FC_W-1:0] ch_fetch_fc_cnt_d1_reg = 0; +reg [TX_FC_W-1:0] ch_fetch_fc_cnt_d2_reg = 0; +reg [PKT_FC_W-1:0] ch_fetch_pkt_fc_cnt_reg = 0; +reg [PKT_FC_W-1:0] ch_tx_pkt_fc_cnt_reg = 0; +reg [PKT_FC_W-1:0] ch_pkt_fc_cnt_reg = 0; +reg [DATA_FC_W-1:0] ch_tx_data_fc_cnt_reg = 0; +reg [DATA_FC_W-1:0] ch_data_fc_cnt_reg = 
0; + +always @* begin + ch_fetch_fc_rel_dequeue_fail_en = 1'b0; + ch_fetch_fc_rel_fetch_fail_en = 1'b0; + ch_tx_data_fc_cons = 0; + ch_tx_fc_cons_en = 1'b0; + ch_tx_data_fc_rel = 0; + ch_tx_fc_rel_en = 1'b0; + + if (s_axis_tx_status_dequeue_valid) begin + if (s_axis_tx_status_dequeue_empty || s_axis_tx_status_dequeue_error) begin + ch_fetch_fc_rel_dequeue_fail_en = 1'b1; + end + end + + ch_tx_data_fc_cons = (s_axis_tx_status_start_len + 2**CL_FC_SCALE-1) >> CL_FC_SCALE; + if (s_axis_tx_status_start_valid) begin + if (s_axis_tx_status_start_error) begin + ch_fetch_fc_rel_fetch_fail_en = 1'b1; + end else begin + ch_fetch_fc_rel_fetch_fail_en = 1'b1; + ch_tx_fc_cons_en = 1'b1; + end + end + + ch_tx_data_fc_rel = (s_axis_tx_status_finish_len + 2**CL_FC_SCALE-1) >> CL_FC_SCALE; + if (s_axis_tx_status_finish_valid) begin + ch_tx_fc_rel_en = 1'b1; + end +end + +always @(posedge clk) begin + // handle events + if (ch_fetch_fc_cons_en) begin + ch_fetch_pkt_fc_cons_reg <= ch_fetch_pkt_fc_cons_reg + 1; + ch_fetch_fc_cnt_reg <= ch_fetch_fc_cnt_reg + 1; + ch_fetch_fc_av_reg <= ((ch_fetch_fc_lim_reg - ch_fetch_fc_cnt_reg - 1) & {TX_FC_W{1'b1}}) <= 2**(TX_FC_W-1) && ch_enable_reg; + end else begin + ch_fetch_fc_av_reg <= ((ch_fetch_fc_lim_reg - ch_fetch_fc_cnt_reg) & {TX_FC_W{1'b1}}) <= 2**(TX_FC_W-1) && ch_enable_reg; + end + + if (ch_fetch_fc_rel_sched_fail_en) begin + ch_fetch_pkt_fc_rel_sched_fail_reg <= ch_fetch_pkt_fc_rel_sched_fail_reg + 1; + end + + if (ch_fetch_fc_rel_dequeue_fail_en) begin + ch_fetch_pkt_fc_rel_dequeue_fail_reg <= ch_fetch_pkt_fc_rel_dequeue_fail_reg + 1; + end + + if (ch_fetch_fc_rel_fetch_fail_en) begin + ch_fetch_pkt_fc_rel_fetch_fail_reg <= ch_fetch_pkt_fc_rel_fetch_fail_reg + 1; + end + + if (ch_tx_fc_cons_en) begin + ch_tx_pkt_fc_cons_reg <= ch_tx_pkt_fc_cons_reg + 1; + ch_tx_data_fc_cons_reg <= ch_tx_data_fc_cons_reg + ch_tx_data_fc_cons; + end + + if (ch_tx_fc_rel_en) begin + ch_tx_pkt_fc_rel_reg <= ch_tx_pkt_fc_rel_reg + 1; + 
ch_tx_data_fc_rel_reg <= ch_tx_data_fc_rel_reg + ch_tx_data_fc_rel; + end + + // intermediate counts + ch_fetch_pkt_fc_cnt_reg <= ch_fetch_pkt_fc_cons_reg - ch_fetch_pkt_fc_rel_sched_fail_reg - ch_fetch_pkt_fc_rel_dequeue_fail_reg - ch_fetch_pkt_fc_rel_fetch_fail_reg; + ch_tx_pkt_fc_cnt_reg <= ch_tx_pkt_fc_cons_reg - ch_tx_pkt_fc_rel_reg; + ch_tx_data_fc_cnt_reg <= ch_tx_data_fc_cons_reg - ch_tx_data_fc_rel_reg; + ch_fetch_fc_cnt_d1_reg <= ch_fetch_fc_cnt_reg; + + // final counts + ch_pkt_fc_cnt_reg <= ch_fetch_pkt_fc_cnt_reg + ch_tx_pkt_fc_cnt_reg; + ch_data_fc_cnt_reg <= ch_fetch_pkt_fc_cnt_reg*ch_data_fc_budget_reg + ch_tx_data_fc_cnt_reg; + ch_fetch_fc_cnt_d2_reg <= ch_fetch_fc_cnt_d1_reg; + + ch_fetch_active_reg <= ch_fetch_pkt_fc_cnt_reg != 0; + ch_active_reg <= ch_fetch_pkt_fc_cnt_reg != 0 || ch_tx_pkt_fc_cnt_reg != 0; + + // generate credits + if ($signed({1'b0, ch_data_fc_lim_reg}) - $signed({1'b0, ch_data_fc_cnt_reg}) >= {ch_data_fc_budget_reg, 3'd0} && $signed({1'b0, ch_pkt_fc_lim_reg}) - $signed({1'b0, ch_pkt_fc_cnt_reg}) >= 8 && TX_FC_W > 3) begin + ch_fetch_fc_lim_reg <= ch_fetch_fc_cnt_d2_reg + 8; + end else if ($signed({1'b0, ch_data_fc_lim_reg}) - $signed({1'b0, ch_data_fc_cnt_reg}) >= {ch_data_fc_budget_reg, 2'd0} && $signed({1'b0, ch_pkt_fc_lim_reg}) - $signed({1'b0, ch_pkt_fc_cnt_reg}) >= 4 && TX_FC_W > 2) begin + ch_fetch_fc_lim_reg <= ch_fetch_fc_cnt_d2_reg + 4; + end else if ($signed({1'b0, ch_data_fc_lim_reg}) - $signed({1'b0, ch_data_fc_cnt_reg}) >= {ch_data_fc_budget_reg, 1'd0} && $signed({1'b0, ch_pkt_fc_lim_reg}) - $signed({1'b0, ch_pkt_fc_cnt_reg}) >= 2) begin + ch_fetch_fc_lim_reg <= ch_fetch_fc_cnt_d2_reg + 2; + end else if ($signed({1'b0, ch_data_fc_lim_reg}) - $signed({1'b0, ch_data_fc_cnt_reg}) >= ch_data_fc_budget_reg && $signed({1'b0, ch_pkt_fc_lim_reg}) - $signed({1'b0, ch_pkt_fc_cnt_reg}) >= 1) begin + ch_fetch_fc_lim_reg <= ch_fetch_fc_cnt_d2_reg + 1; + end else begin + ch_fetch_fc_lim_reg <= ch_fetch_fc_cnt_d2_reg; + 
ch_fetch_fc_av_reg <= 1'b0; + end + + if (rst) begin + ch_fetch_fc_cnt_reg <= 0; + ch_fetch_fc_cnt_d1_reg <= 0; + ch_fetch_fc_cnt_d2_reg <= 0; + ch_fetch_fc_lim_reg <= 0; + ch_fetch_fc_av_reg <= 0; + ch_fetch_pkt_fc_cons_reg <= 0; + ch_fetch_pkt_fc_rel_sched_fail_reg <= 0; + ch_fetch_pkt_fc_rel_dequeue_fail_reg <= 0; + ch_fetch_pkt_fc_rel_fetch_fail_reg <= 0; + ch_fetch_pkt_fc_cnt_reg <= 0; + ch_tx_pkt_fc_cnt_reg <= 0; + ch_pkt_fc_cnt_reg <= 0; + ch_tx_pkt_fc_cons_reg <= 0; + ch_tx_data_fc_cons_reg <= 0; + ch_tx_pkt_fc_rel_reg <= 0; + ch_tx_data_fc_rel_reg <= 0; + ch_tx_data_fc_cnt_reg <= 0; + ch_data_fc_cnt_reg <= 0; + end +end + // control registers reg ctrl_reg_wr_ack_reg = 1'b0; reg [REG_DATA_WIDTH-1:0] ctrl_reg_rd_data_reg = {REG_DATA_WIDTH{1'b0}}; @@ -503,9 +657,35 @@ always @(posedge clk) begin // Round-robin scheduler RBB+8'h18: begin // Sched: control - enable_reg <= ctrl_reg_wr_data[0]; + if (ctrl_reg_wr_strb[0]) begin + enable_reg <= ctrl_reg_wr_data[0]; + end + end + RBB+8'h20: begin + if (ctrl_reg_wr_strb[0]) begin + ch_enable_reg <= ctrl_reg_wr_data[0]; + end + end + RBB+8'h24: begin + if (ctrl_reg_wr_strb[1:0]) begin + m_axis_tx_req_dest_reg <= ctrl_reg_wr_data[15:0]; + end + if (ctrl_reg_wr_strb[3:2]) begin + // TODO + // ch_pkt_fc_budget_reg <= ctrl_reg_wr_data[31:16]; + end + end + RBB+8'h28: begin + if (ctrl_reg_wr_strb[1:0]) begin + ch_data_fc_budget_reg <= ctrl_reg_wr_data[15:0]; + end + if (ctrl_reg_wr_strb[3:2]) begin + ch_pkt_fc_lim_reg <= ctrl_reg_wr_data[31:16]; + end + end + RBB+8'h2C: begin + ch_data_fc_lim_reg <= ctrl_reg_wr_data; end - RBB+8'h1C: m_axis_tx_req_dest_reg <= ctrl_reg_wr_data; // Sched: dest default: ctrl_reg_wr_ack_reg <= 1'b0; endcase end @@ -516,17 +696,40 @@ always @(posedge clk) begin case ({ctrl_reg_rd_addr >> 2, 2'b00}) // Round-robin scheduler RBB+8'h00: ctrl_reg_rd_data_reg <= RB_BLOCK_TYPE; // Sched: Type - RBB+8'h04: ctrl_reg_rd_data_reg <= 32'h00000100; // Sched: Version + RBB+8'h04: ctrl_reg_rd_data_reg <= 
32'h00000200; // Sched: Version RBB+8'h08: ctrl_reg_rd_data_reg <= RB_NEXT_PTR; // Sched: Next header RBB+8'h0C: ctrl_reg_rd_data_reg <= AXIL_BASE_ADDR; // Sched: Offset RBB+8'h10: ctrl_reg_rd_data_reg <= 2**QUEUE_INDEX_WIDTH; // Sched: Channel count RBB+8'h14: ctrl_reg_rd_data_reg <= 4; // Sched: Channel stride RBB+8'h18: begin // Sched: control - ctrl_reg_rd_data_reg[0] <= enable_reg; - ctrl_reg_rd_data_reg[8] <= active_queue_count_reg != 0; + ctrl_reg_rd_data_reg[0] <= enable_reg; + ctrl_reg_rd_data_reg[16] <= active_queue_count_reg != 0; + end + RBB+8'h1C: begin + ctrl_reg_rd_data_reg[7:0] <= 1; // Sched: TC count + ctrl_reg_rd_data_reg[15:8] <= 1; // Sched: Port count + ctrl_reg_rd_data_reg[23:16] <= CL_FC_SCALE; // Sched: FC scale + end + RBB+8'h20: begin + ctrl_reg_rd_data_reg[0] <= ch_enable_reg; + ctrl_reg_rd_data_reg[16] <= ch_active_reg; + ctrl_reg_rd_data_reg[17] <= ch_fetch_active_reg; + ctrl_reg_rd_data_reg[18] <= ch_fetch_fc_av_reg; + ctrl_reg_rd_data_reg[19] <= axis_scheduler_fifo_out_valid; + end + RBB+8'h24: begin + ctrl_reg_rd_data_reg[15:0] <= m_axis_tx_req_dest_reg; + // TODO + ctrl_reg_rd_data_reg[31:16] <= 1; // ch_pkt_fc_budget_reg; + end + RBB+8'h28: begin + ctrl_reg_rd_data_reg[15:0] <= ch_data_fc_budget_reg; + ctrl_reg_rd_data_reg[31:16] <= ch_pkt_fc_lim_reg; + end + RBB+8'h2C: begin + ctrl_reg_rd_data_reg <= ch_data_fc_lim_reg; end - RBB+8'h1C: ctrl_reg_rd_data_reg <= m_axis_tx_req_dest_reg; // Sched: dest default: ctrl_reg_rd_ack_reg <= 1'b0; endcase end @@ -537,9 +740,17 @@ always @(posedge clk) begin enable_reg <= 1'b0; m_axis_tx_req_dest_reg <= REQ_DEST_DEFAULT; + + ch_enable_reg <= 0; + ch_pkt_fc_lim_reg <= {PKT_FC_W{1'b1}}; + ch_data_fc_budget_reg <= (MAX_TX_SIZE + 2**CL_FC_SCALE - 1) >> CL_FC_SCALE; + ch_data_fc_lim_reg <= (MAX_TX_SIZE + 2**CL_FC_SCALE - 1) >> CL_FC_SCALE; end end +reg enabled; +reg paused; + always @* begin op_axil_write_pipe_next = {op_axil_write_pipe_reg, 1'b0}; op_axil_read_pipe_next = {op_axil_read_pipe_reg, 
1'b0}; @@ -625,6 +836,9 @@ always @* begin axis_scheduler_fifo_out_ready = 1'b0; + ch_fetch_fc_cons_en = 1'b0; + ch_fetch_fc_rel_sched_fail_en = 1'b0; + // pipeline stage 0 - receive request if (!init_reg) begin // init queue states @@ -687,7 +901,7 @@ always @* begin queue_ram_rd_addr = s_axis_sched_ctrl_queue; queue_ram_addr_pipeline_next[0] = s_axis_sched_ctrl_queue; - end else if (enable && enable_reg && op_table_start_ptr_valid && axis_scheduler_fifo_out_valid && (!m_axis_tx_req_valid || m_axis_tx_req_ready) && !op_req_pipe_reg) begin + end else if (enable && enable_reg && op_table_start_ptr_valid && axis_scheduler_fifo_out_valid && ch_fetch_fc_av_reg && (!m_axis_tx_req_valid || m_axis_tx_req_ready) && !op_req_pipe_reg) begin // transmit request op_req_pipe_next[0] = 1'b1; @@ -698,6 +912,8 @@ always @* begin axis_scheduler_fifo_out_ready = 1'b1; + ch_fetch_fc_cons_en = 1'b1; + queue_ram_rd_addr = axis_scheduler_fifo_out_queue; queue_ram_addr_pipeline_next[0] = axis_scheduler_fifo_out_queue; end @@ -708,11 +924,9 @@ always @* begin // init queue state queue_ram_wr_addr = queue_ram_addr_pipeline_reg[PIPELINE-1]; + queue_ram_wr_data = 0; queue_ram_wr_data[0] = 1'b0; // queue enabled - if (SCHED_CTRL_ENABLE) begin - queue_ram_wr_data[1] = 1'b0; // queue global enable - queue_ram_wr_data[2] = 1'b0; // queue sched enable - end + queue_ram_wr_data[1] = 1'b0; // queue paused queue_ram_wr_data[6] = 1'b0; // queue active queue_ram_wr_data[7] = 1'b0; // queue scheduled queue_ram_wr_strb[0] = 1'b1; @@ -727,7 +941,7 @@ always @* begin queue_ram_wr_en = 1'b1; // schedule queue if necessary - if (queue_ram_rd_data_enabled && (!SCHED_CTRL_ENABLE || queue_ram_rd_data_global_enable || queue_ram_rd_data_sched_enable) && !queue_ram_rd_data_scheduled) begin + if (queue_ram_rd_data_enabled && !queue_ram_rd_data_paused && !queue_ram_rd_data_scheduled) begin queue_ram_wr_data[7] = 1'b1; // queue scheduled axis_scheduler_fifo_in_queue = queue_ram_addr_pipeline_reg[PIPELINE-1]; @@ 
-761,7 +975,7 @@ always @* begin op_table_update_next_ptr = queue_ram_rd_data_op_tail_index; op_table_update_next_index = op_index_pipeline_reg[PIPELINE-1]; - if (queue_ram_rd_data_enabled && (!SCHED_CTRL_ENABLE || queue_ram_rd_data_global_enable || queue_ram_rd_data_sched_enable) && queue_ram_rd_data_active && queue_ram_rd_data_scheduled) begin + if (queue_ram_rd_data_enabled && !queue_ram_rd_data_paused && queue_ram_rd_data_active && queue_ram_rd_data_scheduled) begin // queue enabled, active, and scheduled // issue transmit request @@ -786,6 +1000,8 @@ always @* begin // update state queue_ram_wr_data[7] = 1'b0; // queue scheduled + ch_fetch_fc_rel_sched_fail_en = 1'b1; + if (queue_ram_rd_data_scheduled) begin active_queue_count_next = active_queue_count_reg - 1; end @@ -816,8 +1032,8 @@ always @* begin if (write_data_pipeline_reg[PIPELINE-1][0]) begin queue_ram_wr_data[6] = 1'b1; // queue active - // schedule if disabled - if ((!SCHED_CTRL_ENABLE || write_data_pipeline_reg[PIPELINE-1][1] || queue_ram_rd_data_sched_enable) && !queue_ram_rd_data_scheduled) begin + // schedule if necessary + if (queue_ram_rd_data_enabled && !queue_ram_rd_data_paused && !queue_ram_rd_data_scheduled) begin queue_ram_wr_data[7] = 1'b1; // queue scheduled axis_scheduler_fifo_in_queue = queue_ram_addr_pipeline_reg[PIPELINE-1]; @@ -833,11 +1049,11 @@ always @* begin queue_ram_wr_addr = queue_ram_addr_pipeline_reg[PIPELINE-1]; queue_ram_wr_en = 1'b1; - queue_ram_wr_data[2] = write_data_pipeline_reg[PIPELINE-1][0]; // queue sched enable + queue_ram_wr_data[1] = !write_data_pipeline_reg[PIPELINE-1][0]; // queue pause queue_ram_wr_strb[0] = 1'b1; - // schedule if disabled - if (queue_ram_rd_data_enabled && queue_ram_rd_data_active && (queue_ram_rd_data_global_enable || write_data_pipeline_reg[PIPELINE-1][0]) && !queue_ram_rd_data_scheduled) begin + // schedule if necessary + if (queue_ram_rd_data_enabled && queue_ram_rd_data_active && !(!write_data_pipeline_reg[PIPELINE-1][0]) && 
!queue_ram_rd_data_scheduled) begin queue_ram_wr_data[7] = 1'b1; // queue scheduled axis_scheduler_fifo_in_queue = queue_ram_addr_pipeline_reg[PIPELINE-1]; @@ -852,12 +1068,42 @@ always @* begin queue_ram_wr_addr = queue_ram_addr_pipeline_reg[PIPELINE-1]; queue_ram_wr_en = 1'b1; - queue_ram_wr_data[0] = write_data_pipeline_reg[PIPELINE-1][0]; // queue enabled - queue_ram_wr_data[1] = write_data_pipeline_reg[PIPELINE-1][1]; // queue global enable - queue_ram_wr_strb[0] = write_strobe_pipeline_reg[PIPELINE-1][0]; + enabled = queue_ram_rd_data_enabled; + paused = queue_ram_rd_data_paused; + + casez (write_data_pipeline_reg[PIPELINE-1]) + 32'h8001zzzz: begin + // set port TC + // TODO + end + 32'h8002zzzz: begin + // set port enable + // TODO + end + 32'h8003zzzz: begin + // set port pause + // TODO + end + 32'h400001zz: begin + // set queue enable + queue_ram_wr_data[0] = write_data_pipeline_reg[PIPELINE-1][0]; + queue_ram_wr_strb[0] = 1'b1; + enabled = write_data_pipeline_reg[PIPELINE-1][0]; + end + 32'h400002zz: begin + // set queue pause + queue_ram_wr_data[1] = write_data_pipeline_reg[PIPELINE-1][0]; + queue_ram_wr_strb[0] = 1'b1; + paused = write_data_pipeline_reg[PIPELINE-1][0]; + end + default: begin + // invalid command + $display("Error: Invalid command 0x%x for queue %d (instance %m)", write_data_pipeline_reg[PIPELINE-1], queue_ram_addr_pipeline_reg[PIPELINE-1]); + end + endcase - // schedule if disabled - if (write_data_pipeline_reg[PIPELINE-1][0] && queue_ram_rd_data_active && (!SCHED_CTRL_ENABLE || write_data_pipeline_reg[PIPELINE-1][1] || queue_ram_rd_data_sched_enable) && !queue_ram_rd_data_scheduled) begin + // schedule if necessary + if (enabled && queue_ram_rd_data_active && !paused && !queue_ram_rd_data_scheduled) begin queue_ram_wr_data[7] = 1'b1; // queue scheduled axis_scheduler_fifo_in_queue = queue_ram_addr_pipeline_reg[PIPELINE-1]; @@ -870,13 +1116,15 @@ always @* begin s_axil_rvalid_next = 1'b1; s_axil_rdata_next = 0; - s_axil_rdata_next[0] = 
queue_ram_rd_data_enabled; - if (SCHED_CTRL_ENABLE) begin - s_axil_rdata_next[1] = queue_ram_rd_data_global_enable; - s_axil_rdata_next[2] = queue_ram_rd_data_sched_enable; - end - s_axil_rdata_next[16] = queue_ram_rd_data_active; - s_axil_rdata_next[24] = queue_ram_rd_data_scheduled; + // queue + s_axil_rdata_next[6] = queue_ram_rd_data_enabled; + s_axil_rdata_next[7] = queue_ram_rd_data_paused; + s_axil_rdata_next[14] = queue_ram_rd_data_active; + + // port 0 + s_axil_rdata_next[3] = queue_ram_rd_data_enabled; + s_axil_rdata_next[4] = queue_ram_rd_data_paused; + s_axil_rdata_next[5] = queue_ram_rd_data_scheduled; end // handle read data override diff --git a/fpga/common/tb/mqnic.py b/fpga/common/tb/mqnic.py index 0fd94a852..e66f3c3b1 100644 --- a/fpga/common/tb/mqnic.py +++ b/fpga/common/tb/mqnic.py @@ -245,13 +245,37 @@ MQNIC_RB_SCHED_BLOCK_VER = 0x00000300 MQNIC_RB_SCHED_BLOCK_REG_OFFSET = 0x0C -MQNIC_RB_SCHED_RR_TYPE = 0x0000C040 -MQNIC_RB_SCHED_RR_VER = 0x00000100 -MQNIC_RB_SCHED_RR_REG_OFFSET = 0x0C -MQNIC_RB_SCHED_RR_REG_CH_COUNT = 0x10 -MQNIC_RB_SCHED_RR_REG_CH_STRIDE = 0x14 -MQNIC_RB_SCHED_RR_REG_CTRL = 0x18 -MQNIC_RB_SCHED_RR_REG_DEST = 0x1C +MQNIC_RB_SCHED_RR_TYPE = 0x0000C040 +MQNIC_RB_SCHED_RR_VER = 0x00000200 +MQNIC_RB_SCHED_RR_REG_OFFSET = 0x0C +MQNIC_RB_SCHED_RR_REG_QUEUE_COUNT = 0x10 +MQNIC_RB_SCHED_RR_REG_QUEUE_STRIDE = 0x14 +MQNIC_RB_SCHED_RR_REG_CTRL = 0x18 +MQNIC_RB_SCHED_RR_REG_CFG = 0x1C +MQNIC_RB_SCHED_RR_REG_CH_STRIDE = 0x10 +MQNIC_RB_SCHED_RR_REG_CH0_CTRL = 0x20 +MQNIC_RB_SCHED_RR_REG_CH0_FC1 = 0x24 +MQNIC_RB_SCHED_RR_REG_CH0_FC1_DEST = 0x24 +MQNIC_RB_SCHED_RR_REG_CH0_FC1_PB = 0x26 +MQNIC_RB_SCHED_RR_REG_CH0_FC2 = 0x28 +MQNIC_RB_SCHED_RR_REG_CH0_FC2_DB = 0x28 +MQNIC_RB_SCHED_RR_REG_CH0_FC2_PL = 0x2A +MQNIC_RB_SCHED_RR_REG_CH0_FC3 = 0x2C +MQNIC_RB_SCHED_RR_REG_CH0_FC3_DL = 0x2C + +MQNIC_SCHED_RR_PORT_TC = (0x7 << 0) +MQNIC_SCHED_RR_PORT_EN = (1 << 3) +MQNIC_SCHED_RR_PORT_PAUSE = (1 << 4) +MQNIC_SCHED_RR_PORT_SCHEDULED = (1 << 5) 
+MQNIC_SCHED_RR_QUEUE_EN = (1 << 6) +MQNIC_SCHED_RR_QUEUE_PAUSE = (1 << 7) +MQNIC_SCHED_RR_QUEUE_ACTIVE = (1 << 14) + +MQNIC_SCHED_RR_CMD_SET_PORT_TC = 0x80010000 +MQNIC_SCHED_RR_CMD_SET_PORT_ENABLE = 0x80020000 +MQNIC_SCHED_RR_CMD_SET_PORT_PAUSE = 0x80030000 +MQNIC_SCHED_RR_CMD_SET_QUEUE_ENABLE = 0x40000100 +MQNIC_SCHED_RR_CMD_SET_QUEUE_PAUSE = 0x40000200 MQNIC_RB_SCHED_CTRL_TDMA_TYPE = 0x0000C050 MQNIC_RB_SCHED_CTRL_TDMA_VER = 0x00000100 @@ -1094,39 +1118,97 @@ def __init__(self, port, index, rb): self.queue_count = None self.queue_stride = None + self.tc_count = None + self.port_count = None + self.channel_count = None + self.fc_scale = None + async def init(self): await super().init() offset = await self.rb.read_dword(MQNIC_RB_SCHED_RR_REG_OFFSET) self.hw_regs = self.rb.parent.create_window(offset) - self.queue_count = await self.rb.read_dword(MQNIC_RB_SCHED_RR_REG_CH_COUNT) - self.queue_stride = await self.rb.read_dword(MQNIC_RB_SCHED_RR_REG_CH_STRIDE) + self.queue_count = await self.rb.read_dword(MQNIC_RB_SCHED_RR_REG_QUEUE_COUNT) + self.queue_stride = await self.rb.read_dword(MQNIC_RB_SCHED_RR_REG_QUEUE_STRIDE) self.queue_count = min(self.queue_count, MQNIC_MAX_TXQ) + val = await self.rb.read_dword(MQNIC_RB_SCHED_RR_REG_CFG) + self.tc_count = val & 0xff + self.port_count = (val >> 8) & 0xff + self.channel_count = self.port_count * self.tc_count + self.fc_scale = 1 << ((val >> 16) & 0xff) + async def enable(self): await self.set_ctrl(1) async def disable(self): await self.set_ctrl(0) + async def enable_ch(self, ch): + await self.set_ch_ctrl(ch, 1) + + async def disable_ch(self, ch): + await self.set_ch_ctrl(ch, 0) + async def enable_queue(self, queue): - await self.set_queue_ctrl(queue, 0x00000003) + await self.set_queue_ctrl(queue, MQNIC_SCHED_RR_CMD_SET_QUEUE_ENABLE | 1) async def disable_queue(self, queue): - await self.set_queue_ctrl(queue, 0x00000000) + await self.set_queue_ctrl(queue, MQNIC_SCHED_RR_CMD_SET_QUEUE_ENABLE | 0) async def 
disable_all_queues(self): for k in range(self.queue_count): await self.disable_queue(k) + async def set_queue_pause(self, queue, val): + await self.set_queue_ctrl(queue, MQNIC_SCHED_RR_CMD_SET_QUEUE_PAUSE | (1 if val else 0)) + async def get_ctrl(self): return await self.rb.read_dword(MQNIC_RB_SCHED_RR_REG_CTRL) async def set_ctrl(self, val): await self.rb.write_dword(MQNIC_RB_SCHED_RR_REG_CTRL, val) + async def get_ch_ctrl(self, ch): + return await self.rb.read_dword(MQNIC_RB_SCHED_RR_REG_CH0_CTRL + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE) + + async def set_ch_ctrl(self, ch, val): + await self.rb.write_dword(MQNIC_RB_SCHED_RR_REG_CH0_CTRL + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE, val) + + async def get_ch_dest(self, ch): + return await self.rb.read_word(MQNIC_RB_SCHED_RR_REG_CH0_FC1_DEST + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE) + + async def set_ch_dest(self, ch, val): + await self.rb.write_word(MQNIC_RB_SCHED_RR_REG_CH0_FC1_DEST + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE, val) + + async def get_ch_pkt_budget(self, ch): + return await self.rb.read_word(MQNIC_RB_SCHED_RR_REG_CH0_FC1_PB + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE) + + async def set_ch_pkt_budget(self, ch, val): + await self.rb.write_word(MQNIC_RB_SCHED_RR_REG_CH0_FC1_PB + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE, val) + + async def get_ch_data_budget(self, ch): + return await self.rb.read_word(MQNIC_RB_SCHED_RR_REG_CH0_FC2_DB + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE) * self.fc_scale + + async def set_ch_data_budget(self, ch, val): + val = (val + self.fc_scale-1) // self.fc_scale + await self.rb.write_word(MQNIC_RB_SCHED_RR_REG_CH0_FC2_DB + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE, val) + + async def get_ch_pkt_limit(self, ch): + return await self.rb.read_word(MQNIC_RB_SCHED_RR_REG_CH0_FC2_PL + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE) + + async def set_ch_pkt_limit(self, ch, val): + await self.rb.write_word(MQNIC_RB_SCHED_RR_REG_CH0_FC2_PL + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE, val) + + async def get_ch_data_limit(self, ch): + return await 
self.rb.read_dword(MQNIC_RB_SCHED_RR_REG_CH0_FC3_DL + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE) * self.fc_scale + + async def set_ch_data_limit(self, ch, val): + val = (val + self.fc_scale-1) // self.fc_scale + await self.rb.write_dword(MQNIC_RB_SCHED_RR_REG_CH0_FC3_DL + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE, val) + async def get_queue_ctrl(self, queue): return await self.hw_regs.read_dword(queue*4) @@ -1264,16 +1346,16 @@ async def open(self): await self.port.set_tx_ctrl(MQNIC_PORT_TX_CTRL_EN) # configure scheduler - for queue in range(self.sched_block.schedulers[0].queue_count): - found = False - for q in self.txq: - if queue == q.index: - found = True - break - if found: - await self.sched_block.schedulers[0].enable_queue(queue) - else: - await self.sched_block.schedulers[0].disable_queue(queue) + for q in self.txq: + await self.sched_block.schedulers[0].enable_queue(q.index) + + # configure scheduler flow control + await self.sched_block.schedulers[0].set_ch_pkt_budget(0, 1) + await self.sched_block.schedulers[0].set_ch_data_budget(0, self.interface.max_tx_mtu) + await self.sched_block.schedulers[0].set_ch_pkt_limit(0, 0xFFFF) + await self.sched_block.schedulers[0].set_ch_data_limit(0, self.interface.tx_fifo_depth) + + await self.sched_block.schedulers[0].enable_ch(0) # enable scheduler await self.sched_block.activate() @@ -1300,6 +1382,16 @@ async def close(self): for q in self.rxq: q.disable() + # configure scheduler + for q in self.txq: + await self.sched_block.schedulers[0].disable_queue(q.index) + + # configure scheduler flow control + await self.sched_block.schedulers[0].disable_ch(0) + + # disable scheduler + await self.sched_block.deactivate() + # wait for all writes to complete await self.hw_regs.read_dword(0) diff --git a/fpga/common/tb/tx_scheduler_rr/Makefile b/fpga/common/tb/tx_scheduler_rr/Makefile index 694c79856..f2c5e7dcb 100644 --- a/fpga/common/tb/tx_scheduler_rr/Makefile +++ b/fpga/common/tb/tx_scheduler_rr/Makefile @@ -25,6 +25,8 @@ export
PARAM_QUEUE_INDEX_WIDTH := 6 export PARAM_PIPELINE := 2 export PARAM_SCHED_CTRL_ENABLE := 1 export PARAM_REQ_DEST_DEFAULT := 0 +export PARAM_MAX_TX_SIZE := 9216 +export PARAM_FC_SCALE := 64 export PARAM_AXIL_BASE_ADDR := 0 export PARAM_AXIL_DATA_WIDTH := 32 diff --git a/fpga/common/tb/tx_scheduler_rr/test_tx_scheduler_rr.py b/fpga/common/tb/tx_scheduler_rr/test_tx_scheduler_rr.py index 982374837..22d43fe35 100644 --- a/fpga/common/tb/tx_scheduler_rr/test_tx_scheduler_rr.py +++ b/fpga/common/tb/tx_scheduler_rr/test_tx_scheduler_rr.py @@ -102,6 +102,37 @@ async def reset(self): await RisingEdge(self.dut.clk) +MQNIC_RB_SCHED_RR_REG_OFFSET = 0x0C +MQNIC_RB_SCHED_RR_REG_QUEUE_COUNT = 0x10 +MQNIC_RB_SCHED_RR_REG_QUEUE_STRIDE = 0x14 +MQNIC_RB_SCHED_RR_REG_CTRL = 0x18 +MQNIC_RB_SCHED_RR_REG_CFG = 0x1C +MQNIC_RB_SCHED_RR_REG_CH_STRIDE = 0x10 +MQNIC_RB_SCHED_RR_REG_CH0_CTRL = 0x20 +MQNIC_RB_SCHED_RR_REG_CH0_FC1 = 0x24 +MQNIC_RB_SCHED_RR_REG_CH0_FC1_DEST = 0x24 +MQNIC_RB_SCHED_RR_REG_CH0_FC1_PB = 0x26 +MQNIC_RB_SCHED_RR_REG_CH0_FC2 = 0x28 +MQNIC_RB_SCHED_RR_REG_CH0_FC2_DB = 0x28 +MQNIC_RB_SCHED_RR_REG_CH0_FC2_PL = 0x2A +MQNIC_RB_SCHED_RR_REG_CH0_FC3 = 0x2C +MQNIC_RB_SCHED_RR_REG_CH0_FC3_DL = 0x2C + +MQNIC_SCHED_RR_PORT_TC = (0x7 << 0) +MQNIC_SCHED_RR_PORT_EN = (1 << 3) +MQNIC_SCHED_RR_PORT_PAUSE = (1 << 4) +MQNIC_SCHED_RR_PORT_SCHEDULED = (1 << 5) +MQNIC_SCHED_RR_QUEUE_EN = (1 << 6) +MQNIC_SCHED_RR_QUEUE_PAUSE = (1 << 7) +MQNIC_SCHED_RR_QUEUE_ACTIVE = (1 << 14) + +MQNIC_SCHED_RR_CMD_SET_PORT_TC = 0x80010000 +MQNIC_SCHED_RR_CMD_SET_PORT_ENABLE = 0x80020000 +MQNIC_SCHED_RR_CMD_SET_PORT_PAUSE = 0x80030000 +MQNIC_SCHED_RR_CMD_SET_QUEUE_ENABLE = 0x40000100 +MQNIC_SCHED_RR_CMD_SET_QUEUE_PAUSE = 0x40000200 + + async def run_test_config(dut): tb = TB(dut) @@ -109,15 +140,24 @@ async def run_test_config(dut): await tb.reset() # enable - assert await tb.rd_ctrl_reg(0x18) == 0 - await tb.wr_ctrl_reg(0x18, 1) - assert await tb.rd_ctrl_reg(0x18) == 1 + assert await 
tb.rd_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CTRL) == 0 + await tb.wr_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CTRL, 1) + assert await tb.rd_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CTRL) == 1 + + val = await tb.rd_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CH0_CTRL) + tb.log.info("CTRL: %08x", val) + val = await tb.rd_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CH0_FC1) + tb.log.info("FC1: %08x", val) + val = await tb.rd_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CH0_FC2) + tb.log.info("FC2: %08x", val) + val = await tb.rd_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CH0_FC3) + tb.log.info("FC3: %08x", val) assert await tb.axil_master.read_dword(0*4) == 0 - - await tb.axil_master.write_dword(0*4, 3) - - assert await tb.axil_master.read_dword(0*4) == 3 + await tb.axil_master.write_dword(0*4, MQNIC_SCHED_RR_CMD_SET_PORT_TC | (0 << 8) | 0) + await tb.axil_master.write_dword(0*4, MQNIC_SCHED_RR_CMD_SET_PORT_ENABLE | (0 << 8) | 1) + await tb.axil_master.write_dword(0*4, MQNIC_SCHED_RR_CMD_SET_QUEUE_ENABLE | 1) + assert await tb.axil_master.read_dword(0*4) == 0x00000048 await RisingEdge(dut.clk) await RisingEdge(dut.clk) @@ -133,9 +173,14 @@ async def run_test_single(dut, idle_inserter=None, backpressure_inserter=None): tb.set_backpressure_generator(backpressure_inserter) dut.enable.value = 1 - await tb.wr_ctrl_reg(0x18, 1) + await tb.wr_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CH0_FC2, (25 << 16) | ((1536+63)//64)) + await tb.wr_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CH0_FC3, ((1536+63)//64)*32) + await tb.wr_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CH0_CTRL, 1) + await tb.wr_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CTRL, 1) - await tb.axil_master.write_dword(0*4, 3) + await tb.axil_master.write_dword(0*4, MQNIC_SCHED_RR_CMD_SET_PORT_TC | (0 << 8) | 0) + await tb.axil_master.write_dword(0*4, MQNIC_SCHED_RR_CMD_SET_PORT_ENABLE | (0 << 8) | 1) + await tb.axil_master.write_dword(0*4, MQNIC_SCHED_RR_CMD_SET_QUEUE_ENABLE | 1) await tb.doorbell_source.send(DoorbellTransaction(queue=0)) @@ -176,6 +221,9 @@ async def run_test_single(dut, idle_inserter=None, backpressure_inserter=None): 
tb.log.info("TX status: %s", status) await tb.tx_status_dequeue_source.send(status) + for k in range(200): + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) await RisingEdge(dut.clk) @@ -190,10 +238,15 @@ async def run_test_multiple(dut, idle_inserter=None, backpressure_inserter=None) tb.set_backpressure_generator(backpressure_inserter) dut.enable.value = 1 - await tb.wr_ctrl_reg(0x18, 1) + await tb.wr_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CH0_FC2, (25 << 16) | ((1536+63)//64)) + await tb.wr_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CH0_FC3, ((1536+63)//64)*32) + await tb.wr_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CH0_CTRL, 1) + await tb.wr_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CTRL, 1) for k in range(10): - await tb.axil_master.write_dword(k*4, 3) + await tb.axil_master.write_dword(k*4, MQNIC_SCHED_RR_CMD_SET_PORT_TC | (0 << 8) | 0) + await tb.axil_master.write_dword(k*4, MQNIC_SCHED_RR_CMD_SET_PORT_ENABLE | (0 << 8) | 1) + await tb.axil_master.write_dword(k*4, MQNIC_SCHED_RR_CMD_SET_QUEUE_ENABLE | 1) for k in range(10): await tb.doorbell_source.send(DoorbellTransaction(queue=k)) @@ -246,9 +299,14 @@ async def run_test_doorbell(dut, idle_inserter=None, backpressure_inserter=None) tb.set_backpressure_generator(backpressure_inserter) dut.enable.value = 1 - await tb.wr_ctrl_reg(0x18, 1) + await tb.wr_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CH0_FC2, (25 << 16) | ((1536+63)//64)) + await tb.wr_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CH0_FC3, ((1536+63)//64)*32) + await tb.wr_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CH0_CTRL, 1) + await tb.wr_ctrl_reg(MQNIC_RB_SCHED_RR_REG_CTRL, 1) - await tb.axil_master.write_dword(0*4, 3) + await tb.axil_master.write_dword(0*4, MQNIC_SCHED_RR_CMD_SET_PORT_TC | (0 << 8) | 0) + await tb.axil_master.write_dword(0*4, MQNIC_SCHED_RR_CMD_SET_PORT_ENABLE | (0 << 8) | 1) + await tb.axil_master.write_dword(0*4, MQNIC_SCHED_RR_CMD_SET_QUEUE_ENABLE | 1) await tb.doorbell_source.send(DoorbellTransaction(queue=0)) @@ -393,6 +451,8 @@ def test_tx_scheduler_rr(request): parameters['PIPELINE'] = 2 
parameters['SCHED_CTRL_ENABLE'] = 1 parameters['REQ_DEST_DEFAULT'] = 0 + parameters['MAX_TX_SIZE'] = 9216 + parameters['FC_SCALE'] = 64 parameters['AXIL_BASE_ADDR'] = 0 parameters['AXIL_DATA_WIDTH'] = 32 diff --git a/lib/mqnic/mqnic.h b/lib/mqnic/mqnic.h index eb029bdb4..67616bac4 100644 --- a/lib/mqnic/mqnic.h +++ b/lib/mqnic/mqnic.h @@ -14,6 +14,10 @@ #define mqnic_reg_read32(base, reg) (((volatile uint32_t *)(base))[(reg)/4]) #define mqnic_reg_write32(base, reg, val) (((volatile uint32_t *)(base))[(reg)/4]) = val +#define mqnic_reg_read16(base, reg) (((volatile uint16_t *)(base))[(reg)/2]) +#define mqnic_reg_write16(base, reg, val) (((volatile uint16_t *)(base))[(reg)/2]) = val +#define mqnic_reg_read8(base, reg) (((volatile uint8_t *)(base))[reg]) +#define mqnic_reg_write8(base, reg, val) (((volatile uint8_t *)(base))[reg]) = val struct mqnic; @@ -34,8 +38,13 @@ struct mqnic_sched { uint32_t type; uint32_t offset; - uint32_t channel_count; - uint32_t channel_stride; + uint32_t queue_count; + uint32_t queue_stride; + + int tc_count; + int port_count; + int channel_count; + int fc_scale; size_t regs_size; volatile uint8_t *regs; diff --git a/lib/mqnic/mqnic_scheduler.c b/lib/mqnic/mqnic_scheduler.c index c938781d9..ad52fe749 100644 --- a/lib/mqnic/mqnic_scheduler.c +++ b/lib/mqnic/mqnic_scheduler.c @@ -32,8 +32,14 @@ struct mqnic_sched *mqnic_sched_open(struct mqnic_sched_block *block, int index, sched->type = rb->type; sched->offset = mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_OFFSET); - sched->channel_count = mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_CH_COUNT); - sched->channel_stride = mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_CH_STRIDE); + sched->queue_count = mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_QUEUE_COUNT); + sched->queue_stride = mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_QUEUE_STRIDE); + + uint32_t val = mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_CFG); + sched->tc_count = val & 0xff; + sched->port_count = (val 
>> 8) & 0xff; + sched->channel_count = sched->tc_count * sched->port_count; + sched->fc_scale = 1 << ((val >> 16) & 0xff); return sched; diff --git a/modules/mqnic/mqnic.h b/modules/mqnic/mqnic.h index 5cf6f7fdc..81acf40c2 100644 --- a/modules/mqnic/mqnic.h +++ b/modules/mqnic/mqnic.h @@ -328,8 +328,13 @@ struct mqnic_sched { u32 type; u32 offset; - u32 channel_count; - u32 channel_stride; + u32 queue_count; + u32 queue_stride; + + int tc_count; + int port_count; + int channel_count; + int fc_scale; u8 __iomem *hw_addr; }; @@ -534,6 +539,22 @@ struct mqnic_sched *mqnic_create_scheduler(struct mqnic_sched_block *block, void mqnic_destroy_scheduler(struct mqnic_sched *sched); int mqnic_scheduler_enable(struct mqnic_sched *sched); void mqnic_scheduler_disable(struct mqnic_sched *sched); +int mqnic_scheduler_channel_enable(struct mqnic_sched *sched, int ch); +void mqnic_scheduler_channel_disable(struct mqnic_sched *sched, int ch); +void mqnic_scheduler_channel_set_dest(struct mqnic_sched *sched, int ch, int val); +int mqnic_scheduler_channel_get_dest(struct mqnic_sched *sched, int ch); +void mqnic_scheduler_channel_set_pkt_budget(struct mqnic_sched *sched, int ch, int val); +int mqnic_scheduler_channel_get_pkt_budget(struct mqnic_sched *sched, int ch); +void mqnic_scheduler_channel_set_data_budget(struct mqnic_sched *sched, int ch, int val); +int mqnic_scheduler_channel_get_data_budget(struct mqnic_sched *sched, int ch); +void mqnic_scheduler_channel_set_pkt_limit(struct mqnic_sched *sched, int ch, int val); +int mqnic_scheduler_channel_get_pkt_limit(struct mqnic_sched *sched, int ch); +void mqnic_scheduler_channel_set_data_limit(struct mqnic_sched *sched, int ch, int val); +int mqnic_scheduler_channel_get_data_limit(struct mqnic_sched *sched, int ch); +int mqnic_scheduler_queue_enable(struct mqnic_sched *sched, int queue); +void mqnic_scheduler_queue_disable(struct mqnic_sched *sched, int queue); +void mqnic_scheduler_queue_set_pause(struct mqnic_sched *sched, int 
queue, int val); +int mqnic_scheduler_queue_get_pause(struct mqnic_sched *sched, int queue); // mqnic_ptp.c void mqnic_register_phc(struct mqnic_dev *mdev); diff --git a/modules/mqnic/mqnic_hw.h b/modules/mqnic/mqnic_hw.h index 193167222..797115705 100644 --- a/modules/mqnic/mqnic_hw.h +++ b/modules/mqnic/mqnic_hw.h @@ -288,13 +288,37 @@ #define MQNIC_RB_SCHED_BLOCK_VER 0x00000300 #define MQNIC_RB_SCHED_BLOCK_REG_OFFSET 0x0C -#define MQNIC_RB_SCHED_RR_TYPE 0x0000C040 -#define MQNIC_RB_SCHED_RR_VER 0x00000100 -#define MQNIC_RB_SCHED_RR_REG_OFFSET 0x0C -#define MQNIC_RB_SCHED_RR_REG_CH_COUNT 0x10 -#define MQNIC_RB_SCHED_RR_REG_CH_STRIDE 0x14 -#define MQNIC_RB_SCHED_RR_REG_CTRL 0x18 -#define MQNIC_RB_SCHED_RR_REG_DEST 0x1C +#define MQNIC_RB_SCHED_RR_TYPE 0x0000C040 +#define MQNIC_RB_SCHED_RR_VER 0x00000200 +#define MQNIC_RB_SCHED_RR_REG_OFFSET 0x0C +#define MQNIC_RB_SCHED_RR_REG_QUEUE_COUNT 0x10 +#define MQNIC_RB_SCHED_RR_REG_QUEUE_STRIDE 0x14 +#define MQNIC_RB_SCHED_RR_REG_CTRL 0x18 +#define MQNIC_RB_SCHED_RR_REG_CFG 0x1C +#define MQNIC_RB_SCHED_RR_REG_CH_STRIDE 0x10 +#define MQNIC_RB_SCHED_RR_REG_CH0_CTRL 0x20 +#define MQNIC_RB_SCHED_RR_REG_CH0_FC1 0x24 +#define MQNIC_RB_SCHED_RR_REG_CH0_FC1_DEST 0x24 +#define MQNIC_RB_SCHED_RR_REG_CH0_FC1_PB 0x26 +#define MQNIC_RB_SCHED_RR_REG_CH0_FC2 0x28 +#define MQNIC_RB_SCHED_RR_REG_CH0_FC2_DB 0x28 +#define MQNIC_RB_SCHED_RR_REG_CH0_FC2_PL 0x2A +#define MQNIC_RB_SCHED_RR_REG_CH0_FC3 0x2C +#define MQNIC_RB_SCHED_RR_REG_CH0_FC3_DL 0x2C + +#define MQNIC_SCHED_RR_PORT_TC (0x7 << 0) +#define MQNIC_SCHED_RR_PORT_EN (1 << 3) +#define MQNIC_SCHED_RR_PORT_PAUSE (1 << 4) +#define MQNIC_SCHED_RR_PORT_SCHEDULED (1 << 5) +#define MQNIC_SCHED_RR_QUEUE_EN (1 << 6) +#define MQNIC_SCHED_RR_QUEUE_PAUSE (1 << 7) +#define MQNIC_SCHED_RR_QUEUE_ACTIVE (1 << 14) + +#define MQNIC_SCHED_RR_CMD_SET_PORT_TC 0x80010000 +#define MQNIC_SCHED_RR_CMD_SET_PORT_ENABLE 0x80020000 +#define MQNIC_SCHED_RR_CMD_SET_PORT_PAUSE 0x80030000 +#define 
MQNIC_SCHED_RR_CMD_SET_QUEUE_ENABLE 0x40000100 +#define MQNIC_SCHED_RR_CMD_SET_QUEUE_PAUSE 0x40000200 #define MQNIC_RB_SCHED_CTRL_TDMA_TYPE 0x0000C050 #define MQNIC_RB_SCHED_CTRL_TDMA_VER 0x00000100 diff --git a/modules/mqnic/mqnic_netdev.c b/modules/mqnic/mqnic_netdev.c index 151984e38..043a0b9b8 100644 --- a/modules/mqnic/mqnic_netdev.c +++ b/modules/mqnic/mqnic_netdev.c @@ -161,6 +161,23 @@ int mqnic_start_port(struct net_device *ndev) mqnic_port_set_tx_ctrl(priv->port, MQNIC_PORT_TX_CTRL_EN); + // configure scheduler + down_read(&priv->txq_table_sem); + radix_tree_for_each_slot(slot, &priv->txq_table, &iter, 0) { + struct mqnic_ring *q = (struct mqnic_ring *)*slot; + + mqnic_scheduler_queue_enable(priv->sched_block->sched[0], q->index); + } + up_read(&priv->txq_table_sem); + + // configure scheduler flow control + mqnic_scheduler_channel_set_pkt_budget(priv->sched_block->sched[0], 0, 1); + mqnic_scheduler_channel_set_data_budget(priv->sched_block->sched[0], 0, ndev->mtu + ETH_HLEN); + mqnic_scheduler_channel_set_pkt_limit(priv->sched_block->sched[0], 0, 0xFFFF); + mqnic_scheduler_channel_set_data_limit(priv->sched_block->sched[0], 0, iface->tx_fifo_depth); + + mqnic_scheduler_channel_enable(priv->sched_block->sched[0], 0); + // enable scheduler mqnic_activate_sched_block(priv->sched_block); @@ -212,6 +229,18 @@ void mqnic_stop_port(struct net_device *ndev) mqnic_update_stats(ndev); spin_unlock_bh(&priv->stats_lock); + // configure scheduler + down_read(&priv->txq_table_sem); + radix_tree_for_each_slot(slot, &priv->txq_table, &iter, 0) { + struct mqnic_ring *q = (struct mqnic_ring *)*slot; + + mqnic_scheduler_queue_disable(priv->sched_block->sched[0], q->index); + } + up_read(&priv->txq_table_sem); + + // configure scheduler flow control + mqnic_scheduler_channel_disable(priv->sched_block->sched[0], 0); + // disable scheduler mqnic_deactivate_sched_block(priv->sched_block); diff --git a/modules/mqnic/mqnic_scheduler.c b/modules/mqnic/mqnic_scheduler.c index 
f318289ca..cc96bc525 100644 --- a/modules/mqnic/mqnic_scheduler.c +++ b/modules/mqnic/mqnic_scheduler.c @@ -10,6 +10,7 @@ struct mqnic_sched *mqnic_create_scheduler(struct mqnic_sched_block *block, { struct device *dev = block->dev; struct mqnic_sched *sched; + u32 val; sched = kzalloc(sizeof(*sched), GFP_KERNEL); if (!sched) @@ -25,15 +26,25 @@ struct mqnic_sched *mqnic_create_scheduler(struct mqnic_sched_block *block, sched->type = rb->type; sched->offset = ioread32(rb->regs + MQNIC_RB_SCHED_RR_REG_OFFSET); - sched->channel_count = ioread32(rb->regs + MQNIC_RB_SCHED_RR_REG_CH_COUNT); - sched->channel_stride = ioread32(rb->regs + MQNIC_RB_SCHED_RR_REG_CH_STRIDE); + sched->queue_count = ioread32(rb->regs + MQNIC_RB_SCHED_RR_REG_QUEUE_COUNT); + sched->queue_stride = ioread32(rb->regs + MQNIC_RB_SCHED_RR_REG_QUEUE_STRIDE); sched->hw_addr = block->interface->hw_addr + sched->offset; + val = ioread32(rb->regs + MQNIC_RB_SCHED_RR_REG_CFG); + sched->tc_count = val & 0xff; + sched->port_count = (val >> 8) & 0xff; + sched->channel_count = sched->tc_count * sched->port_count; + sched->fc_scale = 1 << ((val >> 16) & 0xff); + dev_info(dev, "Scheduler type: 0x%08x", sched->type); dev_info(dev, "Scheduler offset: 0x%08x", sched->offset); + dev_info(dev, "Scheduler queue count: %d", sched->queue_count); + dev_info(dev, "Scheduler queue stride: %d", sched->queue_stride); + dev_info(dev, "Scheduler TC count: %d", sched->tc_count); + dev_info(dev, "Scheduler port count: %d", sched->port_count); dev_info(dev, "Scheduler channel count: %d", sched->channel_count); - dev_info(dev, "Scheduler channel stride: 0x%08x", sched->channel_stride); + dev_info(dev, "Scheduler FC scale: %d", sched->fc_scale); mqnic_scheduler_disable(sched); @@ -49,22 +60,116 @@ void mqnic_destroy_scheduler(struct mqnic_sched *sched) int mqnic_scheduler_enable(struct mqnic_sched *sched) { - int k; - - // enable scheduler iowrite32(1, sched->rb->regs + MQNIC_RB_SCHED_RR_REG_CTRL); - // enable queues - for (k = 0; k 
< sched->channel_count; k++) - iowrite32(3, sched->hw_addr + k * sched->channel_stride); - return 0; } EXPORT_SYMBOL(mqnic_scheduler_enable); void mqnic_scheduler_disable(struct mqnic_sched *sched) { - // disable scheduler iowrite32(0, sched->rb->regs + MQNIC_RB_SCHED_RR_REG_CTRL); } EXPORT_SYMBOL(mqnic_scheduler_disable); + +int mqnic_scheduler_channel_enable(struct mqnic_sched *sched, int ch) +{ + iowrite32(1, sched->rb->regs + MQNIC_RB_SCHED_RR_REG_CH0_CTRL + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE); + + return 0; +} +EXPORT_SYMBOL(mqnic_scheduler_channel_enable); + +void mqnic_scheduler_channel_disable(struct mqnic_sched *sched, int ch) +{ + iowrite32(0, sched->rb->regs + MQNIC_RB_SCHED_RR_REG_CH0_CTRL + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE); +} +EXPORT_SYMBOL(mqnic_scheduler_channel_disable); + +void mqnic_scheduler_channel_set_dest(struct mqnic_sched *sched, int ch, int val) +{ + iowrite16(val, sched->rb->regs + MQNIC_RB_SCHED_RR_REG_CH0_FC1_DEST + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE); +} +EXPORT_SYMBOL(mqnic_scheduler_channel_set_dest); + +int mqnic_scheduler_channel_get_dest(struct mqnic_sched *sched, int ch) +{ + return ioread16(sched->rb->regs + MQNIC_RB_SCHED_RR_REG_CH0_FC1_DEST + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE); +} +EXPORT_SYMBOL(mqnic_scheduler_channel_get_dest); + +void mqnic_scheduler_channel_set_pkt_budget(struct mqnic_sched *sched, int ch, int val) +{ + iowrite16(val, sched->rb->regs + MQNIC_RB_SCHED_RR_REG_CH0_FC1_PB + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE); +} +EXPORT_SYMBOL(mqnic_scheduler_channel_set_pkt_budget); + +int mqnic_scheduler_channel_get_pkt_budget(struct mqnic_sched *sched, int ch) +{ + return ioread16(sched->rb->regs + MQNIC_RB_SCHED_RR_REG_CH0_FC1_PB + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE); +} +EXPORT_SYMBOL(mqnic_scheduler_channel_get_pkt_budget); + +void mqnic_scheduler_channel_set_data_budget(struct mqnic_sched *sched, int ch, int val) +{ + val = (val + sched->fc_scale-1) / sched->fc_scale; + iowrite16(val, sched->rb->regs + 
MQNIC_RB_SCHED_RR_REG_CH0_FC2_DB + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE); +} +EXPORT_SYMBOL(mqnic_scheduler_channel_set_data_budget); + +int mqnic_scheduler_channel_get_data_budget(struct mqnic_sched *sched, int ch) +{ + return (int)ioread16(sched->rb->regs + MQNIC_RB_SCHED_RR_REG_CH0_FC2_DB + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE) * sched->fc_scale; +} +EXPORT_SYMBOL(mqnic_scheduler_channel_get_data_budget); + +void mqnic_scheduler_channel_set_pkt_limit(struct mqnic_sched *sched, int ch, int val) +{ + iowrite16(val, sched->rb->regs + MQNIC_RB_SCHED_RR_REG_CH0_FC2_PL + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE); +} +EXPORT_SYMBOL(mqnic_scheduler_channel_set_pkt_limit); + +int mqnic_scheduler_channel_get_pkt_limit(struct mqnic_sched *sched, int ch) +{ + return ioread16(sched->rb->regs + MQNIC_RB_SCHED_RR_REG_CH0_FC2_PL + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE); +} +EXPORT_SYMBOL(mqnic_scheduler_channel_get_pkt_limit); + +void mqnic_scheduler_channel_set_data_limit(struct mqnic_sched *sched, int ch, int val) +{ + val = (val + sched->fc_scale-1) / sched->fc_scale; + iowrite32(val, sched->rb->regs + MQNIC_RB_SCHED_RR_REG_CH0_FC3_DL + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE); +} +EXPORT_SYMBOL(mqnic_scheduler_channel_set_data_limit); + +int mqnic_scheduler_channel_get_data_limit(struct mqnic_sched *sched, int ch) +{ + return (int)ioread32(sched->rb->regs + MQNIC_RB_SCHED_RR_REG_CH0_FC3_DL + ch*MQNIC_RB_SCHED_RR_REG_CH_STRIDE) * sched->fc_scale; +} +EXPORT_SYMBOL(mqnic_scheduler_channel_get_data_limit); + +int mqnic_scheduler_queue_enable(struct mqnic_sched *sched, int queue) +{ + iowrite32(MQNIC_SCHED_RR_CMD_SET_QUEUE_ENABLE | 1, sched->hw_addr + sched->queue_stride*queue); + + return 0; +} +EXPORT_SYMBOL(mqnic_scheduler_queue_enable); + +void mqnic_scheduler_queue_disable(struct mqnic_sched *sched, int queue) +{ + iowrite32(MQNIC_SCHED_RR_CMD_SET_QUEUE_ENABLE | 0, sched->hw_addr + sched->queue_stride*queue); +} +EXPORT_SYMBOL(mqnic_scheduler_queue_disable); + +void 
mqnic_scheduler_queue_set_pause(struct mqnic_sched *sched, int queue, int val) +{ + iowrite32(MQNIC_SCHED_RR_CMD_SET_QUEUE_PAUSE | (val ? 1 : 0), sched->hw_addr + sched->queue_stride*queue); +} +EXPORT_SYMBOL(mqnic_scheduler_queue_set_pause); + +int mqnic_scheduler_queue_get_pause(struct mqnic_sched *sched, int queue) +{ + return !!(ioread32(sched->hw_addr + sched->queue_stride*queue) & MQNIC_SCHED_RR_QUEUE_PAUSE); /* 1 if the queue pause flag is set in the per-queue word, else 0 */ +} +EXPORT_SYMBOL(mqnic_scheduler_queue_get_pause); diff --git a/utils/mqnic-dump.c b/utils/mqnic-dump.c index abfb1b1f1..104b95972 100644 --- a/utils/mqnic-dump.c +++ b/utils/mqnic-dump.c @@ -323,20 +323,41 @@ int main(int argc, char *argv[]) { if (rb->type == MQNIC_RB_SCHED_RR_TYPE && rb->version == MQNIC_RB_SCHED_RR_VER) { + uint32_t val; + int ch_count; + int fc_scale; + printf("Round-robin scheduler\n"); - printf("Sched channel count: %d\n", mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_CH_COUNT)); - printf("Sched channel stride: %d\n", mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_CH_STRIDE)); - printf("Sched control: %d\n", mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_CTRL)); - printf("Sched dest: %d\n", mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_DEST)); + printf("Sched queue count: %d\n", mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_QUEUE_COUNT)); + printf("Sched queue stride: %d\n", mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_QUEUE_STRIDE)); + printf("Sched control: 0x%08x\n", mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_CTRL)); + + val = mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_CFG); + printf("Sched TC count: %d\n", val & 0xff); + printf("Sched port count: %d\n", (val >> 8) & 0xff); + ch_count = (val & 0xff) * ((val >> 8) & 0xff); + printf("Sched channel count: %d\n", ch_count); + fc_scale = 1 << ((val >> 16) & 0xff); + printf("Sched FC scale: %d\n", fc_scale); + + for (int k = 0; k < ch_count; k++) + { + printf("Sched CH%d control: 0x%08x\n", k, mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_CH_STRIDE*k +
MQNIC_RB_SCHED_RR_REG_CH0_CTRL)); + printf("Sched CH%d dest: 0x%04x\n", k, mqnic_reg_read16(rb->regs, MQNIC_RB_SCHED_RR_REG_CH_STRIDE*k + MQNIC_RB_SCHED_RR_REG_CH0_FC1_DEST)); + printf("Sched CH%d pkt budget: %d\n", k, mqnic_reg_read16(rb->regs, MQNIC_RB_SCHED_RR_REG_CH_STRIDE*k + MQNIC_RB_SCHED_RR_REG_CH0_FC1_PB)); + printf("Sched CH%d data budget: %d\n", k, mqnic_reg_read16(rb->regs, MQNIC_RB_SCHED_RR_REG_CH_STRIDE*k + MQNIC_RB_SCHED_RR_REG_CH0_FC2_DB) * fc_scale); + printf("Sched CH%d pkt limit: %d\n", k, mqnic_reg_read16(rb->regs, MQNIC_RB_SCHED_RR_REG_CH_STRIDE*k + MQNIC_RB_SCHED_RR_REG_CH0_FC2_PL)); + printf("Sched CH%d data limit: %d\n", k, mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_RR_REG_CH_STRIDE*k + MQNIC_RB_SCHED_RR_REG_CH0_FC3_DL) * fc_scale); + } } else if (rb->type == MQNIC_RB_SCHED_CTRL_TDMA_TYPE && rb->version == MQNIC_RB_SCHED_CTRL_TDMA_VER) { printf("TDMA scheduler controller\n"); - printf("Sched channel count: %d\n", mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_CTRL_TDMA_REG_CH_COUNT)); - printf("Sched channel stride: %d\n", mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_CTRL_TDMA_REG_CH_STRIDE)); - printf("Sched control: %d\n", mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_CTRL_TDMA_REG_CTRL)); + printf("Sched queue count: %d\n", mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_CTRL_TDMA_REG_CH_COUNT)); + printf("Sched queue stride: %d\n", mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_CTRL_TDMA_REG_CH_STRIDE)); + printf("Sched control: 0x%08x\n", mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_CTRL_TDMA_REG_CTRL)); printf("Sched timeslot count: %d\n", mqnic_reg_read32(rb->regs, MQNIC_RB_SCHED_CTRL_TDMA_REG_TS_COUNT)); } else if (rb->type == MQNIC_RB_TDMA_SCH_TYPE && rb->version == MQNIC_RB_TDMA_SCH_VER) @@ -366,12 +387,13 @@ int main(int argc, char *argv[]) val = mqnic_reg_read32(base, MQNIC_EQ_CTRL_STATUS_REG); uint32_t irq = val & 0xffff; - uint8_t enable = (val & MQNIC_EQ_ENABLE_MASK) != 0; - uint8_t armed = (val & MQNIC_EQ_ARM_MASK) != 0; - uint8_t active = (val & 
MQNIC_EQ_ACTIVE_MASK) != 0; - if (enable) flags[0] = 'e'; - if (armed) flags[1] = 'r'; - if (active) flags[2] = 'a'; + int enable = val & MQNIC_EQ_ENABLE_MASK; + if (enable) + flags[0] = 'e'; + if (val & MQNIC_EQ_ARM_MASK) + flags[1] = 'r'; + if (val & MQNIC_EQ_ACTIVE_MASK) + flags[2] = 'a'; uint8_t log_queue_size = (val >> 28) & 0xf; if (!enable && !verbose) @@ -397,12 +419,13 @@ int main(int argc, char *argv[]) val = mqnic_reg_read32(base, MQNIC_CQ_CTRL_STATUS_REG); uint32_t eqn = val & 0xffff; - uint8_t enable = (val & MQNIC_CQ_ENABLE_MASK) != 0; - uint8_t armed = (val & MQNIC_CQ_ARM_MASK) != 0; - uint8_t active = (val & MQNIC_CQ_ACTIVE_MASK) != 0; - if (enable) flags[0] = 'e'; - if (armed) flags[1] = 'r'; - if (active) flags[2] = 'a'; + int enable = val & MQNIC_CQ_ENABLE_MASK; + if (enable) + flags[0] = 'e'; + if (val & MQNIC_CQ_ARM_MASK) + flags[1] = 'r'; + if (val & MQNIC_CQ_ACTIVE_MASK) + flags[2] = 'a'; uint8_t log_queue_size = (val >> 28) & 0xf; if (!enable && !verbose) @@ -427,10 +450,11 @@ int main(int argc, char *argv[]) char flags[8] = "--"; val = mqnic_reg_read32(base, MQNIC_QUEUE_CTRL_STATUS_REG); - uint8_t enable = (val & MQNIC_QUEUE_ENABLE_MASK) != 0; - uint8_t active = (val & MQNIC_QUEUE_ACTIVE_MASK) != 0; - if (enable) flags[0] = 'e'; - if (active) flags[1] = 'a'; + int enable = val & MQNIC_QUEUE_ENABLE_MASK; + if (enable) + flags[0] = 'e'; + if (val & MQNIC_QUEUE_ACTIVE_MASK) + flags[1] = 'a'; if (!enable && !verbose) continue; @@ -458,10 +482,11 @@ int main(int argc, char *argv[]) char flags[8] = "--"; val = mqnic_reg_read32(base, MQNIC_QUEUE_CTRL_STATUS_REG); - uint8_t enable = (val & MQNIC_QUEUE_ENABLE_MASK) != 0; - uint8_t active = (val & MQNIC_QUEUE_ACTIVE_MASK) != 0; - if (enable) flags[0] = 'e'; - if (active) flags[1] = 'a'; + int enable = val & MQNIC_QUEUE_ENABLE_MASK; + if (enable) + flags[0] = 'e'; + if (val & MQNIC_QUEUE_ACTIVE_MASK) + flags[1] = 'a'; if (!enable && !verbose) continue; @@ -480,15 +505,48 @@ int main(int argc, char 
*argv[]) printf("RXQ %4d 0x%016lx %-5s %d %2d %4d %6d %6d %6d\n", k, base_addr, flags, log_desc_block_size, log_queue_size, cqn, prod_ptr, cons_ptr, occupancy); } - if (verbose) + for (int k = 0; k < dev_sched_block->sched_count; k++) { - for (int k = 0; k < dev_sched_block->sched_count; k++) + struct mqnic_sched *sched = dev_sched_block->sched[k]; + printf("Scheduler block %d scheduler %d\n", sched_block, k); + printf("Sched Queue Flags"); + for (int k = 0; k < sched->port_count; k++) + printf(" Port %2d", k); + printf("\n"); + for (int l = 0; l < sched->queue_count; l++) { - printf("Scheduler block %d scheduler %d\n", sched_block, k); - for (int l = 0; l < mqnic_res_get_count(dev_interface->txq_res); l++) + volatile uint8_t *base = sched->regs + l*sched->queue_stride; + uint32_t val = mqnic_reg_read32(base, 0); + char flags[8] = "---"; + + int enable = val & MQNIC_SCHED_RR_QUEUE_EN; + if (enable) flags[0] = 'e'; + if (val & MQNIC_SCHED_RR_QUEUE_PAUSE) + flags[1] = 'p'; + if (val & MQNIC_SCHED_RR_QUEUE_ACTIVE) + flags[2] = 'a'; + + if (!enable && !verbose) + continue; + + printf("SCH %2d Q %4d %-5s", k, l, flags); + + for (int k = 0; k < sched->port_count; k++) { - printf("Sched %2d queue %4d state: 0x%08x\n", k, l, mqnic_reg_read32(dev_sched_block->sched[k]->regs, l*4)); + char flags[8] = "---"; + + int tc = (val >> (k*8)) & MQNIC_SCHED_RR_PORT_TC; + if ((val >> (k*8)) & MQNIC_SCHED_RR_PORT_EN) + flags[0] = 'e'; + if ((val >> (k*8)) & MQNIC_SCHED_RR_PORT_PAUSE) + flags[1] = 'p'; + if ((val >> (k*8)) & MQNIC_SCHED_RR_PORT_SCHEDULED) /* 's' comes from the port scheduled bit, not the TC field */ + flags[2] = 's'; + + printf(" %-3s TC%d", flags, tc); } + + printf(" (0x%08x)\n", val); } } @@ -500,7 +558,7 @@ int main(int argc, char *argv[]) uint64_t val = mqnic_stats_read(dev, k); if (val || verbose) - printf("Index %d: %lu\n", k, mqnic_stats_read(dev, k)); + printf("%d: %lu\n", k, val); } }