Skip to content

Commit

Permalink
Tirpc fixes to enable NFS over RDMA
Browse files Browse the repository at this point in the history
    1>Connection management.
    2>Scalable per connection buffer management to use/reuse rdma registered
      buffers for NFS request/responses/data buffers.
    3>Granular buffer sizes for headers and data.
    4>Rdma credit management based on outstanding recv buffers.
    5>Async rdma_send/rdma_write callbacks.
    6>Fixes to make rdma_read and rdma_write work.
    7>Fixes to make reply_list and write_list work.
    8>Limit rdma connections.
    9>Connection disconnect handling to cleanup posted buffers.
   10>cbc ref management.
   11>rdma xprt ref management.

We have a supporting patch in Ganesha to use RDMA-registered buffers for read and
readdir responses.
With this patch and the supporting Ganesha patch, we should be able to use
NFSv3 and NFSv4.0 over RDMA.
Delegation has to be disabled for NFSv4.0, since the callback channel is not
yet supported over RDMA.

We tested NFSoRDMA with both hardware and SoftROCE
Hardware:
Ethernet controller: Mellanox Technologies ConnectX Family mlx5Gen Virtual Function
Rocky Linux release 8.9 (Green Obsidian)
Linux kernel version 5.10

SoftROCE:
Rocky Linux release 8.9 (Green Obsidian)
Linux kernel version 5.4.275-1.el8.elrepo.x86_64
Tested with fsal=MEM

Signed-off-by: Gaurav Gangalwar <[email protected]>
  • Loading branch information
Gaurav-Gangalwar committed May 9, 2024
1 parent 5bfd6cb commit ad8c35d
Show file tree
Hide file tree
Showing 14 changed files with 2,206 additions and 863 deletions.
21 changes: 21 additions & 0 deletions ntirpc/rpc/svc.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
#ifndef _TIRPC_SVC_H
#define _TIRPC_SVC_H

#ifdef USE_RPC_RDMA
#include <assert.h>
#endif
#include <sys/cdefs.h>
#include <rpc/rpc_msg.h>
#include <rpc/types.h>
Expand Down Expand Up @@ -142,6 +145,13 @@ typedef struct svc_init_params {
u_int gss_max_idle_gen;
u_int gss_max_gc;
uint32_t channels;

#if defined(_USE_NFS_RDMA) || defined(USE_RPC_RDMA)
uint16_t nfs_rdma_port; /* Shared with Ganesha */
uint32_t max_rdma_connections;
bool enable_rdma_dump;
#endif

int32_t idle_timeout;
uint32_t thr_stack_size;
} svc_init_params;
Expand Down Expand Up @@ -271,6 +281,11 @@ struct svc_xprt {
void *xp_u1; /* client user data */
void *xp_u2; /* client user data */

#if defined(_USE_NFS_RDMA) || defined(USE_RPC_RDMA)
bool xp_rdma; /* True if this xprt is RDMA enabled.
* Shared with Ganesha */
#endif

struct rpc_address xp_local; /* local address, length, port */
struct rpc_address xp_remote; /* remote address, length, port */
struct rpc_address xp_proxy; /* proxy address, length, port */
Expand Down Expand Up @@ -339,6 +354,12 @@ struct svc_req {
void *rq_u2; /* user data */
uint64_t rq_cksum;

#if defined(_USE_NFS_RDMA) || defined(USE_RPC_RDMA)
/* Data buffer used to serve read/readdir from fs */
int data_chunk_length; /* Shared with Ganesha */
uint8_t *data_chunk; /* Shared with Ganesha */
#endif

/* Moved in N TI-RPC */
struct SVCAUTH *rq_auth; /* auth handle */
void *rq_ap1; /* auth private */
Expand Down
38 changes: 37 additions & 1 deletion ntirpc/rpc/xdr_ioq.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,24 @@
#include <rpc/work_pool.h>
#include <rpc/xdr.h>

#ifdef USE_RPC_RDMA
/* Size class of an RDMA xdr_ioq_uv buffer (stored in xdr_ioq_uv.uv_type). */
typedef enum xdr_ioq_uv_type {
UV_DATA = 1, /* Data buffers of big size */
UV_HDR /* Header buffers of smaller size */
} xdr_ioq_uv_type_t;
#endif

struct xdr_ioq_uv
{
struct poolq_entry uvq;

#ifdef USE_RPC_RDMA
bool rdma_uv; /* Flag to identify uv used for RDMA */
struct xdr_vio rdma_v; /* Used to reset v after UIO_REFER */
struct xdr_uio rdma_u; /* Used to reset u after UIO_REFER */
xdr_ioq_uv_type_t uv_type;
#endif

/* spliced buffers, if any */
struct xdr_uio u;

Expand Down Expand Up @@ -84,14 +98,18 @@ struct xdr_ioq {
struct work_pool_entry ioq_wpe;
struct poolq_entry ioq_s; /* segment of stream */
pthread_cond_t ioq_cond;

struct poolq_head *ioq_pool;
struct xdr_ioq_uv_head ioq_uv; /* header/vectors */

uint64_t id;
uint32_t write_start; /* Position to start write at */
int frag_hdr_bytes_sent; /* Indicates a fragment header has been sent */
bool has_blocked;

#ifdef USE_RPC_RDMA
bool rdma_ioq;
#endif

struct rpc_dplx_rec *rec;
};

Expand Down Expand Up @@ -132,4 +150,22 @@ extern void xdr_ioq_destroy_pool(struct poolq_head *ioqh);

extern const struct xdr_ops xdr_ioq_ops;

#ifdef USE_RPC_RDMA
extern const struct xdr_ops xdr_ioq_ops_rdma;
extern void xdr_rdma_ioq_uv_release(struct xdr_ioq_uv *uv);
extern void xdr_rdma_ioq_release(struct poolq_head *ioqh, bool xioq_recycle,
struct xdr_ioq *xioq);
extern void xdr_rdma_buf_pool_destroy(struct poolq_head *ioqh);

extern struct poolq_entry *xdr_rdma_ioq_uv_fetch(struct xdr_ioq *xioq,
struct poolq_head *ioqh,
char *comment,
u_int count,
u_int ioq_flags);
extern struct poolq_entry *xdr_rdma_ioq_uv_fetch_nothing(struct xdr_ioq *xioq,
struct poolq_head *ioqh,
char *comment,
u_int count,
u_int ioq_flags);
#endif
#endif /* XDR_IOQ_H */
18 changes: 9 additions & 9 deletions src/clnt_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,25 +159,25 @@ clnt_rdma_call(struct clnt_req *cc)
struct rpc_dplx_rec *rec = cx->cx_rec;
RDMAXPRT *xd = RDMA_DR(rec);
struct poolq_entry *have =
xdr_ioq_uv_fetch(&xd->sm_dr.ioq, &xd->cbqh,
xdr_rdma_ioq_uv_fetch(&xd->sm_dr.ioq, &xd->cbqh,
"call context", 1, IOQ_FLAG_NONE);
struct rpc_rdma_cbc *cbc = (struct rpc_rdma_cbc *)(_IOQ(have));
XDR *xdrs;
u_int32_t *uint32p;

/* free old buffers (should do nothing) */
xdr_ioq_release(&cbc->workq.ioq_uv.uvqh);
xdr_ioq_release(&cbc->holdq.ioq_uv.uvqh);
xdr_ioq_release(&cbc->recvq.ioq_uv.uvqh);
xdr_ioq_release(&cbc->sendq.ioq_uv.uvqh);
xdr_rdma_callq(xd);

cbc->workq.xdrs[0].x_lib[1] =
cbc->holdq.xdrs[0].x_lib[1] = xd;
cbc->recvq.xdrs[0].x_lib[1] =
cbc->sendq.xdrs[0].x_lib[1] = xd;

(void) xdr_ioq_uv_fetch(&cbc->holdq, &xd->outbufs.uvqh,
(void) xdr_rdma_ioq_uv_fetch(&cbc->sendq, &xd->outbufs_data.uvqh,
"call buffer", 1, IOQ_FLAG_NONE);
xdr_ioq_reset(&cbc->holdq, 0);
xdr_ioq_reset(&cbc->sendq, 0);

xdrs = cbc->holdq.xdrs;
xdrs = cbc->sendq.xdrs;
cc->cc_error.re_status = RPC_SUCCESS;

mutex_lock(&cl->cl_lock);
Expand All @@ -194,7 +194,7 @@ clnt_rdma_call(struct clnt_req *cc)
__warnx(TIRPC_DEBUG_FLAG_CLNT_RDMA,
"%s: %p@%p failed",
__func__, cl, cx->cx_rec);
xdr_ioq_release(&cbc->holdq.ioq_uv.uvqh);
xdr_ioq_release(&cbc->sendq.ioq_uv.uvqh);
return (RPC_CANTENCODEARGS);
}
mutex_unlock(&cl->cl_lock);
Expand Down
4 changes: 4 additions & 0 deletions src/rpc_dplx_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ struct rpc_dplx_rec {

size_t maxrec;
long pagesz;
#ifdef USE_RPC_RDMA
u_int recv_hdr_sz;
u_int send_hdr_sz;
#endif
u_int recvsz;
u_int sendsz;
uint32_t call_xid; /**< current call xid */
Expand Down
Loading

0 comments on commit ad8c35d

Please sign in to comment.