Skip to content

Commit

Permalink
prov/udp: detect and use MTU to set max_msg_size and inject_size
Browse files Browse the repository at this point in the history
For each interface detected by the udp provider, determine the MTU of
the interface, and use that value to set the max_msg_size field of the
fi_ep_attr and fi_tx_attr values of the fi_info element. When the MTU
cannot be determined, the MTU value assumed by previous code
versions (1500) is used.
  • Loading branch information
mpokorny committed Oct 25, 2024
1 parent f236201 commit 1617801
Show file tree
Hide file tree
Showing 11 changed files with 121 additions and 17 deletions.
7 changes: 5 additions & 2 deletions include/freebsd/osd.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ static inline size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa)
return 0;
}

/*
 * FreeBSD stub: no MTU query is implemented for this platform, so the
 * interface argument is ignored and -1 is always returned.
 */
static inline int ofi_ifaddr_get_mtu(struct ifaddrs *ifa)
{
	(void) ifa;	/* intentionally unused */

	return -1;
}

static inline ssize_t ofi_process_vm_readv(pid_t pid,
const struct iovec *local_iov,
unsigned long liovcnt,
Expand Down Expand Up @@ -185,5 +190,3 @@ ofi_recvv_socket(SOCKET fd, const struct iovec *iov, size_t cnt, int flags)
}

#endif /* _FREEBSD_OSD_H_ */


2 changes: 2 additions & 0 deletions include/linux/osd.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ static inline int ofi_hugepage_enabled(void)

size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa);

int ofi_ifaddr_get_mtu(struct ifaddrs *ifa);

#ifndef __NR_process_vm_readv
# define __NR_process_vm_readv 310
#endif
Expand Down
5 changes: 5 additions & 0 deletions include/osx/osd.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ static inline size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa)
return 0;
}

/*
 * macOS stub: no MTU query is implemented for this platform, so the
 * interface argument is ignored and -1 is always returned.
 */
static inline int ofi_ifaddr_get_mtu(struct ifaddrs *ifa)
{
	(void) ifa;	/* intentionally unused */

	return -1;
}

static inline int ofi_hugepage_enabled(void)
{
return 0;
Expand Down
2 changes: 1 addition & 1 deletion include/windows/ifaddrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ struct ifaddrs {

char ad_name[16];
size_t speed;
int mtu;
};

int getifaddrs(struct ifaddrs **ifap);
void freeifaddrs(struct ifaddrs *ifa);

2 changes: 2 additions & 0 deletions include/windows/osd.h
Original file line number Diff line number Diff line change
Expand Up @@ -1006,6 +1006,8 @@ static inline int ofi_is_loopback_addr(struct sockaddr *addr) {

size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa);

int ofi_ifaddr_get_mtu(struct ifaddrs *ifa);

#define file2unix_time 10000000i64
#define win2unix_epoch 116444736000000000i64
#define CLOCK_REALTIME 0
Expand Down
10 changes: 6 additions & 4 deletions man/fi_udp.7.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,8 @@ receiving datagram messages over an unreliable endpoint.

# LIMITATIONS

The UDP provider has hard-coded maximums for supported queue sizes and data
transfers. These values are reflected in the related fabric attribute
structures
The UDP provider has a hard-coded maximum for supported queue sizes.
This value is reflected in the related fabric attribute structures.

EPs must be bound to both RX and TX CQs.

Expand All @@ -53,7 +52,10 @@ No support for counters.

# RUNTIME PARAMETERS

No runtime parameters are currently defined.
The UDP provider checks for the following environment variables -

*FI_UDP_IFACE*
: A string value that specifies the name of the interface.

# SEE ALSO

Expand Down
8 changes: 5 additions & 3 deletions prov/udp/src/udpx.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,22 @@
#ifndef _UDPX_H_
#define _UDPX_H_


extern struct fi_provider udpx_prov;
extern struct util_prov udpx_util_prov;
extern struct fi_info udpx_info;


int udpx_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric,
void *context);
int udpx_domain_open(struct fid_fabric *fabric, struct fi_info *info,
struct fid_domain **dom, void *context);
int udpx_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
struct fid_eq **eq, void *context);

int udpx_util_prov_init(uint32_t version, const char *node, const char *service,
uint64_t flags);

#define UDPX_FLAG_MULTI_RECV 1
#define UDPX_IOV_LIMIT 4
#define UDPX_MTU 1500

struct udpx_ep_entry {
void *context;
Expand All @@ -88,6 +88,8 @@ struct udpx_ep_entry {
uint8_t resv[sizeof(size_t) - 2];
};

/*
 * Largest message payload for an interface with the given MTU: the MTU
 * minus 28 bytes of per-datagram overhead.
 * NOTE(review): 28 presumably is a 20-byte IPv4 header (no options) plus
 * the 8-byte UDP header; an IPv6 header is 40 bytes, so confirm whether
 * this is adequate for IPv6 interfaces.
 * The result is non-positive when mtu <= 28 (e.g. an "unknown" MTU of 0
 * or -1), so callers must check the sign before using it as a size.
 */
#define UDPX_MAX_MSG_SIZE(mtu) ((mtu) - 28)

OFI_DECLARE_CIRQUE(struct udpx_ep_entry, udpx_rx_cirq);

struct udpx_ep;
Expand Down
42 changes: 38 additions & 4 deletions prov/udp/src/udpx_attr.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,15 @@
*/

#include "udpx.h"
#include "ofi_osd.h"

#define UDPX_TX_CAPS (OFI_TX_MSG_CAPS | FI_MULTICAST)
#define UDPX_RX_CAPS (FI_SOURCE | OFI_RX_MSG_CAPS)
#define UDPX_DOMAIN_CAPS (FI_LOCAL_COMM | FI_REMOTE_COMM)

/*
 * Default transmit attributes for the UDP provider.
 * inject_size is derived from the default MTU (UDPX_MTU) rather than a
 * hard-coded literal; the field must be initialized only once, otherwise
 * the later designator silently overrides the earlier one.
 */
struct fi_tx_attr udpx_tx_attr = {
	.caps = UDPX_TX_CAPS,
	.inject_size = UDPX_MAX_MSG_SIZE(UDPX_MTU),
	.size = 1024,
	.iov_limit = UDPX_IOV_LIMIT
};
Expand All @@ -53,7 +54,7 @@ struct fi_ep_attr udpx_ep_attr = {
.type = FI_EP_DGRAM,
.protocol = FI_PROTO_UDP,
.protocol_version = 0,
.max_msg_size = 1472,
.max_msg_size = UDPX_MAX_MSG_SIZE(UDPX_MTU),
.tx_ctx_cnt = 1,
.rx_ctx_cnt = 1
};
Expand Down Expand Up @@ -93,6 +94,39 @@ struct fi_info udpx_info = {

/*
 * Utility-provider descriptor for the UDP provider.
 * .info starts out NULL: udpx_util_prov_init() uses a NULL value as the
 * "not yet initialized" marker and fills in the per-interface fi_info
 * list on first use.  Each field is initialized exactly once.
 */
struct util_prov udpx_util_prov = {
	.prov = &udpx_prov,
	.info = NULL,
	.flags = 0,
};

static int detect_mtu(const struct fi_info* info) {

struct ifaddrs ifaddrs;
ifaddrs.ifa_next = NULL;
ifaddrs.ifa_flags = 0;
ifaddrs.ifa_netmask = NULL;
ifaddrs.ifa_name = info->domain_attr->name;
ifaddrs.ifa_addr = info->src_addr;
return ofi_ifaddr_get_mtu(&ifaddrs);
}

/*
 * Build the provider's fi_info list, with message sizes adjusted to the
 * MTU of each interface.  Does nothing if the list already exists.
 *
 * udpx_util_prov.info is temporarily pointed at the static template
 * (udpx_info) so that ofi_ip_getinfo() can produce one entry per
 * interface; the resulting list then replaces the template.  For each
 * entry whose MTU can be detected, tx_attr->inject_size and
 * ep_attr->max_msg_size are overwritten; otherwise the values produced
 * by ofi_ip_getinfo() are kept.
 *
 * The caller is responsible for serializing calls to this function.
 *
 * Returns 0 on success (or if already initialized), otherwise the
 * non-zero value reported by ofi_ip_getinfo().
 */
int udpx_util_prov_init(uint32_t version, const char *node, const char *service,
			uint64_t flags)
{
	struct fi_info *cur;
	/*
	 * ofi_ip_getinfo() receives &info as an output slot, so nothing is
	 * allocated here beforehand (a pre-allocated entry would presumably
	 * be overwritten and leaked).
	 */
	struct fi_info *info = NULL;
	int max_msg_size;
	int ret;

	if (udpx_util_prov.info != NULL)
		return 0;

	udpx_util_prov.info = &udpx_info;
	ret = ofi_ip_getinfo(&udpx_util_prov, version, node, service, flags,
			     NULL, &info);
	if (ret) {
		/* Leave the provider uninitialized so a later call retries. */
		udpx_util_prov.info = NULL;
		return ret;
	}

	for (cur = info; cur; cur = cur->next) {
		/* Non-positive result means the MTU could not be detected. */
		max_msg_size = UDPX_MAX_MSG_SIZE(detect_mtu(cur));
		if (max_msg_size > 0) {
			cur->tx_attr->inject_size = max_msg_size;
			cur->ep_attr->max_msg_size = max_msg_size;
		}
	}

	udpx_util_prov.info = info;
	return 0;
}
13 changes: 10 additions & 3 deletions prov/udp/src/udpx_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,26 @@

#include <sys/types.h>

static ofi_mutex_t init_lock;

/*
 * getinfo entry point of the UDP provider.
 *
 * Ensures the per-interface fi_info list has been built (under
 * init_lock, since udpx_util_prov_init() only does its work while
 * udpx_util_prov.info is still NULL), then lets util_getinfo() match the
 * caller's hints against that list.
 *
 * The former direct "return ofi_ip_getinfo(...)" is gone: left in place
 * ahead of the statements below it made the initialization path
 * unreachable.
 *
 * Returns the value of util_getinfo().
 */
static int udpx_getinfo(uint32_t version, const char *node, const char *service,
			uint64_t flags, const struct fi_info *hints,
			struct fi_info **info)
{
	ofi_mutex_lock(&init_lock);
	udpx_util_prov_init(version, node, service, flags);
	ofi_mutex_unlock(&init_lock);

	return util_getinfo(&udpx_util_prov, version, node, service, flags,
			    hints, info);
}

/*
 * Provider teardown: release the fi_info list cached in
 * udpx_util_prov.info, if one was built.
 */
static void udpx_fini(void)
{
	if (udpx_util_prov.info != NULL) {
		fi_freeinfo(udpx_util_prov.info);
		/* Reset so the freed list is never reused or freed twice. */
		udpx_util_prov.info = NULL;
	}
}


struct fi_provider udpx_prov = {
.name = "udp",
.version = OFI_VERSION_DEF_PROV,
Expand All @@ -65,5 +71,6 @@ UDP_INI
fi_param_define(&udpx_prov, "iface", FI_PARAM_STRING,
"Specify interface name");

ofi_mutex_init(&init_lock);
return &udpx_prov;
}
46 changes: 46 additions & 0 deletions src/linux/osd.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,49 @@ size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa)
}

#endif /* HAVE_ETHTOOL */

/*
 * Read the MTU of a network interface from sysfs.
 *
 * The value is parsed from /sys/class/net/<ifa->ifa_name>/mtu.
 *
 * Returns the MTU on success, or 0 when it cannot be determined (no
 * interface name, name too long for the path buffer, file missing, or
 * contents not a non-negative integer).  Note that the stub versions of
 * this function on other platforms report failure as -1, so callers
 * should treat any non-positive value as "unknown".
 */
int ofi_ifaddr_get_mtu(struct ifaddrs *ifa)
{
	static const char mtu_filename_prefix[] = "/sys/class/net/";
	static const char mtu_filename_suffix[] = "/mtu";
	/*
	 * Prefix + interface name + suffix.  Both sizeof values and
	 * IF_NAMESIZE already include a terminating NUL, so the buffer is
	 * slightly larger than strictly required.
	 */
	char mtu_filename[sizeof(mtu_filename_prefix) + IF_NAMESIZE +
			  sizeof(mtu_filename_suffix)];
	FILE *fd;
	int mtu = 0;
	int len;

	if (!ifa || !ifa->ifa_name)
		return 0;

	len = snprintf(mtu_filename, sizeof(mtu_filename), "%s%s%s",
		       mtu_filename_prefix, ifa->ifa_name,
		       mtu_filename_suffix);
	/* A truncated path would name the wrong file; treat it as failure. */
	if (len < 0 || (size_t) len >= sizeof(mtu_filename))
		return 0;

	fd = fopen(mtu_filename, "r");
	if (!fd)
		return 0;

	/* Parse into an int: "%d" must be paired with an int object. */
	if (fscanf(fd, "%d", &mtu) != 1 || mtu < 0)
		mtu = 0;

	fclose(fd);
	return mtu;
}
1 change: 1 addition & 0 deletions src/windows/osd.c
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ int getifaddrs(struct ifaddrs **ifap)
(*addr6) = *(struct sockaddr_in6 *) pSockAddr;
}
fa->speed = aa->TransmitLinkSpeed;
fa->mtu = aa->Mtu;
/* Generate fake Unix-like device names */
sprintf_s(fa->ad_name, sizeof(fa->ad_name), "eth%d", i++);
}
Expand Down

0 comments on commit 1617801

Please sign in to comment.