From 20a72a49686226e6c6d2b72f857f7020de6d7442 Mon Sep 17 00:00:00 2001 From: Mike Uttormark Date: Thu, 19 Dec 2024 10:33:01 -0600 Subject: [PATCH] prov/util: Define monitor unsubscribe_on_delete Not all memory montiors have a 1-N relationship between subscribe and the MR cache entries. Some memory monitors, such as kdreg2, have a 1-1 relationship and require unsubscribe to be called when the corresponding MR cache entry is deleted. To meet this requirement, unsubscribe_on_delete is defined. When true for a memory monitor, ofi_monitor_unsubscribe() will be called on MR cache being freed. Signed-off-by: Mike Uttormark Signed-off-by: Ian Ziemba --- include/ofi_mr.h | 11 +++++++++++ prov/util/src/cuda_ipc_monitor.c | 1 + prov/util/src/cuda_mem_monitor.c | 1 + prov/util/src/import_mem_monitor.c | 1 + prov/util/src/kdreg2_mem_monitor.c | 5 +++++ prov/util/src/rocr_ipc_monitor.c | 1 + prov/util/src/rocr_mem_monitor.c | 2 ++ prov/util/src/uffd_mem_monitor.c | 6 +++++- prov/util/src/util_mr_cache.c | 9 +++++++++ prov/util/src/xpmem_monitor.c | 1 + prov/util/src/ze_ipc_monitor.c | 1 + prov/util/src/ze_mem_monitor.c | 1 + 12 files changed, 39 insertions(+), 1 deletion(-) diff --git a/include/ofi_mr.h b/include/ofi_mr.h index b0556eee019..c2a56ae175f 100644 --- a/include/ofi_mr.h +++ b/include/ofi_mr.h @@ -192,6 +192,17 @@ struct ofi_mem_monitor { bool (*valid)(struct ofi_mem_monitor *notifier, const struct ofi_mr_info *info, struct ofi_mr_entry *entry); const char *name; + + /* Set this to true if the monitor tracks each individual memory + * registration. That is, the monitor has a 1-to-1 mapping to MR Cache + * entry and corresponding subscribe. + * + * When true, the MR Cache will call the memory monitor unsubscribe() + * method when deleting an MR Cache entry. This allows the monitor + * stop tracking memory regions for which there are no longer + * registrations. + */ + bool unsubscribe_on_delete; }; void ofi_monitor_init(struct ofi_mem_monitor *monitor); diff --git a/prov/util/src/cuda_ipc_monitor.c b/prov/util/src/cuda_ipc_monitor.c index 8ce04dd338b..331350fd346 100644 --- a/prov/util/src/cuda_ipc_monitor.c +++ b/prov/util/src/cuda_ipc_monitor.c @@ -64,6 +64,7 @@ static struct ofi_mem_monitor cuda_ipc_monitor_ = { .unsubscribe = ofi_monitor_unsubscribe_no_op, .valid = cuda_ipc_monitor_valid, .name = "cuda_ipc", + .unsubscribe_on_delete = false, }; struct ofi_mem_monitor *cuda_ipc_monitor = &cuda_ipc_monitor_; diff --git a/prov/util/src/cuda_mem_monitor.c b/prov/util/src/cuda_mem_monitor.c index 78d0c7ae994..f7ca200ba94 100644 --- a/prov/util/src/cuda_mem_monitor.c +++ b/prov/util/src/cuda_mem_monitor.c @@ -154,6 +154,7 @@ static struct ofi_mem_monitor cuda_mm = { .unsubscribe = cuda_mm_unsubscribe, .valid = cuda_mm_valid, .name = "cuda", + .unsubscribe_on_delete = false, }; struct ofi_mem_monitor *cuda_monitor = &cuda_mm; diff --git a/prov/util/src/import_mem_monitor.c b/prov/util/src/import_mem_monitor.c index e7be581526f..a1c4ca097f8 100644 --- a/prov/util/src/import_mem_monitor.c +++ b/prov/util/src/import_mem_monitor.c @@ -70,6 +70,7 @@ static struct ofi_import_monitor impmon = { .monitor.unsubscribe = ofi_import_monitor_unsubscribe, .monitor.valid = ofi_import_monitor_valid, .monitor.name = "import", + .monitor.unsubscribe_on_delete = false, }; struct ofi_mem_monitor *import_monitor = &impmon.monitor; diff --git a/prov/util/src/kdreg2_mem_monitor.c b/prov/util/src/kdreg2_mem_monitor.c index ba7c2a21d31..d95a2722572 100644 --- a/prov/util/src/kdreg2_mem_monitor.c +++ b/prov/util/src/kdreg2_mem_monitor.c @@ -39,6 +39,8 @@ #define EVICTOR_THREAD_ATTR NULL #define INFINITE_TIMEOUT -1 +/* kdreg2 monitors each address range (byte accurate) with a unique cookie */ + static int kdreg2_monitor_subscribe(struct ofi_mem_monitor *monitor, const void *addr, size_t len, union ofi_mr_hmem_info *hmem_info) @@ -63,6 +65,8 @@ static int kdreg2_monitor_subscribe(struct ofi_mem_monitor *monitor, return 0; } +/* Unsubscribe is via cookie, not address range */ + static void kdreg2_monitor_unsubscribe(struct ofi_mem_monitor *monitor, const void *addr, size_t len, union ofi_mr_hmem_info *hmem_info) @@ -359,6 +363,7 @@ static struct ofi_kdreg2 kdreg2_mm = { .monitor.unsubscribe = kdreg2_monitor_unsubscribe, .monitor.valid = kdreg2_monitor_valid, .monitor.name = "kdreg2", + .monitor.unsubscribe_on_delete = true, .fd = -1, .exit_pipe = { -1, -1 }, .status_data = NULL, diff --git a/prov/util/src/rocr_ipc_monitor.c b/prov/util/src/rocr_ipc_monitor.c index 9941efc127b..0ad800efcb3 100644 --- a/prov/util/src/rocr_ipc_monitor.c +++ b/prov/util/src/rocr_ipc_monitor.c @@ -65,6 +65,7 @@ static struct ofi_mem_monitor rocr_ipc_monitor_ = { .unsubscribe = ofi_monitor_unsubscribe_no_op, .valid = rocr_ipc_monitor_valid, .name = "rocr_ipc", + .unsubscribe_on_delete = false, }; struct ofi_mem_monitor *rocr_ipc_monitor = &rocr_ipc_monitor_; diff --git a/prov/util/src/rocr_mem_monitor.c b/prov/util/src/rocr_mem_monitor.c index c194814c640..207cda5371e 100644 --- a/prov/util/src/rocr_mem_monitor.c +++ b/prov/util/src/rocr_mem_monitor.c @@ -73,6 +73,7 @@ static struct rocr_mm rocr_mm = { .unsubscribe = rocr_mm_unsubscribe, .valid = rocr_mm_valid, .name = "rocr", + .unsubscribe_on_delete = false, }, }; @@ -404,6 +405,7 @@ static struct ofi_mem_monitor rocr_mm = { .unsubscribe = rocr_mm_unsubscribe, .valid = rocr_mm_valid, .name = "rocr", + .unsubscribe_on_delete = false, }; struct ofi_mem_monitor *rocr_monitor = &rocr_mm; diff --git a/prov/util/src/uffd_mem_monitor.c b/prov/util/src/uffd_mem_monitor.c index c06b49178a1..9bcb0dba77f 100644 --- a/prov/util/src/uffd_mem_monitor.c +++ b/prov/util/src/uffd_mem_monitor.c @@ -54,6 +54,7 @@ static struct ofi_uffd uffd = { .monitor.start = ofi_uffd_start, .monitor.stop = ofi_uffd_stop, .monitor.name = "uffd", + .monitor.unsubscribe_on_delete = false, .fd = -1, .exit_pipe = { -1, -1 }, }; @@ -266,6 +267,9 @@ static int ofi_uffd_register(const void *addr, size_t len, size_t page_size) return 0; } +/* Uffd subscription is by full page, which gets tracked in the kernel to + * an entire VMA. + */ static int ofi_uffd_subscribe(struct ofi_mem_monitor *monitor, const void *addr, size_t len, union ofi_mr_hmem_info *hmem_info) @@ -299,7 +303,7 @@ static int ofi_uffd_unregister(const void *addr, size_t len, size_t page_size) return 0; } -/* May be called from mr cache notifier callback */ +/* Uffd unsubscribes entire pages */ static void ofi_uffd_unsubscribe(struct ofi_mem_monitor *monitor, const void *addr, size_t len, union ofi_mr_hmem_info *hmem_info) diff --git a/prov/util/src/util_mr_cache.c b/prov/util/src/util_mr_cache.c index ea8bf15570f..987ac8c0115 100644 --- a/prov/util/src/util_mr_cache.c +++ b/prov/util/src/util_mr_cache.c @@ -114,10 +114,19 @@ static void util_mr_entry_free(struct ofi_mr_cache *cache, static void util_mr_free_entry(struct ofi_mr_cache *cache, struct ofi_mr_entry *entry) { + enum fi_hmem_iface iface = entry->info.iface; + struct ofi_mem_monitor *monitor = cache->monitors[iface]; + FI_DBG(cache->prov, FI_LOG_MR, "free %p (len: %zu)\n", entry->info.iov.iov_base, entry->info.iov.iov_len); assert(!entry->node); + + if (monitor->unsubscribe_on_delete) + ofi_monitor_unsubscribe(monitor, entry->info.iov.iov_base, + entry->info.iov.iov_len, + &entry->hmem_info); + cache->delete_region(cache, entry); util_mr_entry_free(cache, entry); } diff --git a/prov/util/src/xpmem_monitor.c b/prov/util/src/xpmem_monitor.c index 9824cc8f9c0..796bc7bef93 100644 --- a/prov/util/src/xpmem_monitor.c +++ b/prov/util/src/xpmem_monitor.c @@ -60,6 +60,7 @@ static struct ofi_mem_monitor xpmem_monitor_ = { .unsubscribe = ofi_monitor_unsubscribe_no_op, .valid = xpmem_monitor_valid, .name = "xpmem", + .unsubscribe_on_delete = false, }; struct ofi_mem_monitor *xpmem_monitor = &xpmem_monitor_; diff --git a/prov/util/src/ze_ipc_monitor.c b/prov/util/src/ze_ipc_monitor.c index 70cb61a04d4..743360f7bf6 100644 --- a/prov/util/src/ze_ipc_monitor.c +++ b/prov/util/src/ze_ipc_monitor.c @@ -65,6 +65,7 @@ static struct ofi_mem_monitor ze_ipc_monitor_ = { .unsubscribe = ofi_monitor_unsubscribe_no_op, .valid = ze_ipc_monitor_valid, .name = "ze_ipc", + .unsubscribe_on_delete = false, }; struct ofi_mem_monitor *ze_ipc_monitor = &ze_ipc_monitor_; diff --git a/prov/util/src/ze_mem_monitor.c b/prov/util/src/ze_mem_monitor.c index b379df28852..0f396b3eadc 100644 --- a/prov/util/src/ze_mem_monitor.c +++ b/prov/util/src/ze_mem_monitor.c @@ -113,6 +113,7 @@ static struct ofi_mem_monitor ze_mm = { .unsubscribe = ze_mm_unsubscribe, .valid = ze_mm_valid, .name = "ze", + .unsubscribe_on_delete = false, }; struct ofi_mem_monitor *ze_monitor = &ze_mm;