Patchwork [dpdk-dev,RFC] malloc: fix deadlock when using malloc stats

login
register
mail settings
Submitter Burakov, Anatoly
Date Dec. 4, 2018, 3:57 p.m.
Message ID <8936c9a4c7afc6671ff89469ba1c406f6b494e67.1543937627.git.anatoly.burakov@intel.com>
Download mbox | patch
Permalink /patch/672137/
State New
Headers show

Comments

Burakov, Anatoly - Dec. 4, 2018, 3:57 p.m.
Currently, malloc statistics and external heap creation code
use memory hotplug lock as a way to synchronize accesses to
heaps (as in, locking the hotplug lock to prevent list of heaps
from changing under our feet). At the same time, malloc
statistics code will also lock the heap because it needs to
access heap data and does not want any other thread to allocate
anything from that heap.

In such scheme, it is possible to enter a deadlock with the
following sequence of events:

thread 1		thread 2
rte_malloc()
			rte_malloc_dump_stats()
take heap lock
			take hotplug lock
attempt to take
hotplug lock
			attempt to take heap lock

Neither thread will be able to continue, as both of them are
waiting for the other one to drop the lock. This can be
fixed by adding an additional lock protecting the heap list.

In addition, to prevent further issues, we must clearly
define what each lock is for, and how to use them. As is
explained in the code comments, there are now three locks:

- Heap list lock
  - This lock guards the changes to heap list. We need it
    because when we allocate, we might attempt to allocate
    from more than one heap, and we also have functions that
    can create or destroy heaps. We need heap list to be
    consistent for the duration of allocation attempt.
- Heap lock
  - This lock protects data inside a specific heap.
- Memseg list lock
  - Also known as memory hotplug lock. This lock protects
    access to our internal page tables - only one thread
    at a time is allowed to make changes to the page
    table (be it from dynamic allocation or from creating
    additional page tables for external heaps).

For any given operation, not all of these locks may be
necessary, but when they are taken out, they are to be
taken out in the exact order specified above - heap list
lock first, then heap lock, then memseg list lock.

Fixes: 72cf92b31855 ("malloc: index heaps using heap ID rather than NUMA node")

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    This breaks EAL ABI, hence submitting as RFC as it may not be
    possible to include this in 19.02.

 .../common/include/rte_eal_memconfig.h        | 33 +++++++++++-
 lib/librte_eal/common/malloc_heap.c           | 38 +++++++++----
 lib/librte_eal/common/rte_malloc.c            | 54 +++++++++++--------
 3 files changed, 92 insertions(+), 33 deletions(-)

Patch

diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h
index 84aabe36c..da3310da2 100644
--- a/lib/librte_eal/common/include/rte_eal_memconfig.h
+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
@@ -41,7 +41,35 @@  struct rte_memseg_list {
 /**
  * the structure for the memory configuration for the RTE.
  * Used by the rte_config structure. It is separated out, as for multi-process
- * support, the memory details should be shared across instances
+ * support, the memory details should be shared across instances.
+ *
+ * For the memory subsystem, there are three locks that will be in use
+ * throughout the memory code: heap list lock, heap lock, and memseg list lock
+ * (aka memory hotplug lock).
+ *
+ * Each of these locks may not always be necessary, but any time they are used,
+ * they must *always* be taken out in the above specified order. The purpose of
+ * each lock is as follows:
+ *
+ * Heap list lock protects changes to list of malloc heaps and prevents changes
+ * in number and/or contents of ``malloc_heaps`` array of structures. For
+ * example, if we're allocating memory, we need to know to which heap a given
+ * socket ID belongs to, and we might need this information multiple times (in
+ * case of SOCKET_ID_ANY), so the heap list must stay as is until we finish our
+ * allocation. By convention, this lock also protects the ``next_socket_id``
+ * value, and so must also be taken out any time this value is accessed (even if
+ * no changes to heap list are to be expected).
+ *
+ * Individual heap lock is part of ``malloc_heap`` structure, and protects
+ * contents of each individual heap. That is, multiple above described
+ * allocations may take place concurrently, but only one of them can happen from
+ * a given heap.
+ *
+ * Memseg list lock (aka memory hotplug lock) protects the internal page table
+ * stored in the ``memsegs`` array of structures. Any time internal page tables
+ * are to be accessed (such as allocating more memory from the system,
+ * registering a new external heap, or even simply reading the memory map), this
+ * lock must be taken out to manage concurrent accesses to page tables.
  */
 struct rte_mem_config {
 	volatile uint32_t magic;   /**< Magic number - Sanity check. */
@@ -72,6 +100,9 @@  struct rte_mem_config {
 
 	struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */
 
+	rte_rwlock_t heap_list_lock;
+	/**< protects list of malloc heaps against concurrent changes */
+
 	/* Heaps of Malloc */
 	struct malloc_heap malloc_heaps[RTE_MAX_HEAPS];
 
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index c6a6d4f6b..0d2fc4025 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -690,6 +690,7 @@  void *
 malloc_heap_alloc(const char *type, size_t size, int socket_arg,
 		unsigned int flags, size_t align, size_t bound, bool contig)
 {
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	int socket, heap_id, i;
 	void *ret;
 
@@ -705,16 +706,21 @@  malloc_heap_alloc(const char *type, size_t size, int socket_arg,
 	else
 		socket = socket_arg;
 
+	/* do not allow any alterations to heaps while we're allocating */
+	rte_rwlock_read_lock(&mcfg->heap_list_lock);
+
 	/* turn socket ID into heap ID */
 	heap_id = malloc_socket_to_heap_id(socket);
 	/* if heap id is negative, socket ID was invalid */
-	if (heap_id < 0)
-		return NULL;
+	if (heap_id < 0) {
+		ret = NULL;
+		goto unlock;
+	}
 
 	ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags, align,
 			bound, contig);
 	if (ret != NULL || socket_arg != SOCKET_ID_ANY)
-		return ret;
+		goto unlock;
 
 	/* try other heaps. we are only iterating through native DPDK sockets,
 	 * so external heaps won't be included.
@@ -725,9 +731,12 @@  malloc_heap_alloc(const char *type, size_t size, int socket_arg,
 		ret = malloc_heap_alloc_on_heap_id(type, size, i, flags, align,
 				bound, contig);
 		if (ret != NULL)
-			return ret;
+			goto unlock;
 	}
-	return NULL;
+unlock:
+	rte_rwlock_read_unlock(&mcfg->heap_list_lock);
+
+	return ret;
 }
 
 static void *
@@ -753,6 +762,7 @@  void *
 malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
 		size_t align, bool contig)
 {
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	int socket, i, cur_socket, heap_id;
 	void *ret;
 
@@ -768,16 +778,21 @@  malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
 	else
 		socket = socket_arg;
 
+	/* do not allow any alterations to heaps while we're allocating */
+	rte_rwlock_read_lock(&mcfg->heap_list_lock);
+
 	/* turn socket ID into heap ID */
 	heap_id = malloc_socket_to_heap_id(socket);
 	/* if heap id is negative, socket ID was invalid */
-	if (heap_id < 0)
-		return NULL;
+	if (heap_id < 0) {
+		ret = NULL;
+		goto unlock;
+	}
 
 	ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align,
 			contig);
 	if (ret != NULL || socket_arg != SOCKET_ID_ANY)
-		return ret;
+		goto unlock;
 
 	/* try other heaps */
 	for (i = 0; i < (int) rte_socket_count(); i++) {
@@ -787,9 +802,12 @@  malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
 		ret = heap_alloc_biggest_on_heap_id(type, i, flags, align,
 				contig);
 		if (ret != NULL)
-			return ret;
+			goto unlock;
 	}
-	return NULL;
+unlock:
+	rte_rwlock_read_unlock(&mcfg->heap_list_lock);
+
+	return ret;
 }
 
 /* this function is exposed in malloc_mp.h */
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index 0da5ad5e8..8010341b6 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -158,7 +158,7 @@  rte_malloc_get_socket_stats(int socket,
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	int heap_idx, ret = -1;
 
-	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_read_lock(&mcfg->heap_list_lock);
 
 	heap_idx = malloc_socket_to_heap_id(socket);
 	if (heap_idx < 0)
@@ -167,7 +167,7 @@  rte_malloc_get_socket_stats(int socket,
 	ret = malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx],
 			socket_stats);
 unlock:
-	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_read_unlock(&mcfg->heap_list_lock);
 
 	return ret;
 }
@@ -181,14 +181,14 @@  rte_malloc_dump_heaps(FILE *f)
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	unsigned int idx;
 
-	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_read_lock(&mcfg->heap_list_lock);
 
 	for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
 		fprintf(f, "Heap id: %u\n", idx);
 		malloc_heap_dump(&mcfg->malloc_heaps[idx], f);
 	}
 
-	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_read_unlock(&mcfg->heap_list_lock);
 }
 
 int
@@ -206,7 +206,7 @@  rte_malloc_heap_get_socket(const char *name)
 		rte_errno = EINVAL;
 		return -1;
 	}
-	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_read_lock(&mcfg->heap_list_lock);
 	for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
 		struct malloc_heap *tmp = &mcfg->malloc_heaps[idx];
 
@@ -222,7 +222,7 @@  rte_malloc_heap_get_socket(const char *name)
 		rte_errno = ENOENT;
 		ret = -1;
 	}
-	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_read_unlock(&mcfg->heap_list_lock);
 
 	return ret;
 }
@@ -237,7 +237,7 @@  rte_malloc_heap_socket_is_external(int socket_id)
 	if (socket_id == SOCKET_ID_ANY)
 		return 0;
 
-	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_read_lock(&mcfg->heap_list_lock);
 	for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
 		struct malloc_heap *tmp = &mcfg->malloc_heaps[idx];
 
@@ -247,7 +247,7 @@  rte_malloc_heap_socket_is_external(int socket_id)
 			break;
 		}
 	}
-	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_read_unlock(&mcfg->heap_list_lock);
 
 	return ret;
 }
@@ -262,7 +262,7 @@  rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
 	unsigned int heap_id;
 	struct rte_malloc_socket_stats sock_stats;
 
-	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_read_lock(&mcfg->heap_list_lock);
 
 	/* Iterate through all initialised heaps */
 	for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) {
@@ -280,7 +280,7 @@  rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
 		fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count);
 		fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count);
 	}
-	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_read_unlock(&mcfg->heap_list_lock);
 	return;
 }
 
@@ -351,7 +351,7 @@  rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len,
 		rte_errno = EINVAL;
 		return -1;
 	}
-	rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_write_lock(&mcfg->heap_list_lock);
 
 	/* find our heap */
 	heap = find_named_heap(heap_name);
@@ -373,13 +373,18 @@  rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len,
 		goto unlock;
 	}
 
+	/* see rte_eal_memconfig.h for details on locking */
 	rte_spinlock_lock(&heap->lock);
+	rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+
 	ret = malloc_heap_add_external_memory(heap, va_addr, iova_addrs, n,
 			page_sz);
+
+	rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
 	rte_spinlock_unlock(&heap->lock);
 
 unlock:
-	rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_write_unlock(&mcfg->heap_list_lock);
 
 	return ret;
 }
@@ -398,7 +403,7 @@  rte_malloc_heap_memory_remove(const char *heap_name, void *va_addr, size_t len)
 		rte_errno = EINVAL;
 		return -1;
 	}
-	rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_write_lock(&mcfg->heap_list_lock);
 	/* find our heap */
 	heap = find_named_heap(heap_name);
 	if (heap == NULL) {
@@ -413,12 +418,17 @@  rte_malloc_heap_memory_remove(const char *heap_name, void *va_addr, size_t len)
 		goto unlock;
 	}
 
+	/* see rte_eal_memconfig.h for details on locking */
 	rte_spinlock_lock(&heap->lock);
+	rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+
 	ret = malloc_heap_remove_external_memory(heap, va_addr, len);
+
+	rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
 	rte_spinlock_unlock(&heap->lock);
 
 unlock:
-	rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_write_unlock(&mcfg->heap_list_lock);
 
 	return ret;
 }
@@ -489,7 +499,7 @@  sync_memory(const char *heap_name, void *va_addr, size_t len, bool attach)
 		rte_errno = EINVAL;
 		return -1;
 	}
-	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_read_lock(&mcfg->heap_list_lock);
 
 	/* find our heap */
 	heap = find_named_heap(heap_name);
@@ -511,8 +521,8 @@  sync_memory(const char *heap_name, void *va_addr, size_t len, bool attach)
 	wa.result = -ENOENT; /* fail unless explicitly told to succeed */
 	wa.attach = attach;
 
-	/* we're already holding a read lock */
-	rte_memseg_list_walk_thread_unsafe(sync_mem_walk, &wa);
+	/* hotplug lock is no longer held here, so use the locking walk */
+	rte_memseg_list_walk(sync_mem_walk, &wa);
 
 	if (wa.result < 0) {
 		rte_errno = -wa.result;
@@ -525,7 +535,7 @@  sync_memory(const char *heap_name, void *va_addr, size_t len, bool attach)
 		ret = 0;
 	}
 unlock:
-	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_read_unlock(&mcfg->heap_list_lock);
 	return ret;
 }
 
@@ -558,7 +568,7 @@  rte_malloc_heap_create(const char *heap_name)
 	/* check if there is space in the heap list, or if heap with this name
 	 * already exists.
 	 */
-	rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_write_lock(&mcfg->heap_list_lock);
 
 	for (i = 0; i < RTE_MAX_HEAPS; i++) {
 		struct malloc_heap *tmp = &mcfg->malloc_heaps[i];
@@ -587,7 +597,7 @@  rte_malloc_heap_create(const char *heap_name)
 	/* we're sure that we can create a new heap, so do it */
 	ret = malloc_heap_create(heap, heap_name);
 unlock:
-	rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_write_unlock(&mcfg->heap_list_lock);
 
 	return ret;
 }
@@ -606,7 +616,7 @@  rte_malloc_heap_destroy(const char *heap_name)
 		rte_errno = EINVAL;
 		return -1;
 	}
-	rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_write_lock(&mcfg->heap_list_lock);
 
 	/* start from non-socket heaps */
 	heap = find_named_heap(heap_name);
@@ -630,7 +640,7 @@  rte_malloc_heap_destroy(const char *heap_name)
 	if (ret < 0)
 		rte_spinlock_unlock(&heap->lock);
 unlock:
-	rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+	rte_rwlock_write_unlock(&mcfg->heap_list_lock);
 
 	return ret;
 }